From 7402fea0a8e63e3ea248257047c584afee8f8bde Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 16 May 2014 15:08:05 -0700 Subject: Vectorized the evaluation of tensor expression (using SSE, AVX, NEON, ...) Added the ability to parallelize the evaluation of a tensor expression over multiple cpu cores. Added the ability to offload the evaluation of a tensor expression to a GPU. --- unsupported/test/cxx11_tensor_thread_pool.cpp | 37 +++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 unsupported/test/cxx11_tensor_thread_pool.cpp (limited to 'unsupported/test/cxx11_tensor_thread_pool.cpp') diff --git a/unsupported/test/cxx11_tensor_thread_pool.cpp b/unsupported/test/cxx11_tensor_thread_pool.cpp new file mode 100644 index 000000000..c9de71da3 --- /dev/null +++ b/unsupported/test/cxx11_tensor_thread_pool.cpp @@ -0,0 +1,37 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#define EIGEN_USE_THREADS + + +#include "main.h" +#include + +using Eigen::Tensor; + +void test_cxx11_tensor_thread_pool() +{ + Eigen::Tensor in1(Eigen::array(2,3,7)); + Eigen::Tensor in2(Eigen::array(2,3,7)); + Eigen::Tensor out(Eigen::array(2,3,7)); + + in1.setRandom(); + in2.setRandom(); + + Eigen::ThreadPoolDevice thread_pool_device(3); + out.device(thread_pool_device) = in1 + in2 * 3.14; + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + VERIFY_IS_APPROX(out(Eigen::array(i,j,k)), in1(Eigen::array(i,j,k)) + in2(Eigen::array(i,j,k)) * 3.14f); + } + } + } +} -- cgit v1.2.3 From 8998f4099e20ebc80db0aba2582301cd48d31c5a Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 5 Jun 2014 10:49:34 -0700 Subject: Created additional tests for the tensor code. --- unsupported/test/CMakeLists.txt | 2 + unsupported/test/cxx11_tensor_comparisons.cpp | 84 +++++++++++++ unsupported/test/cxx11_tensor_contraction.cpp | 163 ++++++++++++++++++++++++++ unsupported/test/cxx11_tensor_device.cpp | 17 ++- unsupported/test/cxx11_tensor_expr.cpp | 149 ++++++++++++++++++++--- unsupported/test/cxx11_tensor_fixed_size.cpp | 14 +-- unsupported/test/cxx11_tensor_thread_pool.cpp | 7 +- 7 files changed, 406 insertions(+), 30 deletions(-) create mode 100644 unsupported/test/cxx11_tensor_comparisons.cpp create mode 100644 unsupported/test/cxx11_tensor_contraction.cpp (limited to 'unsupported/test/cxx11_tensor_thread_pool.cpp') diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index abc3375e5..d6072c9f3 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -102,6 +102,8 @@ if(EIGEN_TEST_CXX11) ei_add_test(cxx11_tensor_simple "-std=c++0x") ei_add_test(cxx11_tensor_symmetry "-std=c++0x") ei_add_test(cxx11_tensor_assign "-std=c++0x") + ei_add_test(cxx11_tensor_comparison "-std=c++0x") + ei_add_test(cxx11_tensor_contraction "-std=c++0x") ei_add_test(cxx11_tensor_expr "-std=c++0x") ei_add_test(cxx11_tensor_map "-std=c++0x") ei_add_test(cxx11_tensor_device "-std=c++0x") diff --git a/unsupported/test/cxx11_tensor_comparisons.cpp b/unsupported/test/cxx11_tensor_comparisons.cpp new file mode 100644 index 000000000..186f56ac3 --- /dev/null +++ b/unsupported/test/cxx11_tensor_comparisons.cpp @@ -0,0 +1,84 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +#include + +using Eigen::Tensor; +using Eigen::RowMajor; + +static void test_orderings() +{ + Tensor mat1(2,3,7); + Tensor mat2(2,3,7); + Tensor lt(2,3,7); + Tensor le(2,3,7); + Tensor gt(2,3,7); + Tensor ge(2,3,7); + + mat1.setRandom(); + mat2.setRandom(); + + lt = mat1 < mat2; + le = mat1 <= mat2; + gt = mat1 > mat2; + ge = mat1 >= mat2; + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + VERIFY_IS_EQUAL(lt(i,j,k), mat1(i,j,k) < mat2(i,j,k)); + VERIFY_IS_EQUAL(le(i,j,k), mat1(i,j,k) <= mat2(i,j,k)); + VERIFY_IS_EQUAL(gt(i,j,k), mat1(i,j,k) > mat2(i,j,k)); + VERIFY_IS_EQUAL(ge(i,j,k), mat1(i,j,k) >= mat2(i,j,k)); + } + } + } +} + + +static void test_equality() +{ + Tensor mat1(2,3,7); + Tensor mat2(2,3,7); + + mat1.setRandom(); + mat2.setRandom(); + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + if (random() < 0.5) { + mat2(i,j,k) = mat1(i,j,k); + } + } + } + } + + Tensor eq(2,3,7); + Tensor ne(2,3,7); + eq = (mat1 == mat2); + ne = (mat1 != mat2); + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + VERIFY_IS_EQUAL(eq(i,j,k), mat1(i,j,k) == mat2(i,j,k)); + VERIFY_IS_EQUAL(ne(i,j,k), mat1(i,j,k) != mat2(i,j,k)); + } + } + } +} + + +void test_cxx11_tensor_comparisons() +{ + CALL_SUBTEST(test_orderings()); + CALL_SUBTEST(test_equality()); +} diff --git a/unsupported/test/cxx11_tensor_contraction.cpp b/unsupported/test/cxx11_tensor_contraction.cpp new file mode 100644 index 000000000..1c89dfdd1 --- /dev/null +++ b/unsupported/test/cxx11_tensor_contraction.cpp @@ -0,0 +1,163 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +#include + +using Eigen::Tensor; + +typedef Tensor::DimensionPair DimPair; + + +static void test_evals() +{ + Tensor mat1(2, 3); + Tensor mat2(2, 3); + Tensor mat3(3, 2); + + mat1.setRandom(); + mat2.setRandom(); + mat3.setRandom(); + + Tensor mat4(3,3); + mat4.setZero(); + Eigen::array dims3({{DimPair(0, 0)}}); + TensorEvaluator eval(mat1.contract(mat2, dims3)); + eval.evalTo(mat4.data()); + EIGEN_STATIC_ASSERT(TensorEvaluator::NumDims==2ul, YOU_MADE_A_PROGRAMMING_MISTAKE); + VERIFY_IS_EQUAL(eval.dimensions()[0], 3); + VERIFY_IS_EQUAL(eval.dimensions()[1], 3); + + VERIFY_IS_APPROX(mat4(0,0), mat1(0,0)*mat2(0,0) + mat1(1,0)*mat2(1,0)); + VERIFY_IS_APPROX(mat4(0,1), mat1(0,0)*mat2(0,1) + mat1(1,0)*mat2(1,1)); + VERIFY_IS_APPROX(mat4(0,2), mat1(0,0)*mat2(0,2) + mat1(1,0)*mat2(1,2)); + VERIFY_IS_APPROX(mat4(1,0), mat1(0,1)*mat2(0,0) + mat1(1,1)*mat2(1,0)); + VERIFY_IS_APPROX(mat4(1,1), mat1(0,1)*mat2(0,1) + mat1(1,1)*mat2(1,1)); + VERIFY_IS_APPROX(mat4(1,2), mat1(0,1)*mat2(0,2) + mat1(1,1)*mat2(1,2)); + VERIFY_IS_APPROX(mat4(2,0), mat1(0,2)*mat2(0,0) + mat1(1,2)*mat2(1,0)); + VERIFY_IS_APPROX(mat4(2,1), mat1(0,2)*mat2(0,1) + mat1(1,2)*mat2(1,1)); + VERIFY_IS_APPROX(mat4(2,2), mat1(0,2)*mat2(0,2) + mat1(1,2)*mat2(1,2)); + + Tensor mat5(2,2); + mat5.setZero(); + Eigen::array dims4({{DimPair(1, 1)}}); + TensorEvaluator eval2(mat1.contract(mat2, dims4)); + eval2.evalTo(mat5.data()); + EIGEN_STATIC_ASSERT(TensorEvaluator::NumDims==2ul, YOU_MADE_A_PROGRAMMING_MISTAKE); + VERIFY_IS_EQUAL(eval2.dimensions()[0], 2); + VERIFY_IS_EQUAL(eval2.dimensions()[1], 2); + + VERIFY_IS_APPROX(mat5(0,0), mat1(0,0)*mat2(0,0) + mat1(0,1)*mat2(0,1) + mat1(0,2)*mat2(0,2)); + VERIFY_IS_APPROX(mat5(0,1), mat1(0,0)*mat2(1,0) + mat1(0,1)*mat2(1,1) + mat1(0,2)*mat2(1,2)); + VERIFY_IS_APPROX(mat5(1,0), mat1(1,0)*mat2(0,0) + mat1(1,1)*mat2(0,1) + mat1(1,2)*mat2(0,2)); + VERIFY_IS_APPROX(mat5(1,1), mat1(1,0)*mat2(1,0) + mat1(1,1)*mat2(1,1) + mat1(1,2)*mat2(1,2)); + + Tensor mat6(2,2); + mat6.setZero(); + Eigen::array dims6({{DimPair(1, 0)}}); + TensorEvaluator eval3(mat1.contract(mat3, dims6)); + eval3.evalTo(mat6.data()); + EIGEN_STATIC_ASSERT(TensorEvaluator::NumDims==2ul, YOU_MADE_A_PROGRAMMING_MISTAKE); + VERIFY_IS_EQUAL(eval3.dimensions()[0], 2); + VERIFY_IS_EQUAL(eval3.dimensions()[1], 2); + + VERIFY_IS_APPROX(mat6(0,0), mat1(0,0)*mat3(0,0) + mat1(0,1)*mat3(1,0) + mat1(0,2)*mat3(2,0)); + VERIFY_IS_APPROX(mat6(0,1), mat1(0,0)*mat3(0,1) + mat1(0,1)*mat3(1,1) + mat1(0,2)*mat3(2,1)); + VERIFY_IS_APPROX(mat6(1,0), mat1(1,0)*mat3(0,0) + mat1(1,1)*mat3(1,0) + mat1(1,2)*mat3(2,0)); + VERIFY_IS_APPROX(mat6(1,1), mat1(1,0)*mat3(0,1) + mat1(1,1)*mat3(1,1) + mat1(1,2)*mat3(2,1)); +} + + +static void test_scalar() +{ + Tensor vec1({6}); + Tensor vec2({6}); + + vec1.setRandom(); + vec2.setRandom(); + + Tensor scalar(1); + scalar.setZero(); + Eigen::array dims({{DimPair(0, 0)}}); + TensorEvaluator eval(vec1.contract(vec2, dims)); + eval.evalTo(scalar.data()); + EIGEN_STATIC_ASSERT(TensorEvaluator::NumDims==1ul, YOU_MADE_A_PROGRAMMING_MISTAKE); + + float expected = 0.0f; + for (int i = 0; i < 6; ++i) { + expected += vec1(i) * vec2(i); + } + VERIFY_IS_APPROX(scalar(0), expected); +} + + +static void test_multidims() +{ + Tensor mat1(2, 2, 2); + Tensor mat2(2, 2, 2, 2); + + mat1.setRandom(); + mat2.setRandom(); + + Tensor mat3(2, 2, 2); + mat3.setZero(); + Eigen::array dims({{DimPair(1, 2), DimPair(2, 3)}}); + TensorEvaluator eval(mat1.contract(mat2, dims)); + eval.evalTo(mat3.data()); + EIGEN_STATIC_ASSERT(TensorEvaluator::NumDims==3ul, YOU_MADE_A_PROGRAMMING_MISTAKE); + VERIFY_IS_EQUAL(eval.dimensions()[0], 2); + VERIFY_IS_EQUAL(eval.dimensions()[1], 2); + VERIFY_IS_EQUAL(eval.dimensions()[2], 2); + + VERIFY_IS_APPROX(mat3(0,0,0), mat1(0,0,0)*mat2(0,0,0,0) + mat1(0,1,0)*mat2(0,0,1,0) + + mat1(0,0,1)*mat2(0,0,0,1) + mat1(0,1,1)*mat2(0,0,1,1)); + VERIFY_IS_APPROX(mat3(0,0,1), mat1(0,0,0)*mat2(0,1,0,0) + mat1(0,1,0)*mat2(0,1,1,0) + + mat1(0,0,1)*mat2(0,1,0,1) + mat1(0,1,1)*mat2(0,1,1,1)); + VERIFY_IS_APPROX(mat3(0,1,0), mat1(0,0,0)*mat2(1,0,0,0) + mat1(0,1,0)*mat2(1,0,1,0) + + mat1(0,0,1)*mat2(1,0,0,1) + mat1(0,1,1)*mat2(1,0,1,1)); + VERIFY_IS_APPROX(mat3(0,1,1), mat1(0,0,0)*mat2(1,1,0,0) + mat1(0,1,0)*mat2(1,1,1,0) + + mat1(0,0,1)*mat2(1,1,0,1) + mat1(0,1,1)*mat2(1,1,1,1)); + VERIFY_IS_APPROX(mat3(1,0,0), mat1(1,0,0)*mat2(0,0,0,0) + mat1(1,1,0)*mat2(0,0,1,0) + + mat1(1,0,1)*mat2(0,0,0,1) + mat1(1,1,1)*mat2(0,0,1,1)); + VERIFY_IS_APPROX(mat3(1,0,1), mat1(1,0,0)*mat2(0,1,0,0) + mat1(1,1,0)*mat2(0,1,1,0) + + mat1(1,0,1)*mat2(0,1,0,1) + mat1(1,1,1)*mat2(0,1,1,1)); + VERIFY_IS_APPROX(mat3(1,1,0), mat1(1,0,0)*mat2(1,0,0,0) + mat1(1,1,0)*mat2(1,0,1,0) + + mat1(1,0,1)*mat2(1,0,0,1) + mat1(1,1,1)*mat2(1,0,1,1)); + VERIFY_IS_APPROX(mat3(1,1,1), mat1(1,0,0)*mat2(1,1,0,0) + mat1(1,1,0)*mat2(1,1,1,0) + + mat1(1,0,1)*mat2(1,1,0,1) + mat1(1,1,1)*mat2(1,1,1,1)); +} + + +static void test_expr() +{ + Tensor mat1(2, 3); + Tensor mat2(3, 2); + mat1.setRandom(); + mat2.setRandom(); + + Tensor mat3(2,2); + + Eigen::array dims({{DimPair(1, 0)}}); + mat3 = mat1.contract(mat2, dims); + + VERIFY_IS_APPROX(mat3(0,0), mat1(0,0)*mat2(0,0) + mat1(0,1)*mat2(1,0) + mat1(0,2)*mat2(2,0)); + VERIFY_IS_APPROX(mat3(0,1), mat1(0,0)*mat2(0,1) + mat1(0,1)*mat2(1,1) + mat1(0,2)*mat2(2,1)); + VERIFY_IS_APPROX(mat3(1,0), mat1(1,0)*mat2(0,0) + mat1(1,1)*mat2(1,0) + mat1(1,2)*mat2(2,0)); + VERIFY_IS_APPROX(mat3(1,1), mat1(1,0)*mat2(0,1) + mat1(1,1)*mat2(1,1) + mat1(1,2)*mat2(2,1)); +} + + +void test_cxx11_tensor_contraction() +{ + CALL_SUBTEST(test_evals()); + CALL_SUBTEST(test_scalar()); + CALL_SUBTEST(test_multidims()); + CALL_SUBTEST(test_expr()); +} diff --git a/unsupported/test/cxx11_tensor_device.cpp b/unsupported/test/cxx11_tensor_device.cpp index 9eb1d0420..365b109c7 100644 --- a/unsupported/test/cxx11_tensor_device.cpp +++ b/unsupported/test/cxx11_tensor_device.cpp @@ -15,7 +15,7 @@ #include "main.h" -#include +#include using Eigen::Tensor; using Eigen::RowMajor; @@ -39,8 +39,12 @@ struct CPUContext { // Context for evaluation on GPU struct GPUContext { - GPUContext(const Eigen::TensorMap >& in1, Eigen::TensorMap >& in2, Eigen::TensorMap >& out) : in1_(in1), in2_(in2), out_(out) { } - + GPUContext(const Eigen::TensorMap >& in1, Eigen::TensorMap >& in2, Eigen::TensorMap >& out) : in1_(in1), in2_(in2), out_(out), gpu_device_(&stream_) { + cudaStreamCreate(&stream_); + } + ~GPUContext() { + cudaStreamDestroy(stream_); + } const Eigen::TensorMap >& in1() const { return in1_; } const Eigen::TensorMap >& in2() const { return in2_; } Eigen::TensorDevice >, Eigen::GpuDevice> out() { return TensorDevice >, Eigen::GpuDevice>(gpu_device_, out_); } @@ -49,6 +53,7 @@ struct GPUContext { const Eigen::TensorMap >& in1_; const Eigen::TensorMap >& in2_; Eigen::TensorMap >& out_; + cudaStream_t stream_; Eigen::GpuDevice gpu_device_; }; @@ -57,7 +62,7 @@ struct GPUContext { template static void test_contextual_eval(Context* context) { - context->out() = context->in1() + context->in2() * 3.14f; + context->out() = context->in1() + context->in2() * 3.14f + context->in1().constant(2.718f); } static void test_cpu() { @@ -73,7 +78,7 @@ static void test_cpu() { for (int i = 0; i < 2; ++i) { for (int j = 0; j < 3; ++j) { for (int k = 0; k < 7; ++k) { - VERIFY_IS_APPROX(out(Eigen::array(i,j,k)), in1(Eigen::array(i,j,k)) + in2(Eigen::array(i,j,k)) * 3.14f); + VERIFY_IS_APPROX(out(Eigen::array(i,j,k)), in1(Eigen::array(i,j,k)) + in2(Eigen::array(i,j,k)) * 3.14f + 2.718f); } } } @@ -111,7 +116,7 @@ static void test_gpu() { for (int i = 0; i < 2; ++i) { for (int j = 0; j < 3; ++j) { for (int k = 0; k < 7; ++k) { - VERIFY_IS_APPROX(out(Eigen::array(i,j,k)), in1(Eigen::array(i,j,k)) + in2(Eigen::array(i,j,k)) * 3.14f); + VERIFY_IS_APPROX(out(Eigen::array(i,j,k)), in1(Eigen::array(i,j,k)) + in2(Eigen::array(i,j,k)) * 3.14f + 2.718f); } } } diff --git a/unsupported/test/cxx11_tensor_expr.cpp b/unsupported/test/cxx11_tensor_expr.cpp index e0124da8c..e85fcbfa9 100644 --- a/unsupported/test/cxx11_tensor_expr.cpp +++ b/unsupported/test/cxx11_tensor_expr.cpp @@ -28,10 +28,10 @@ static void test_1d() float data3[6]; TensorMap> vec3(data3, 6); - vec3 = vec1.cwiseSqrt(); + vec3 = vec1.sqrt(); float data4[6]; TensorMap> vec4(data4, 6); - vec4 = vec2.cwiseSqrt(); + vec4 = vec2.square(); VERIFY_IS_APPROX(vec3(0), sqrtf(4.0)); VERIFY_IS_APPROX(vec3(1), sqrtf(8.0)); @@ -40,12 +40,12 @@ static void test_1d() VERIFY_IS_APPROX(vec3(4), sqrtf(23.0)); VERIFY_IS_APPROX(vec3(5), sqrtf(42.0)); - VERIFY_IS_APPROX(vec4(0), sqrtf(0.0)); - VERIFY_IS_APPROX(vec4(1), sqrtf(1.0)); - VERIFY_IS_APPROX(vec4(2), sqrtf(2.0)); - VERIFY_IS_APPROX(vec4(3), sqrtf(3.0)); - VERIFY_IS_APPROX(vec4(4), sqrtf(4.0)); - VERIFY_IS_APPROX(vec4(5), sqrtf(5.0)); + VERIFY_IS_APPROX(vec4(0), 0.0f); + VERIFY_IS_APPROX(vec4(1), 1.0f); + VERIFY_IS_APPROX(vec4(2), 2.0f * 2.0f); + VERIFY_IS_APPROX(vec4(3), 3.0f * 3.0f); + VERIFY_IS_APPROX(vec4(4), 4.0f * 4.0f); + VERIFY_IS_APPROX(vec4(5), 5.0f * 5.0f); vec3 = vec1 + vec2; VERIFY_IS_APPROX(vec3(0), 4.0f + 0.0f); @@ -79,8 +79,8 @@ static void test_2d() Tensor mat3(2,3); Tensor mat4(2,3); - mat3 = mat1.cwiseAbs(); - mat4 = mat2.cwiseAbs(); + mat3 = mat1.abs(); + mat4 = mat2.abs(); VERIFY_IS_APPROX(mat3(0,0), 0.0f); VERIFY_IS_APPROX(mat3(0,1), 1.0f); @@ -102,7 +102,7 @@ static void test_3d() Tensor mat1(2,3,7); Tensor mat2(2,3,7); - float val = 0.0; + float val = 1.0; for (int i = 0; i < 2; ++i) { for (int j = 0; j < 3; ++j) { for (int k = 0; k < 7; ++k) { @@ -118,28 +118,147 @@ static void test_3d() Tensor mat4(2,3,7); mat4 = mat2 * 3.14f; Tensor mat5(2,3,7); - mat5 = mat1.cwiseSqrt().cwiseSqrt(); + mat5 = mat1.inverse().log(); Tensor mat6(2,3,7); - mat6 = mat2.cwiseSqrt() * 3.14f; + mat6 = mat2.pow(0.5f) * 3.14f; + Tensor mat7(2,3,7); + mat7 = mat1.cwiseMax(mat5 * 2.0f).exp(); + Tensor mat8(2,3,7); + mat8 = (-mat2).exp() * 3.14f; - val = 0.0; + val = 1.0; for (int i = 0; i < 2; ++i) { for (int j = 0; j < 3; ++j) { for (int k = 0; k < 7; ++k) { VERIFY_IS_APPROX(mat3(i,j,k), val + val); VERIFY_IS_APPROX(mat4(i,j,k), val * 3.14f); - VERIFY_IS_APPROX(mat5(i,j,k), sqrtf(sqrtf(val))); + VERIFY_IS_APPROX(mat5(i,j,k), logf(1.0f/val)); VERIFY_IS_APPROX(mat6(i,j,k), sqrtf(val) * 3.14f); + VERIFY_IS_APPROX(mat7(i,j,k), expf((std::max)(val, mat5(i,j,k) * 2.0f))); + VERIFY_IS_APPROX(mat8(i,j,k), expf(-val) * 3.14f); val += 1.0; } } } } +static void test_constants() +{ + Tensor mat1(2,3,7); + Tensor mat2(2,3,7); + Tensor mat3(2,3,7); + + float val = 1.0; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + mat1(i,j,k) = val; + val += 1.0; + } + } + } + mat2 = mat1.constant(3.14f); + mat3 = mat1.cwiseMax(7.3f).exp(); + + val = 1.0; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + VERIFY_IS_APPROX(mat2(i,j,k), 3.14f); + VERIFY_IS_APPROX(mat3(i,j,k), expf((std::max)(val, 7.3f))); + val += 1.0; + } + } + } +} + + +static void test_functors() +{ + Tensor mat1(2,3,7); + Tensor mat2(2,3,7); + Tensor mat3(2,3,7); + + float val = 1.0; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + mat1(i,j,k) = val; + val += 1.0; + } + } + } + mat2 = mat1.inverse().unaryExpr(&asinf); + mat3 = mat1.unaryExpr(&tanhf); + + val = 1.0; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + VERIFY_IS_APPROX(mat2(i,j,k), asinf(1.0f / mat1(i,j,k))); + VERIFY_IS_APPROX(mat3(i,j,k), tanhf(mat1(i,j,k))); + val += 1.0; + } + } + } +} + +static void test_type_casting() +{ + Tensor mat1(2,3,7); + Tensor mat2(2,3,7); + Tensor mat3(2,3,7); + mat1.setRandom(); + mat2.setRandom(); + + mat3 = mat1.template cast(); + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + VERIFY_IS_APPROX(mat3(i,j,k), mat1(i,j,k) ? 1.0 : 0.0); + } + } + } + + mat3 = mat2.template cast(); + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + VERIFY_IS_APPROX(mat3(i,j,k), static_cast(mat2(i,j,k))); + } + } + } +} + +static void test_select() +{ + Tensor selector(2,3,7); + Tensor mat1(2,3,7); + Tensor mat2(2,3,7); + Tensor result(2,3,7); + + selector.setRandom(); + mat1.setRandom(); + mat2.setRandom(); + result = (selector > selector.constant(0.5f)).select(mat1, mat2); + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + VERIFY_IS_APPROX(result(i,j,k), (selector(i,j,k) > 0.5f) ? mat1(i,j,k) : mat2(i,j,k)); + } + } + } +} + void test_cxx11_tensor_expr() { CALL_SUBTEST(test_1d()); CALL_SUBTEST(test_2d()); CALL_SUBTEST(test_3d()); + CALL_SUBTEST(test_constants()); + CALL_SUBTEST(test_functors()); + CALL_SUBTEST(test_type_casting()); + CALL_SUBTEST(test_select()); } diff --git a/unsupported/test/cxx11_tensor_fixed_size.cpp b/unsupported/test/cxx11_tensor_fixed_size.cpp index 214f6951d..d270486f2 100644 --- a/unsupported/test/cxx11_tensor_fixed_size.cpp +++ b/unsupported/test/cxx11_tensor_fixed_size.cpp @@ -33,10 +33,10 @@ static void test_1d() float data3[6]; TensorMap > > vec3(data3, 6); - vec3 = vec1.cwiseSqrt(); + vec3 = vec1.sqrt(); float data4[6]; TensorMap, RowMajor> > vec4(data4, 6); - vec4 = vec2.cwiseSqrt(); + vec4 = vec2.sqrt(); VERIFY_IS_EQUAL((vec3.size()), 6); // VERIFY_IS_EQUAL((vec3.dimensions()[0]), 6); @@ -92,8 +92,8 @@ static void test_2d() TensorFixedSize> mat3; TensorFixedSize, RowMajor> mat4; - mat3 = mat1.cwiseAbs(); - mat4 = mat2.cwiseAbs(); + mat3 = mat1.abs(); + mat4 = mat2.abs(); VERIFY_IS_EQUAL((mat3.size()), 2*3); // VERIFY_IS_EQUAL((mat3.dimension(0)), 2); @@ -136,9 +136,9 @@ static void test_3d() } TensorFixedSize > mat3; - mat3 = mat1.cwiseSqrt(); + mat3 = mat1.sqrt(); TensorFixedSize, RowMajor> mat4; - mat4 = mat2.cwiseSqrt(); + mat4 = mat2.sqrt(); VERIFY_IS_EQUAL((mat3.size()), 2*3*7); // VERIFY_IS_EQUAL((mat3.dimension(0)), 2); @@ -173,7 +173,7 @@ static void test_array() } TensorFixedSize > mat3; - mat3 = mat1.cwisePow(3.5f); + mat3 = mat1.pow(3.5f); val = 0.0; for (int i = 0; i < 2; ++i) { diff --git a/unsupported/test/cxx11_tensor_thread_pool.cpp b/unsupported/test/cxx11_tensor_thread_pool.cpp index c9de71da3..b371e8a71 100644 --- a/unsupported/test/cxx11_tensor_thread_pool.cpp +++ b/unsupported/test/cxx11_tensor_thread_pool.cpp @@ -12,6 +12,7 @@ #include "main.h" #include +#include "thread/threadpool.h" using Eigen::Tensor; @@ -24,8 +25,10 @@ void test_cxx11_tensor_thread_pool() in1.setRandom(); in2.setRandom(); - Eigen::ThreadPoolDevice thread_pool_device(3); - out.device(thread_pool_device) = in1 + in2 * 3.14; + ThreadPool thread_pool(2); + thread_pool.StartWorkers(); + Eigen::ThreadPoolDevice thread_pool_device(&thread_pool, 3); + out.device(thread_pool_device) = in1 + in2 * 3.14f; for (int i = 0; i < 2; ++i) { for (int j = 0; j < 3; ++j) { -- cgit v1.2.3 From fe102248ac8f78e33064caeb5cdea6fc41af637c Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 9 Jun 2014 09:19:21 -0700 Subject: Fixed the threadpool test --- unsupported/test/cxx11_tensor_thread_pool.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'unsupported/test/cxx11_tensor_thread_pool.cpp') diff --git a/unsupported/test/cxx11_tensor_thread_pool.cpp b/unsupported/test/cxx11_tensor_thread_pool.cpp index b371e8a71..2e67b2064 100644 --- a/unsupported/test/cxx11_tensor_thread_pool.cpp +++ b/unsupported/test/cxx11_tensor_thread_pool.cpp @@ -12,7 +12,6 @@ #include "main.h" #include -#include "thread/threadpool.h" using Eigen::Tensor; @@ -25,9 +24,7 @@ void test_cxx11_tensor_thread_pool() in1.setRandom(); in2.setRandom(); - ThreadPool thread_pool(2); - thread_pool.StartWorkers(); - Eigen::ThreadPoolDevice thread_pool_device(&thread_pool, 3); + Eigen::ThreadPoolDevice thread_pool_device(3); out.device(thread_pool_device) = in1 + in2 * 3.14f; for (int i = 0; i < 2; ++i) { -- cgit v1.2.3 From a991f94c0e5c51555875564ce58681a82d07cd69 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 10 Oct 2014 15:20:37 -0700 Subject: Fixed the thread pool test --- test/main.h | 4 ++-- unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h | 2 +- unsupported/test/CMakeLists.txt | 2 +- unsupported/test/cxx11_tensor_thread_pool.cpp | 8 ++++---- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'unsupported/test/cxx11_tensor_thread_pool.cpp') diff --git a/test/main.h b/test/main.h index b504970f3..9cb41c828 100644 --- a/test/main.h +++ b/test/main.h @@ -47,8 +47,8 @@ // protected by parenthesis against macro expansion, the min()/max() macros // are defined here and any not-parenthesized min/max call will cause a // compiler error. -#define min(A,B) please_protect_your_min_with_parentheses -#define max(A,B) please_protect_your_max_with_parentheses +//#define min(A,B) please_protect_your_min_with_parentheses +//#define max(A,B) please_protect_your_max_with_parentheses #define FORBIDDEN_IDENTIFIER (this_identifier_is_forbidden_to_avoid_clashes) this_identifier_is_forbidden_to_avoid_clashes // B0 is defined in POSIX header termios.h diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h index faf965df8..84768ca09 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h @@ -131,7 +131,7 @@ class TensorExecutor const Index numblocks = size / blocksize; Index i = 0; - vector > results; + std::vector > results; results.reserve(numblocks); for (int i = 0; i < numblocks; ++i) { results.push_back(std::async(std::launch::async, &EvalRange::run, &evaluator, i*blocksize, (i+1)*blocksize)); diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index 75423f516..1c4d0838a 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -122,5 +122,5 @@ if(EIGEN_TEST_CXX11) # ei_add_test(cxx11_tensor_shuffling "-std=c++0x") ei_add_test(cxx11_tensor_striding "-std=c++0x") # ei_add_test(cxx11_tensor_device "-std=c++0x") -# ei_add_test(cxx11_tensor_thread_pool "-std=c++0x") + ei_add_test(cxx11_tensor_thread_pool "-std=c++0x") endif() diff --git a/unsupported/test/cxx11_tensor_thread_pool.cpp b/unsupported/test/cxx11_tensor_thread_pool.cpp index 2e67b2064..e02d8e4be 100644 --- a/unsupported/test/cxx11_tensor_thread_pool.cpp +++ b/unsupported/test/cxx11_tensor_thread_pool.cpp @@ -17,9 +17,9 @@ using Eigen::Tensor; void test_cxx11_tensor_thread_pool() { - Eigen::Tensor in1(Eigen::array(2,3,7)); - Eigen::Tensor in2(Eigen::array(2,3,7)); - Eigen::Tensor out(Eigen::array(2,3,7)); + Eigen::Tensor in1(2,3,7); + Eigen::Tensor in2(2,3,7); + Eigen::Tensor out(2,3,7); in1.setRandom(); in2.setRandom(); @@ -30,7 +30,7 @@ void test_cxx11_tensor_thread_pool() for (int i = 0; i < 2; ++i) { for (int j = 0; j < 3; ++j) { for (int k = 0; k < 7; ++k) { - VERIFY_IS_APPROX(out(Eigen::array(i,j,k)), in1(Eigen::array(i,j,k)) + in2(Eigen::array(i,j,k)) * 3.14f); + VERIFY_IS_APPROX(out(i,j,k), in1(i,j,k) + in2(i,j,k) * 3.14f); } } } -- cgit v1.2.3 From 99d75235a9567865d2c070a2840d54c8a5ad0f43 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 13 Oct 2014 17:02:09 -0700 Subject: Misc improvements and cleanups --- Eigen/src/Core/GenericPacketMath.h | 15 +- unsupported/Eigen/CXX11/Tensor | 4 + .../Eigen/CXX11/src/Core/util/CXX11Workarounds.h | 5 + .../Eigen/CXX11/src/Core/util/EmulateCXX11Meta.h | 101 ++++++++- unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h | 4 +- unsupported/Eigen/CXX11/src/Tensor/TensorBase.h | 8 +- .../Eigen/CXX11/src/Tensor/TensorBroadcasting.h | 8 +- .../Eigen/CXX11/src/Tensor/TensorConvolution.h | 12 +- unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h | 35 ++++ .../Eigen/CXX11/src/Tensor/TensorDeviceType.h | 73 ++++--- .../Eigen/CXX11/src/Tensor/TensorDimensions.h | 2 +- unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h | 2 +- .../Eigen/CXX11/src/Tensor/TensorEvaluator.h | 20 +- .../Eigen/CXX11/src/Tensor/TensorExecutor.h | 36 +++- .../Eigen/CXX11/src/Tensor/TensorFixedSize.h | 4 +- unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h | 26 ++- unsupported/Eigen/CXX11/src/Tensor/TensorMap.h | 22 +- unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h | 4 +- .../Eigen/CXX11/src/Tensor/TensorShuffling.h | 4 +- unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h | 9 +- .../Eigen/CXX11/src/Tensor/TensorStriding.h | 61 ++++-- unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h | 32 +-- unsupported/test/CMakeLists.txt | 1 + unsupported/test/cxx11_tensor_assign.cpp | 35 +++- unsupported/test/cxx11_tensor_convolution.cpp | 70 +++++++ unsupported/test/cxx11_tensor_device.cpp | 27 +++ unsupported/test/cxx11_tensor_morphing.cpp | 5 +- unsupported/test/cxx11_tensor_of_complex.cpp | 64 ++++++ unsupported/test/cxx11_tensor_thread_pool.cpp | 232 ++++++++++++++++++++- 29 files changed, 780 insertions(+), 141 deletions(-) create mode 100644 unsupported/test/cxx11_tensor_of_complex.cpp (limited to 'unsupported/test/cxx11_tensor_thread_pool.cpp') diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index e6fea5bba..3ef3475c7 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -359,7 +359,7 @@ pmadd(const Packet& a, /** \internal \returns a packet version of \a *from. * If LoadMode equals #Aligned, \a from must be 16 bytes aligned */ template -inline Packet ploadt(const typename unpacket_traits::type* from) +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_traits::type* from) { if(LoadMode == Aligned) return pload(from); @@ -370,7 +370,7 @@ inline Packet ploadt(const typename unpacket_traits::type* from) /** \internal copy the packet \a from to \a *to. * If StoreMode equals #Aligned, \a to must be 16 bytes aligned */ template -inline void pstoret(Scalar* to, const Packet& from) +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& from) { if(LoadMode == Aligned) pstore(to, from); @@ -378,6 +378,17 @@ inline void pstoret(Scalar* to, const Packet& from) pstoreu(to, from); } +/** \internal \returns a packet version of \a *from. + * Unlike ploadt, ploadt_ro takes advantage of the read-only memory path on the + * hardware if available to speedup the loading of data that won't be modified + * by the current computation. + */ +template +inline Packet ploadt_ro(const typename unpacket_traits::type* from) +{ + return ploadt(from); +} + /** \internal default implementation of palign() allowing partial specialization */ template struct palign_impl diff --git a/unsupported/Eigen/CXX11/Tensor b/unsupported/Eigen/CXX11/Tensor index 0dac95e45..2137f4276 100644 --- a/unsupported/Eigen/CXX11/Tensor +++ b/unsupported/Eigen/CXX11/Tensor @@ -30,6 +30,10 @@ #include #include +#ifdef EIGEN_USE_THREADS +#include +#endif + #if defined(EIGEN_USE_GPU) && defined(__CUDACC__) #include #endif diff --git a/unsupported/Eigen/CXX11/src/Core/util/CXX11Workarounds.h b/unsupported/Eigen/CXX11/src/Core/util/CXX11Workarounds.h index 227522ecb..e30eb6ad8 100644 --- a/unsupported/Eigen/CXX11/src/Core/util/CXX11Workarounds.h +++ b/unsupported/Eigen/CXX11/src/Core/util/CXX11Workarounds.h @@ -66,6 +66,11 @@ template constexpr inline T& array_ template constexpr inline T&& array_get(std::array&& a) { return (T&&) STD_GET_ARR_HACK; } template constexpr inline T const& array_get(std::array const& a) { return (T const&) STD_GET_ARR_HACK; } +template constexpr inline T& array_get(std::vector& a) { return a[I]; } +template constexpr inline T&& array_get(std::vector&& a) { return a[I]; } +template constexpr inline T const& array_get(std::vector const& a) { return a[I]; } + + #undef STD_GET_ARR_HACK template struct array_size; diff --git a/unsupported/Eigen/CXX11/src/Core/util/EmulateCXX11Meta.h b/unsupported/Eigen/CXX11/src/Core/util/EmulateCXX11Meta.h index 4c6b95773..e45d0a3b1 100644 --- a/unsupported/Eigen/CXX11/src/Core/util/EmulateCXX11Meta.h +++ b/unsupported/Eigen/CXX11/src/Core/util/EmulateCXX11Meta.h @@ -48,7 +48,8 @@ template class array { values[2] = v3; } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3, const T& v4) { + EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3, + const T& v4) { EIGEN_STATIC_ASSERT(n==4, YOU_MADE_A_PROGRAMMING_MISTAKE) values[0] = v1; values[1] = v2; @@ -56,7 +57,8 @@ template class array { values[3] = v4; } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3, const T& v4, const T& v5) { + EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3, const T& v4, + const T& v5) { EIGEN_STATIC_ASSERT(n==5, YOU_MADE_A_PROGRAMMING_MISTAKE) values[0] = v1; values[1] = v2; @@ -64,6 +66,43 @@ template class array { values[3] = v4; values[4] = v5; } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3, const T& v4, + const T& v5, const T& v6) { + EIGEN_STATIC_ASSERT(n==6, YOU_MADE_A_PROGRAMMING_MISTAKE) + values[0] = v1; + values[1] = v2; + values[2] = v3; + values[3] = v4; + values[4] = v5; + values[5] = v6; + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3, const T& v4, + const T& v5, const T& v6, const T& v7) { + EIGEN_STATIC_ASSERT(n==7, YOU_MADE_A_PROGRAMMING_MISTAKE) + values[0] = v1; + values[1] = v2; + values[2] = v3; + values[3] = v4; + values[4] = v5; + values[5] = v6; + values[6] = v7; + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE array( + const T& v1, const T& v2, const T& v3, const T& v4, + const T& v5, const T& v6, const T& v7, const T& v8) { + EIGEN_STATIC_ASSERT(n==8, YOU_MADE_A_PROGRAMMING_MISTAKE) + values[0] = v1; + values[1] = v2; + values[2] = v3; + values[3] = v4; + values[4] = v5; + values[5] = v6; + values[6] = v7; + values[7] = v8; + } #ifdef EIGEN_HAS_VARIADIC_TEMPLATES array(std::initializer_list l) { @@ -93,9 +132,11 @@ template struct type_list { struct null_type { }; -template +template struct make_type_list { - typedef typename make_type_list::type tailresult; + typedef typename make_type_list::type tailresult; typedef type_list type; }; @@ -150,6 +191,23 @@ template struct gen_numeric_list_repeated { typedef typename make_type_list, type2val, type2val, type2val, type2val >::type type; }; +template struct gen_numeric_list_repeated { + typedef typename make_type_list, type2val, type2val, + type2val, type2val, type2val >::type type; +}; + +template struct gen_numeric_list_repeated { + typedef typename make_type_list, type2val, type2val, + type2val, type2val, type2val, + type2val >::type type; +}; + +template struct gen_numeric_list_repeated { + typedef typename make_type_list, type2val, type2val, + type2val, type2val, type2val, + type2val, type2val >::type type; +}; + template struct get; @@ -174,6 +232,7 @@ template <> struct arg_prod { static const int value = 1; }; + template array repeat(t v) { array array; @@ -190,6 +249,11 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Head::type array_get(const type_l return get >::value; } +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename NList::HeadType::type array_prod(const NList& l) { + return arg_prod::value; +}; + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE t array_prod(const array& a) { t prod = 1; @@ -201,6 +265,14 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE t array_prod(const array& /*a*/) { return 0; } +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE t array_prod(const std::vector& a) { + eigen_assert(a.size() > 0); + t prod = 1; + for (size_t i = 0; i < a.size(); ++i) { prod *= a[i]; } + return prod; +} + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T& array_get(array& a) { return a[I]; @@ -210,12 +282,31 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T& array_get(const array& a) { return a[I]; } +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T& array_get(std::vector& a) { + return a[I]; +} +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T& array_get(const std::vector& a) { + return a[I]; +} +template struct array_size; +template struct array_size > { + static const size_t value = N; +}; +template struct array_size; +template struct array_size& > { + static const size_t value = N; +}; template struct array_size; template struct array_size > { static const size_t value = N; }; - +template struct array_size; +template struct array_size& > { + static const size_t value = N; +}; struct sum_op { template static inline bool run(A a, B b) { return a + b; } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h index 3bfe80c9e..e973c00d3 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h @@ -131,8 +131,8 @@ struct TensorEvaluator, Device> m_leftImpl.coeffRef(i) = m_rightImpl.coeff(i); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalPacket(Index i) { - static const int LhsStoreMode = TensorEvaluator::IsAligned ? Aligned : Unaligned; - static const int RhsLoadMode = TensorEvaluator::IsAligned ? Aligned : Unaligned; + const int LhsStoreMode = TensorEvaluator::IsAligned ? Aligned : Unaligned; + const int RhsLoadMode = TensorEvaluator::IsAligned ? Aligned : Unaligned; m_leftImpl.template writePacket(i, m_rightImpl.template packet(i)); } EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index 27c10f64f..6018ecc66 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -30,6 +30,12 @@ class TensorBase typedef Scalar CoeffReturnType; typedef typename internal::packet_traits::type PacketReturnType; + // Dimensions + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index dimension(std::size_t n) const { return derived().dimensions()[n]; } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index size() const { return internal::array_prod(derived().dimensions()); } + // Nullary operators EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorCwiseNullaryOp, const Derived> @@ -187,7 +193,7 @@ class TensorBase } // Contractions. - typedef std::pair DimensionPair; + typedef Eigen::IndexPair DimensionPair; template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorContractionOp diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h index 3b2a9c8b9..0e55d4de1 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h @@ -48,7 +48,7 @@ struct nested, 1, typename eval -class TensorBroadcastingOp : public TensorBase, WriteAccessors> +class TensorBroadcastingOp : public TensorBase, ReadOnlyAccessors> { public: typedef typename Eigen::internal::traits::Scalar Scalar; @@ -91,7 +91,7 @@ struct TensorEvaluator, Device> PacketAccess = TensorEvaluator::PacketAccess, }; -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : m_impl(op.expression(), device) { const typename TensorEvaluator::Dimensions& input_dims = m_impl.dimensions(); @@ -141,7 +141,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const D template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { - static const int packetSize = internal::unpacket_traits::size; + const int packetSize = internal::unpacket_traits::size; EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) eigen_assert(index+packetSize-1 < dimensions().TotalSize()); @@ -161,7 +161,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const D if (innermostLoc + packetSize <= m_impl.dimensions()[0]) { return m_impl.template packet(inputIndex); } else { - EIGEN_ALIGN_DEFAULT CoeffReturnType values[packetSize]; + EIGEN_ALIGN_DEFAULT typename internal::remove_const::type values[packetSize]; values[0] = m_impl.coeff(inputIndex); for (int i = 1; i < packetSize; ++i) { values[i] = coeff(originalIndex+i); diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h index 4a5fd9c79..34bdd5309 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h @@ -872,11 +872,19 @@ struct TensorEvaluator + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(const Index index) const + { + eigen_assert(m_buf); + eigen_assert(index < m_dimensions.TotalSize()); + return internal::ploadt(m_buf+index); + } + private: // No assignment (copies are needed by the kernels) TensorEvaluator& operator = (const TensorEvaluator&); diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h index 75519c9f5..649bdb308 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h @@ -38,6 +38,18 @@ template class TensorDevice { return *this; } + template + EIGEN_STRONG_INLINE TensorDevice& operator+=(const OtherDerived& other) { + typedef typename OtherDerived::Scalar Scalar; + typedef TensorCwiseBinaryOp, const ExpressionType, const OtherDerived> Sum; + Sum sum(m_expression, other); + typedef TensorAssignOp Assign; + Assign assign(m_expression, sum); + static const bool Vectorize = TensorEvaluator::PacketAccess; + internal::TensorExecutor::run(assign, m_device); + return *this; + } + protected: const DeviceType& m_device; ExpressionType& m_expression; @@ -58,6 +70,18 @@ template class TensorDevice + EIGEN_STRONG_INLINE TensorDevice& operator+=(const OtherDerived& other) { + typedef typename OtherDerived::Scalar Scalar; + typedef TensorCwiseBinaryOp, const ExpressionType, const OtherDerived> Sum; + Sum sum(m_expression, other); + typedef TensorAssignOp Assign; + Assign assign(m_expression, sum); + static const bool Vectorize = TensorEvaluator::PacketAccess; + internal::TensorExecutor::run(assign, m_device); + return *this; + } + protected: const ThreadPoolDevice& m_device; ExpressionType& m_expression; @@ -79,6 +103,17 @@ template class TensorDevice return *this; } + template + EIGEN_STRONG_INLINE TensorDevice& operator+=(const OtherDerived& other) { + typedef typename OtherDerived::Scalar Scalar; + typedef TensorCwiseBinaryOp, const ExpressionType, const OtherDerived> Sum; + Sum sum(m_expression, other); + typedef TensorAssignOp Assign; + Assign assign(m_expression, sum); + internal::TensorExecutor::run(assign, m_device); + return *this; + } + protected: const GpuDevice& m_device; ExpressionType m_expression; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h index fad342eab..5a6ff70e9 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h @@ -37,23 +37,41 @@ struct DefaultDevice { // Multiple cpu cores // We should really use a thread pool here but first we need to find a portable thread pool library. #ifdef EIGEN_USE_THREADS + +typedef std::future Future; + struct ThreadPoolDevice { - ThreadPoolDevice(/*ThreadPool* pool, */size_t num_cores) : /*pool_(pool), */num_threads_(num_cores) { } - size_t numThreads() const { return num_threads_; } + ThreadPoolDevice(/*ThreadPool* pool, */size_t num_cores) : num_threads_(num_cores) { } EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { return internal::aligned_malloc(num_bytes); } + EIGEN_STRONG_INLINE void deallocate(void* buffer) const { internal::aligned_free(buffer); } + EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const { ::memcpy(dst, src, n); } + EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const { ::memset(buffer, c, n); } + EIGEN_STRONG_INLINE size_t numThreads() const { + return num_threads_; + } + + template + EIGEN_STRONG_INLINE Future enqueue(Function&& f, Args&&... args) const { + return std::async(std::launch::async, f, args...); + } + template + EIGEN_STRONG_INLINE void enqueueNoFuture(Function&& f, Args&&... args) const { + std::async(std::launch::async, f, args...); + } + private: // todo: NUMA, ... size_t num_threads_; @@ -63,41 +81,34 @@ struct ThreadPoolDevice { // GPU offloading #ifdef EIGEN_USE_GPU -static int m_numMultiProcessors = 0; -static int m_maxThreadsPerBlock = 0; -static int m_maxThreadsPerMultiProcessor = 0; +static cudaDeviceProp m_deviceProperties; +static bool m_devicePropInitialized = false; + +static void initializeDeviceProp() { + if (!m_devicePropInitialized) { + assert(cudaGetDeviceProperties(&m_deviceProperties, 0) == cudaSuccess); + m_devicePropInitialized = true; + } +} static inline int getNumCudaMultiProcessors() { - if (m_numMultiProcessors == 0) { - cudaDeviceProp deviceProp; - cudaGetDeviceProperties(&deviceProp, 0); - m_maxThreadsPerBlock = deviceProp.maxThreadsPerBlock; - m_maxThreadsPerMultiProcessor = deviceProp.maxThreadsPerMultiProcessor; - m_numMultiProcessors = deviceProp.multiProcessorCount; - } - return m_numMultiProcessors; + initializeDeviceProp(); + return m_deviceProperties.multiProcessorCount; } static inline int maxCudaThreadsPerBlock() { - if (m_maxThreadsPerBlock == 0) { - cudaDeviceProp deviceProp; - cudaGetDeviceProperties(&deviceProp, 0); - m_numMultiProcessors = deviceProp.multiProcessorCount; - m_maxThreadsPerMultiProcessor = deviceProp.maxThreadsPerMultiProcessor; - m_maxThreadsPerBlock = deviceProp.maxThreadsPerBlock; - } - return m_maxThreadsPerBlock; + initializeDeviceProp(); + return m_deviceProperties.maxThreadsPerBlock; } static inline int maxCudaThreadsPerMultiProcessor() { - if (m_maxThreadsPerBlock == 0) { - cudaDeviceProp deviceProp; - cudaGetDeviceProperties(&deviceProp, 0); - m_numMultiProcessors = deviceProp.multiProcessorCount; - m_maxThreadsPerBlock = deviceProp.maxThreadsPerBlock; - m_maxThreadsPerMultiProcessor = deviceProp.maxThreadsPerMultiProcessor; - } - return m_maxThreadsPerMultiProcessor; + initializeDeviceProp(); + return m_deviceProperties.maxThreadsPerMultiProcessor; +} +static inline int sharedMemPerBlock() { + initializeDeviceProp(); + return m_deviceProperties.sharedMemPerBlock; } + struct GpuDevice { // The cudastream is not owned: the caller is responsible for its initialization and eventual destruction. GpuDevice(const cudaStream_t* stream) : stream_(stream) { eigen_assert(stream); } @@ -141,8 +152,8 @@ struct GpuDevice { #endif } - EIGEN_STRONG_INLINE size_t numThreads() const { - // Fixme: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t numThreads() const { + // FIXME return 32; } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h index 732c6b344..2dd8e274b 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h @@ -29,7 +29,7 @@ namespace Eigen { * \sa Tensor */ -// Can't use std::pairs on cuda devices +// Can't use std::pair on cuda devices template struct IndexPair { EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE IndexPair() : first(0), second(0) { } EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE IndexPair(Index f, Index s) : first(f), second(s) { } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h index 587cbd5ca..ce9d73578 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h @@ -116,7 +116,7 @@ struct TensorEvaluator, Device> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalScalar(Index i) { m_buffer[i] = m_impl.coeff(i); } - EIGEN_STRONG_INLINE void evalPacket(Index i) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalPacket(Index i) { internal::pstoret(m_buffer + i, m_impl.template packet::IsAligned ? Aligned : Unaligned>(i)); } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h index 0f969036c..e324ba8d2 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h @@ -65,13 +65,13 @@ struct TensorEvaluator return m_data[index]; } - template EIGEN_STRONG_INLINE + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { return internal::ploadt(m_data + index); } - template EIGEN_STRONG_INLINE + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const Packet& x) { return internal::pstoret(m_data + index, x); @@ -113,13 +113,17 @@ struct TensorEvaluator EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { eigen_assert(m_data); +#ifdef __CUDA_ARCH__ + return __ldg(m_data+index); +#else return m_data[index]; +#endif } - template EIGEN_STRONG_INLINE + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { - return internal::ploadt(m_data + index); + return internal::ploadt_ro(m_data + index); } const Scalar* data() const { return m_data; } @@ -166,7 +170,7 @@ struct TensorEvaluator, Device> } template - EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { return m_functor.packetOp(index); } @@ -219,7 +223,7 @@ struct TensorEvaluator, Device> } template - EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { return m_functor.packetOp(m_argImpl.template packet(index)); } @@ -278,7 +282,7 @@ struct TensorEvaluator - EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { return m_functor.packetOp(m_leftImpl.template packet(index), m_rightImpl.template packet(index)); } @@ -340,7 +344,7 @@ struct TensorEvaluator return m_condImpl.coeff(index) ? m_thenImpl.coeff(index) : m_elseImpl.coeff(index); } template - PacketReturnType packet(Index index) const + EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const { static const int PacketSize = internal::unpacket_traits::size; internal::Selector select; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h index 10f5a5ee7..01fa04c64 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h @@ -10,10 +10,6 @@ #ifndef EIGEN_CXX11_TENSOR_TENSOR_EXECUTOR_H #define EIGEN_CXX11_TENSOR_TENSOR_EXECUTOR_H -#ifdef EIGEN_USE_THREADS -#include -#endif - namespace Eigen { /** \class TensorExecutor @@ -62,7 +58,7 @@ class TensorExecutor { const Index size = array_prod(evaluator.dimensions()); static const int PacketSize = unpacket_traits::PacketReturnType>::size; - const int VectorizedSize = (size / PacketSize) * PacketSize; + const Index VectorizedSize = (size / PacketSize) * PacketSize; for (Index i = 0; i < VectorizedSize; i += PacketSize) { evaluator.evalPacket(i); @@ -131,10 +127,10 @@ class TensorExecutor const Index numblocks = size / blocksize; Index i = 0; - std::vector > results; + std::vector results; results.reserve(numblocks); for (int i = 0; i < numblocks; ++i) { - results.push_back(std::async(std::launch::async, &EvalRange::run, &evaluator, i*blocksize, (i+1)*blocksize)); + results.push_back(device.enqueue(&EvalRange::run, &evaluator, i*blocksize, (i+1)*blocksize)); } for (int i = 0; i < numblocks; ++i) { @@ -154,11 +150,31 @@ class TensorExecutor // GPU: the evaluation of the expression is offloaded to a GPU. #if defined(EIGEN_USE_GPU) && defined(__CUDACC__) template -__global__ void EigenMetaKernel(Evaluator eval, unsigned int size) { +__global__ void +__launch_bounds__(1024) +EigenMetaKernel(Evaluator eval, unsigned int size) { + const int first_index = blockIdx.x * blockDim.x + threadIdx.x; const int step_size = blockDim.x * gridDim.x; - for (int i = first_index; i < size; i += step_size) { - eval.evalScalar(i); + + if (!Evaluator::PacketAccess || !Evaluator::IsAligned) { + // Use the scalar path + for (int i = first_index; i < size; i += step_size) { + eval.evalScalar(i); + } + } + else { + // Use the vector path + const int PacketSize = unpacket_traits::size; + const int vectorized_step_size = step_size * PacketSize; + const int vectorized_size = (size / PacketSize) * PacketSize; + int i = first_index * PacketSize; + for ( ; i < vectorized_size; i += vectorized_step_size) { + eval.evalPacket(i); + } + for ( ; i < size; i += step_size) { + eval.evalScalar(i); + } } } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h index 4d7f9e1fd..a753c5a48 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h @@ -17,7 +17,7 @@ namespace Eigen { * * \brief The fixed sized version of the tensor class. * - * The fixes sized equivalent of + * The fixed sized equivalent of * Eigen::Tensor t(3, 5, 7); * is * Eigen::TensorFixedSize> t; @@ -41,7 +41,7 @@ class TensorFixedSize : public TensorBase::size > 1), }; typedef Dimensions_ Dimensions; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h index cf97031be..2714117ab 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h @@ -31,30 +31,34 @@ namespace internal { template struct TensorIntDivisor { public: - TensorIntDivisor() { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor() { multiplier = 0; shift1 = 0; shift2 = 0; } // Must have 1 <= divider <= 2^31-1 - TensorIntDivisor(const T divider) { - static const int N = 32; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor(const T divider) { + const int N = 32; eigen_assert(divider > 0); eigen_assert(divider <= (1<<(N-1)) - 1); // fast ln2 +#ifndef __CUDA_ARCH__ const int leading_zeros = __builtin_clz(divider); - const int l = N - (leading_zeros+1); - - multiplier = (static_cast(1) << (N+l)) / divider - (static_cast(1) << N) + 1; - shift1 = (std::min)(1, l); - shift2 = (std::max)(0, l-1); +#else + const int leading_zeros = __clz(divider); +#endif + const int log_div = N - (leading_zeros+1); + + multiplier = (static_cast(1) << (N+log_div)) / divider - (static_cast(1) << N) + 1; + shift1 = log_div > 1 ? 1 : log_div; + shift2 = log_div > 1 ? log_div-1 : 0; } // Must have 0 <= numerator <= 2^32-1 - T divide(const T numerator) const { - static const int N = 32; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T divide(const T numerator) const { + const int N = 32; eigen_assert(numerator >= 0); eigen_assert(numerator <= (1ull< -static T operator / (const T& numerator, const TensorIntDivisor& divisor) { +static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator / (const T& numerator, const TensorIntDivisor& divisor) { return divisor.divide(numerator); } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h index 04849dd9f..2c0d2cd0f 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h @@ -42,26 +42,25 @@ template class TensorMap : public Tensor static const int Options = Options_; - static const std::size_t NumIndices = PlainObjectType::NumIndices; + static const Index NumIndices = PlainObjectType::NumIndices; typedef typename PlainObjectType::Dimensions Dimensions; - enum { - IsAligned = bool(EIGEN_ALIGN) && ((int(Options_)&Aligned)==Aligned), - PacketAccess = true, + IsAligned = ((int(Options_)&Aligned)==Aligned), + PacketAccess = (internal::packet_traits::size > 1), }; #ifdef EIGEN_HAS_VARIADIC_TEMPLATES template EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index firstDimension, IndexTypes... otherDimensions) : m_data(dataPtr), m_dimensions(array({{firstDimension, otherDimensions...}})) { + EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index firstDimension, IndexTypes... otherDimensions) : m_data(dataPtr), m_dimensions(firstDimension, otherDimensions...) { // The number of dimensions used to construct a tensor must be equal to the rank of the tensor. - EIGEN_STATIC_ASSERT(sizeof...(otherDimensions) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + EIGEN_STATIC_ASSERT((sizeof...(otherDimensions) + 1 == NumIndices || NumIndices == Dynamic), YOU_MADE_A_PROGRAMMING_MISTAKE) } #else EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index firstDimension) : m_data(dataPtr), m_dimensions(array(firstDimension)) { + EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index firstDimension) : m_data(dataPtr), m_dimensions(firstDimension) { // The number of dimensions used to construct a tensor must be equal to the rank of the tensor. - EIGEN_STATIC_ASSERT(1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + EIGEN_STATIC_ASSERT((1 == NumIndices || NumIndices == Dynamic), YOU_MADE_A_PROGRAMMING_MISTAKE) } #endif @@ -176,12 +175,13 @@ template class TensorMap : public Tensor template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(Index firstIndex, IndexTypes... otherIndices) { - static_assert(sizeof...(otherIndices) + 1 == NumIndices, "Number of indices used to access a tensor coefficient must be equal to the rank of the tensor."); + static_assert(sizeof...(otherIndices) + 1 == NumIndices || NumIndices == Dynamic, "Number of indices used to access a tensor coefficient must be equal to the rank of the tensor."); + const std::size_t NumDims = sizeof...(otherIndices) + 1; if (PlainObjectType::Options&RowMajor) { - const Index index = m_dimensions.IndexOfRowMajor(array{{firstIndex, otherIndices...}}); + const Index index = m_dimensions.IndexOfRowMajor(array{{firstIndex, otherIndices...}}); return m_data[index]; } else { - const Index index = m_dimensions.IndexOfColMajor(array{{firstIndex, otherIndices...}}); + const Index index = m_dimensions.IndexOfColMajor(array{{firstIndex, otherIndices...}}); return m_data[index]; } } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h index 7da89458f..8da6e0f26 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h @@ -144,7 +144,7 @@ struct TensorEvaluator, Device template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { - static const int packetSize = internal::unpacket_traits::size; + const int packetSize = internal::unpacket_traits::size; EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) eigen_assert(index+packetSize-1 < dimensions().TotalSize()); @@ -206,7 +206,7 @@ struct TensorEvaluator, Device EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const { - static const int packetSize = internal::unpacket_traits::size; + const int packetSize = internal::unpacket_traits::size; EIGEN_ALIGN_DEFAULT typename internal::remove_const::type values[packetSize]; for (int i = 0; i < packetSize; ++i) { values[i] = coeff(index+i); diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h index f7e7fc107..7e0063626 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h @@ -97,7 +97,7 @@ struct TensorEvaluator, Device> typedef typename XprType::Scalar Scalar; enum { - IsAligned = true, + IsAligned = false, PacketAccess = (internal::packet_traits::size > 1), }; @@ -194,7 +194,7 @@ struct TensorEvaluator, Device> typedef typename XprType::Scalar Scalar; enum { - IsAligned = true, + IsAligned = false, PacketAccess = (internal::packet_traits::size > 1), }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h b/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h index 0c4f8a3d6..aaec39756 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h @@ -30,11 +30,11 @@ namespace Eigen { * * \sa Tensor */ -template class TensorStorage; +template class TensorStorage; // Pure fixed-size storage -template +template class TensorStorage { private: @@ -62,7 +62,7 @@ class TensorStorage // pure-dynamic, but without specification of all dimensions explicitly -template +template class TensorStorage : public TensorStorage::type> { @@ -79,7 +79,7 @@ class TensorStorage }; // pure dynamic -template +template class TensorStorage::type> { T *m_data; @@ -140,6 +140,7 @@ class TensorStorage, 1, typename eval -class TensorStridingOp : public TensorBase, WriteAccessors> +class TensorStridingOp : public TensorBase > { public: typedef typename Eigen::internal::traits::Scalar Scalar; @@ -97,7 +97,7 @@ struct TensorEvaluator, Device> enum { IsAligned = /*TensorEvaluator::IsAligned*/false, - PacketAccess = /*TensorEvaluator::PacketAccess*/false, + PacketAccess = TensorEvaluator::PacketAccess, }; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) @@ -109,28 +109,23 @@ struct TensorEvaluator, Device> } const typename TensorEvaluator::Dimensions& input_dims = m_impl.dimensions(); - for (int i = 0; i < NumDims; ++i) { - if (i > 0) { - m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1]; - m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1]; - } else { - m_inputStrides[0] = 1; - m_outputStrides[0] = 1; - } - } - for (int i = 0; i < NumDims; ++i) { - m_inputStrides[i] *= op.strides()[i]; + m_outputStrides[0] = 1; + m_inputStrides[0] = 1; + for (int i = 1; i < NumDims; ++i) { + m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1]; + m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1]; + m_inputStrides[i-1] *= op.strides()[i-1]; } + m_inputStrides[NumDims-1] *= op.strides()[NumDims-1]; } - // typedef typename XprType::Index Index; typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::PacketReturnType PacketReturnType; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { m_impl.evalSubExprsIfNeeded(NULL); return true; } @@ -150,16 +145,44 @@ struct TensorEvaluator, Device> return m_impl.coeff(inputIndex); } - /* template + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { - return m_impl.template packet(index); - }*/ + const int packetSize = internal::unpacket_traits::size; + EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index+packetSize-1 < dimensions().TotalSize()); + + Index inputIndices[] = {0, 0}; + Index indices[] = {index, index + packetSize - 1}; + for (int i = NumDims - 1; i > 0; --i) { + const Index idx0 = indices[0] / m_outputStrides[i]; + const Index idx1 = indices[1] / m_outputStrides[i]; + inputIndices[0] += idx0 * m_inputStrides[i]; + inputIndices[1] += idx1 * m_inputStrides[i]; + indices[0] -= idx0 * m_outputStrides[i]; + indices[1] -= idx1 * m_outputStrides[i]; + } + inputIndices[0] += indices[0] * m_inputStrides[0]; + inputIndices[1] += indices[1] * m_inputStrides[0]; + if (inputIndices[1] - inputIndices[0] == packetSize - 1) { + PacketReturnType rslt = m_impl.template packet(inputIndices[0]); + return rslt; + } + else { + EIGEN_ALIGN_DEFAULT typename internal::remove_const::type values[packetSize]; + values[0] = m_impl.coeff(inputIndices[0]); + values[packetSize-1] = m_impl.coeff(inputIndices[1]); + for (int i = 1; i < packetSize-1; ++i) { + values[i] = coeff(index+i); + } + PacketReturnType rslt = internal::pload(values); + return rslt; + } + } Scalar* data() const { return NULL; } protected: - // Strides m_strides; Dimensions m_dimensions; array m_outputStrides; array m_inputStrides; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h b/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h index 40f805741..5940a8cf1 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h @@ -70,14 +70,18 @@ struct traits > }; -template -struct traits > +template +struct traits > : public traits { typedef traits BaseTraits; typedef typename BaseTraits::Scalar Scalar; typedef typename BaseTraits::StorageKind StorageKind; typedef typename BaseTraits::Index Index; + enum { + Options = Options_, + Flags = ((BaseTraits::Flags | LvalueBit) & ~AlignedBit) | (Options&Aligned ? AlignedBit : 0), + }; }; @@ -105,16 +109,16 @@ struct eval, Eigen::Dense> typedef const TensorFixedSize& type; }; -template -struct eval, Eigen::Dense> +template +struct eval, Eigen::Dense> { - typedef const TensorMap& type; + typedef const TensorMap& type; }; -template -struct eval, Eigen::Dense> +template +struct eval, Eigen::Dense> { - typedef const TensorMap& type; + typedef const TensorMap& type; }; template @@ -141,16 +145,16 @@ struct nested, 1, typename e typedef const TensorFixedSize& type; }; -template -struct nested, 1, typename eval >::type> +template +struct nested, 1, typename eval >::type> { - typedef const TensorMap& type; + typedef const TensorMap& type; }; -template -struct nested, 1, typename eval >::type> +template +struct nested, 1, typename eval >::type> { - typedef const TensorMap& type; + typedef const TensorMap& type; }; } // end namespace internal diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index d6c435947..a7ef2b402 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -110,6 +110,7 @@ if(EIGEN_TEST_CXX11) # ei_add_test(cxx11_tensor_fixed_size "-std=c++0x") ei_add_test(cxx11_tensor_const "-std=c++0x") ei_add_test(cxx11_tensor_of_const_values "-std=c++0x") + ei_add_test(cxx11_tensor_of_complex "-std=c++0x") ei_add_test(cxx11_tensor_of_strings "-std=c++0x") ei_add_test(cxx11_tensor_intdiv "-std=c++0x") ei_add_test(cxx11_tensor_lvalue "-std=c++0x") diff --git a/unsupported/test/cxx11_tensor_assign.cpp b/unsupported/test/cxx11_tensor_assign.cpp index f2b126413..0ac3f9bf9 100644 --- a/unsupported/test/cxx11_tensor_assign.cpp +++ b/unsupported/test/cxx11_tensor_assign.cpp @@ -253,6 +253,39 @@ static void test_auto_resize() } +static void test_compound_assign() +{ + Tensor start_tensor(10); + Tensor offset_tensor(10); + start_tensor.setRandom(); + offset_tensor.setRandom(); + + Tensor tensor = start_tensor; + tensor += offset_tensor; + for (int i = 0; i < 10; ++i) { + VERIFY_IS_EQUAL(tensor(i), start_tensor(i) + offset_tensor(i)); + } + + tensor = start_tensor; + tensor -= offset_tensor; + for (int i = 0; i < 10; ++i) { + VERIFY_IS_EQUAL(tensor(i), start_tensor(i) - offset_tensor(i)); + } + + tensor = start_tensor; + tensor *= offset_tensor; + for (int i = 0; i < 10; ++i) { + VERIFY_IS_EQUAL(tensor(i), start_tensor(i) * offset_tensor(i)); + } + + tensor = start_tensor; + tensor /= offset_tensor; + for (int i = 0; i < 10; ++i) { + VERIFY_IS_EQUAL(tensor(i), start_tensor(i) / offset_tensor(i)); + } +} + + void test_cxx11_tensor_assign() { CALL_SUBTEST(test_1d()); @@ -260,5 +293,5 @@ void test_cxx11_tensor_assign() CALL_SUBTEST(test_3d()); CALL_SUBTEST(test_same_type()); CALL_SUBTEST(test_auto_resize()); - + CALL_SUBTEST(test_compound_assign()); } diff --git a/unsupported/test/cxx11_tensor_convolution.cpp b/unsupported/test/cxx11_tensor_convolution.cpp index bafe73edd..4672db463 100644 --- a/unsupported/test/cxx11_tensor_convolution.cpp +++ b/unsupported/test/cxx11_tensor_convolution.cpp @@ -64,8 +64,78 @@ static void test_expr() } +static void test_modes() { + Tensor input(3); + Tensor kernel(3); + input(0) = 1.0f; + input(1) = 2.0f; + input(2) = 3.0f; + kernel(0) = 0.5f; + kernel(1) = 1.0f; + kernel(2) = 0.0f; + + const Eigen::array dims{{0}}; + Eigen::array, 1> padding; + + // Emulate VALID mode (as defined in + // http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html). + padding[0] = std::make_pair(0, 0); + Tensor valid(1); + valid = input.pad(padding).convolve(kernel, dims); + VERIFY_IS_EQUAL(valid.dimension(0), 1); + VERIFY_IS_APPROX(valid(0), 2.5f); + + // Emulate SAME mode (as defined in + // http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html). + padding[0] = std::make_pair(1, 1); + Tensor same(3); + same = input.pad(padding).convolve(kernel, dims); + VERIFY_IS_EQUAL(same.dimension(0), 3); + VERIFY_IS_APPROX(same(0), 1.0f); + VERIFY_IS_APPROX(same(1), 2.5f); + VERIFY_IS_APPROX(same(2), 4.0f); + + // Emulate FULL mode (as defined in + // http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html). + padding[0] = std::make_pair(2, 2); + Tensor full(5); + full = input.pad(padding).convolve(kernel, dims); + VERIFY_IS_EQUAL(full.dimension(0), 5); + VERIFY_IS_APPROX(full(0), 0.0f); + VERIFY_IS_APPROX(full(1), 1.0f); + VERIFY_IS_APPROX(full(2), 2.5f); + VERIFY_IS_APPROX(full(3), 4.0f); + VERIFY_IS_APPROX(full(4), 1.5f); +} + + +static void test_strides() { + Tensor input(13); + Tensor kernel(3); + input.setRandom(); + kernel.setRandom(); + + const Eigen::array dims{{0}}; + const Eigen::array stride_of_3{{3}}; + const Eigen::array stride_of_2{{2}}; + + Tensor result; + result = input.stride(stride_of_3).convolve(kernel, dims).stride(stride_of_2); + + VERIFY_IS_EQUAL(result.dimension(0), 2); + VERIFY_IS_APPROX(result(0), (input(0)*kernel(0) + input(3)*kernel(1) + + input(6)*kernel(2))); + VERIFY_IS_APPROX(result(1), (input(6)*kernel(0) + input(9)*kernel(1) + + input(12)*kernel(2))); +} + + + + void test_cxx11_tensor_convolution() { CALL_SUBTEST(test_evals()); CALL_SUBTEST(test_expr()); + CALL_SUBTEST(test_modes()); + CALL_SUBTEST(test_strides()); } diff --git a/unsupported/test/cxx11_tensor_device.cpp b/unsupported/test/cxx11_tensor_device.cpp index f331cb481..26465ee11 100644 --- a/unsupported/test/cxx11_tensor_device.cpp +++ b/unsupported/test/cxx11_tensor_device.cpp @@ -123,6 +123,14 @@ static void test_forced_contextual_eval(Context* context) context->out().device(context->device()) = (context->in1() + context->in2()).eval() * 3.14f + context->in1().constant(2.718f); } +template +static void test_compound_assignment(Context* context) +{ + context->out().device(context->device()) = context->in1().constant(2.718f); + context->out().device(context->device()) += context->in1() + context->in2() * 3.14f; +} + + template static void test_contraction(Context* context) { @@ -197,6 +205,15 @@ static void test_cpu() { } } + test_compound_assignment(&context); + for (int i = 0; i < 40; ++i) { + for (int j = 0; j < 50; ++j) { + for (int k = 0; k < 70; ++k) { + VERIFY_IS_APPROX(out(Eigen::array(i,j,k)), in1(Eigen::array(i,j,k)) + in2(Eigen::array(i,j,k)) * 3.14f + 2.718f); + } + } + } + test_contraction(&context); for (int i = 0; i < 40; ++i) { for (int j = 0; j < 40; ++j) { @@ -299,6 +316,16 @@ static void test_gpu() { } } + test_compound_assignment(&context); + assert(cudaMemcpy(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost) == cudaSuccess); + for (int i = 0; i < 40; ++i) { + for (int j = 0; j < 50; ++j) { + for (int k = 0; k < 70; ++k) { + VERIFY_IS_APPROX(out(Eigen::array(i,j,k)), in1(Eigen::array(i,j,k)) + in2(Eigen::array(i,j,k)) * 3.14f + 2.718f); + } + } + } + test_contraction(&context); assert(cudaMemcpy(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost) == cudaSuccess); for (int i = 0; i < 40; ++i) { diff --git a/unsupported/test/cxx11_tensor_morphing.cpp b/unsupported/test/cxx11_tensor_morphing.cpp index 2a6a97856..fd1b1fa32 100644 --- a/unsupported/test/cxx11_tensor_morphing.cpp +++ b/unsupported/test/cxx11_tensor_morphing.cpp @@ -12,6 +12,7 @@ #include using Eigen::Tensor; +using Eigen::IndexPair; static void test_simple_reshape() { @@ -52,7 +53,7 @@ static void test_reshape_in_expr() { TensorMap> tensor2(m2.data(), 3,5,7,11,13); Tensor::Dimensions newDims1{{2,3*5*7*11}}; Tensor::Dimensions newDims2{{3*5*7*11,13}}; - array::DimensionPair, 1> contract_along{{std::make_pair(1, 0)}}; + Eigen::array, 1> contract_along{{IndexPair(1, 0)}}; Tensor tensor3(2,13); tensor3 = tensor1.reshape(newDims1).contract(tensor2.reshape(newDims2), contract_along); @@ -125,7 +126,7 @@ static void test_slice_in_expr() { TensorMap> tensor1(m1.data(), 7, 7); TensorMap> tensor2(m2.data(), 3, 3); Tensor tensor3(3,1); - array::DimensionPair, 1> contract_along{{std::make_pair(1, 0)}}; + array, 1> contract_along{{IndexPair(1, 0)}}; Eigen::DSizes indices1(1,2); Eigen::DSizes sizes1(3,3); diff --git a/unsupported/test/cxx11_tensor_of_complex.cpp b/unsupported/test/cxx11_tensor_of_complex.cpp new file mode 100644 index 000000000..b5044b962 --- /dev/null +++ b/unsupported/test/cxx11_tensor_of_complex.cpp @@ -0,0 +1,64 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +#include + +using Eigen::Tensor; +using Eigen::TensorMap; + + + +static void test_additions() +{ + Tensor, 1> data1(3); + Tensor, 1> data2(3); + for (int i = 0; i < 3; ++i) { + data1(i) = std::complex(i, -i); + data2(i) = std::complex(i, 7 * i); + } + + Tensor, 1> sum = data1 + data2; + for (int i = 0; i < 3; ++i) { + VERIFY_IS_EQUAL(sum(i), std::complex(2*i, 6*i)); + } +} + + +static void test_contractions() +{ + Tensor, 4> t_left(30, 50, 8, 31); + Tensor, 5> t_right(8, 31, 7, 20, 10); + Tensor, 5> t_result(30, 50, 7, 20, 10); + + t_left.setRandom(); + t_right.setRandom(); + + typedef Map, Dynamic, Dynamic>> MapXcf; + MapXcf m_left(t_left.data(), 1500, 248); + MapXcf m_right(t_right.data(), 248, 1400); + Matrix, Dynamic, Dynamic> m_result(1500, 1400); + + // This contraction should be equivalent to a regular matrix multiplication + typedef Tensor::DimensionPair DimPair; + Eigen::array dims({{DimPair(2, 0), DimPair(3, 1)}}); + t_result = t_left.contract(t_right, dims); + m_result = m_left * m_right; + for (size_t i = 0; i < t_result.dimensions().TotalSize(); i++) { + VERIFY_IS_APPROX(t_result.data()[i], m_result.data()[i]); + } +} + + +void test_cxx11_tensor_of_complex() +{ + CALL_SUBTEST(test_additions()); + CALL_SUBTEST(test_contractions()); +} diff --git a/unsupported/test/cxx11_tensor_thread_pool.cpp b/unsupported/test/cxx11_tensor_thread_pool.cpp index e02d8e4be..f0de61f8b 100644 --- a/unsupported/test/cxx11_tensor_thread_pool.cpp +++ b/unsupported/test/cxx11_tensor_thread_pool.cpp @@ -9,22 +9,23 @@ #define EIGEN_USE_THREADS - +#include #include "main.h" #include + using Eigen::Tensor; -void test_cxx11_tensor_thread_pool() +static void test_multithread_elementwise() { - Eigen::Tensor in1(2,3,7); - Eigen::Tensor in2(2,3,7); - Eigen::Tensor out(2,3,7); + Tensor in1(2,3,7); + Tensor in2(2,3,7); + Tensor out(2,3,7); in1.setRandom(); in2.setRandom(); - Eigen::ThreadPoolDevice thread_pool_device(3); + Eigen::ThreadPoolDevice thread_pool_device(internal::random(3, 11)); out.device(thread_pool_device) = in1 + in2 * 3.14f; for (int i = 0; i < 2; ++i) { @@ -35,3 +36,222 @@ void test_cxx11_tensor_thread_pool() } } } + + +static void test_multithread_compound_assignment() +{ + Tensor in1(2,3,7); + Tensor in2(2,3,7); + Tensor out(2,3,7); + + in1.setRandom(); + in2.setRandom(); + + Eigen::ThreadPoolDevice thread_pool_device(internal::random(3, 11)); + out.device(thread_pool_device) = in1; + out.device(thread_pool_device) += in2 * 3.14f; + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + VERIFY_IS_APPROX(out(i,j,k), in1(i,j,k) + in2(i,j,k) * 3.14f); + } + } + } +} + + +static void test_multithread_contraction() +{ + Tensor t_left(30, 50, 37, 31); + Tensor t_right(37, 31, 70, 2, 10); + Tensor t_result(30, 50, 70, 2, 10); + + t_left.setRandom(); + t_right.setRandom(); + + // this contraction should be equivalent to a single matrix multiplication + typedef Tensor::DimensionPair DimPair; + Eigen::array dims({{DimPair(2, 0), DimPair(3, 1)}}); + + + typedef Map MapXf; + MapXf m_left(t_left.data(), 1500, 1147); + MapXf m_right(t_right.data(), 1147, 1400); + MatrixXf m_result(1500, 1400); + + Eigen::ThreadPoolDevice thread_pool_device(4); + + // compute results by separate methods + t_result.device(thread_pool_device) = t_left.contract(t_right, dims); + m_result = m_left * m_right; + + for (ptrdiff_t i = 0; i < t_result.size(); i++) { + VERIFY(&t_result.data()[i] != &m_result.data()[i]); + if (fabs(t_result.data()[i] - m_result.data()[i]) >= 1e-4) { + std::cout << "mismatch detected: " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl; + assert(false); + } + } +} + + +static void test_contraction_corner_cases() +{ + Tensor t_left(32, 500); + Tensor t_right(32, 28*28); + Tensor t_result(500, 28*28); + + t_left = (t_left.constant(-0.5f) + t_left.random()) * 2.0f; + t_right = (t_right.constant(-0.6f) + t_right.random()) * 2.0f; + t_result = t_result.constant(NAN); + + // this contraction should be equivalent to a single matrix multiplication + typedef Tensor::DimensionPair DimPair; + Eigen::array dims{{DimPair(0, 0)}}; + + typedef Map MapXf; + MapXf m_left(t_left.data(), 32, 500); + MapXf m_right(t_right.data(), 32, 28*28); + MatrixXf m_result(500, 28*28); + + Eigen::ThreadPoolDevice thread_pool_device(12); + + // compute results by separate methods + t_result.device(thread_pool_device) = t_left.contract(t_right, dims); + m_result = m_left.transpose() * m_right; + + for (ptrdiff_t i = 0; i < t_result.size(); i++) { + assert(!isnan(t_result.data()[i])); + if (fabs(t_result.data()[i] - m_result.data()[i]) >= 1e-4) { + std::cout << "mismatch detected at index " << i << " : " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl; + assert(false); + } + } + + t_left.resize(32, 1); + t_left = (t_left.constant(-0.5f) + t_left.random()) * 2.0f; + t_result.resize (1, 28*28); + t_result = t_result.constant(NAN); + t_result.device(thread_pool_device) = t_left.contract(t_right, dims); + new(&m_left) MapXf(t_left.data(), 32, 1); + m_result = m_left.transpose() * m_right; + for (ptrdiff_t i = 0; i < t_result.size(); i++) { + assert(!isnan(t_result.data()[i])); + if (fabs(t_result.data()[i] - m_result.data()[i]) >= 1e-4) { + std::cout << "mismatch detected: " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl; + assert(false); + } + } + + t_left.resize(32, 500); + t_right.resize(32, 4); + t_left = (t_left.constant(-0.5f) + t_left.random()) * 2.0f; + t_right = (t_right.constant(-0.6f) + t_right.random()) * 2.0f; + t_result.resize (500, 4); + t_result = t_result.constant(NAN); + t_result.device(thread_pool_device) = t_left.contract(t_right, dims); + new(&m_left) MapXf(t_left.data(), 32, 500); + new(&m_right) MapXf(t_right.data(), 32, 4); + m_result = m_left.transpose() * m_right; + for (ptrdiff_t i = 0; i < t_result.size(); i++) { + assert(!isnan(t_result.data()[i])); + if (fabs(t_result.data()[i] - m_result.data()[i]) >= 1e-4) { + std::cout << "mismatch detected: " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl; + assert(false); + } + } + + t_left.resize(32, 1); + t_right.resize(32, 4); + t_left = (t_left.constant(-0.5f) + t_left.random()) * 2.0f; + t_right = (t_right.constant(-0.6f) + t_right.random()) * 2.0f; + t_result.resize (1, 4); + t_result = t_result.constant(NAN); + t_result.device(thread_pool_device) = t_left.contract(t_right, dims); + new(&m_left) MapXf(t_left.data(), 32, 1); + new(&m_right) MapXf(t_right.data(), 32, 4); + m_result = m_left.transpose() * m_right; + for (ptrdiff_t i = 0; i < t_result.size(); i++) { + assert(!isnan(t_result.data()[i])); + if (fabs(t_result.data()[i] - m_result.data()[i]) >= 1e-4) { + std::cout << "mismatch detected: " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl; + assert(false); + } + } +} + + +static void test_multithread_contraction_agrees_with_singlethread() { + int contract_size = internal::random(1, 5000); + + Tensor left(internal::random(1, 80), + contract_size, + internal::random(1, 100)); + + Tensor right(internal::random(1, 25), + internal::random(1, 37), + contract_size, + internal::random(1, 51)); + + left.setRandom(); + right.setRandom(); + + // add constants to shift values away from 0 for more precision + left += left.constant(1.5f); + right += right.constant(1.5f); + + typedef Tensor::DimensionPair DimPair; + Eigen::array dims({{DimPair(1, 2)}}); + + Eigen::ThreadPoolDevice thread_pool_device(internal::random(2, 11)); + + Tensor st_result; + st_result = left.contract(right, dims); + + Tensor tp_result(st_result.dimensions()); + tp_result.device(thread_pool_device) = left.contract(right, dims); + + VERIFY(internal::dimensions_match(st_result.dimensions(), tp_result.dimensions())); + for (ptrdiff_t i = 0; i < st_result.size(); i++) { + // if both of the values are very small, then do nothing (because the test will fail + // due to numerical precision issues when values are small) + if (fabs(st_result.data()[i] - tp_result.data()[i]) >= 1e-4) { + VERIFY_IS_APPROX(st_result.data()[i], tp_result.data()[i]); + } + } +} + + +static void test_memcpy() { + + for (int i = 0; i < 5; ++i) { + const int num_threads = internal::random(3, 11); + Eigen::ThreadPoolDevice thread_pool_device(num_threads); + + const int size = internal::random(13, 7632); + Tensor t1(size); + t1.setRandom(); + std::vector result(size); + thread_pool_device.memcpy(&result[0], t1.data(), size*sizeof(float)); + for (int i = 0; i < size; i++) { + VERIFY_IS_EQUAL(t1(i), result[i]); + } + } +} + + +void test_cxx11_tensor_thread_pool() +{ + CALL_SUBTEST(test_multithread_elementwise()); + CALL_SUBTEST(test_multithread_compound_assignment()); + + CALL_SUBTEST(test_multithread_contraction()); + + CALL_SUBTEST(test_multithread_contraction_agrees_with_singlethread()); + + // Exercise various cases that have been problematic in the past. + CALL_SUBTEST(test_contraction_corner_cases()); + + CALL_SUBTEST(test_memcpy()); +} -- cgit v1.2.3 From b5124e7cfda27ed99dcfcec8cb1b674efa1ef4a3 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 14 Jan 2015 15:46:04 -0800 Subject: Created many additional tests --- unsupported/test/CMakeLists.txt | 13 +- unsupported/test/cxx11_tensor_assign.cpp | 73 ++++ unsupported/test/cxx11_tensor_broadcasting.cpp | 86 ++++- unsupported/test/cxx11_tensor_chipping.cpp | 183 ++++++--- unsupported/test/cxx11_tensor_concatenation.cpp | 34 +- unsupported/test/cxx11_tensor_contract_cuda.cpp | 121 ++++++ unsupported/test/cxx11_tensor_contraction.cpp | 221 +++++++---- unsupported/test/cxx11_tensor_cuda.cpp | 474 ++++++++++++++++++++++++ unsupported/test/cxx11_tensor_device.cpp | 118 +++--- unsupported/test/cxx11_tensor_dimension.cpp | 9 +- unsupported/test/cxx11_tensor_expr.cpp | 40 ++ unsupported/test/cxx11_tensor_forced_eval.cpp | 27 ++ unsupported/test/cxx11_tensor_image_patch.cpp | 206 +++++++++- unsupported/test/cxx11_tensor_map.cpp | 7 +- unsupported/test/cxx11_tensor_morphing.cpp | 143 +++++-- unsupported/test/cxx11_tensor_of_strings.cpp | 54 +-- unsupported/test/cxx11_tensor_padding.cpp | 23 +- unsupported/test/cxx11_tensor_patch.cpp | 17 + unsupported/test/cxx11_tensor_reduction.cpp | 287 ++++++++++++-- unsupported/test/cxx11_tensor_shuffling.cpp | 28 +- unsupported/test/cxx11_tensor_simple.cpp | 3 + unsupported/test/cxx11_tensor_striding.cpp | 38 +- unsupported/test/cxx11_tensor_thread_pool.cpp | 70 ++-- 23 files changed, 1908 insertions(+), 367 deletions(-) create mode 100644 unsupported/test/cxx11_tensor_contract_cuda.cpp create mode 100644 unsupported/test/cxx11_tensor_cuda.cpp (limited to 'unsupported/test/cxx11_tensor_thread_pool.cpp') diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index 89c651804..9f44e47f9 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -99,7 +99,7 @@ if(EIGEN_TEST_CXX11) # older compiler that don't support cxx11. ei_add_test(cxx11_meta "-std=c++0x") ei_add_test(cxx11_tensor_simple "-std=c++0x") - ei_add_test(cxx11_tensor_symmetry "-std=c++0x") +# ei_add_test(cxx11_tensor_symmetry "-std=c++0x") ei_add_test(cxx11_tensor_assign "-std=c++0x") ei_add_test(cxx11_tensor_dimension "-std=c++0x") ei_add_test(cxx11_tensor_index_list "-std=c++0x") @@ -126,8 +126,17 @@ if(EIGEN_TEST_CXX11) ei_add_test(cxx11_tensor_reduction "-std=c++0x") ei_add_test(cxx11_tensor_shuffling "-std=c++0x") ei_add_test(cxx11_tensor_striding "-std=c++0x") -# ei_add_test(cxx11_tensor_device "-std=c++0x") ei_add_test(cxx11_tensor_thread_pool "-std=c++0x") ei_add_test(cxx11_tensor_ref "-std=c++0x") + ei_add_test(cxx11_tensor_random "-std=c++0x") + ei_add_test(cxx11_tensor_casts "-std=c++0x") + ei_add_test(cxx11_tensor_reverse "-std=c++0x") + ei_add_test(cxx11_tensor_layout_swap "-std=c++0x") ei_add_test(cxx11_tensor_io "-std=c++0x") + + # These tests needs nvcc +# ei_add_test(cxx11_tensor_device "-std=c++0x") +# ei_add_test(cxx11_tensor_cuda "-std=c++0x") +# ei_add_test(cxx11_tensor_contract_cuda "-std=c++0x") + endif() diff --git a/unsupported/test/cxx11_tensor_assign.cpp b/unsupported/test/cxx11_tensor_assign.cpp index 0ac3f9bf9..d16aaf847 100644 --- a/unsupported/test/cxx11_tensor_assign.cpp +++ b/unsupported/test/cxx11_tensor_assign.cpp @@ -285,6 +285,78 @@ static void test_compound_assign() } } +static void test_std_initializers_tensor() { +#ifdef EIGEN_HAS_VARIADIC_TEMPLATES + Tensor a(3); + a.setValues({0, 1, 2}); + VERIFY_IS_EQUAL(a(0), 0); + VERIFY_IS_EQUAL(a(1), 1); + VERIFY_IS_EQUAL(a(2), 2); + + // It fills the top-left slice. + a.setValues({10, 20}); + VERIFY_IS_EQUAL(a(0), 10); + VERIFY_IS_EQUAL(a(1), 20); + VERIFY_IS_EQUAL(a(2), 2); + + // Chaining. + Tensor a2(3); + a2 = a.setValues({100, 200, 300}); + VERIFY_IS_EQUAL(a(0), 100); + VERIFY_IS_EQUAL(a(1), 200); + VERIFY_IS_EQUAL(a(2), 300); + VERIFY_IS_EQUAL(a2(0), 100); + VERIFY_IS_EQUAL(a2(1), 200); + VERIFY_IS_EQUAL(a2(2), 300); + + Tensor b(2, 3); + b.setValues({{0, 1, 2}, {3, 4, 5}}); + VERIFY_IS_EQUAL(b(0, 0), 0); + VERIFY_IS_EQUAL(b(0, 1), 1); + VERIFY_IS_EQUAL(b(0, 2), 2); + VERIFY_IS_EQUAL(b(1, 0), 3); + VERIFY_IS_EQUAL(b(1, 1), 4); + VERIFY_IS_EQUAL(b(1, 2), 5); + + // It fills the top-left slice. + b.setValues({{10, 20}, {30}}); + VERIFY_IS_EQUAL(b(0, 0), 10); + VERIFY_IS_EQUAL(b(0, 1), 20); + VERIFY_IS_EQUAL(b(0, 2), 2); + VERIFY_IS_EQUAL(b(1, 0), 30); + VERIFY_IS_EQUAL(b(1, 1), 4); + VERIFY_IS_EQUAL(b(1, 2), 5); + + Eigen::Tensor c(3, 2, 4); + c.setValues({{{0, 1, 2, 3}, {4, 5, 6, 7}}, + {{10, 11, 12, 13}, {14, 15, 16, 17}}, + {{20, 21, 22, 23}, {24, 25, 26, 27}}}); + VERIFY_IS_EQUAL(c(0, 0, 0), 0); + VERIFY_IS_EQUAL(c(0, 0, 1), 1); + VERIFY_IS_EQUAL(c(0, 0, 2), 2); + VERIFY_IS_EQUAL(c(0, 0, 3), 3); + VERIFY_IS_EQUAL(c(0, 1, 0), 4); + VERIFY_IS_EQUAL(c(0, 1, 1), 5); + VERIFY_IS_EQUAL(c(0, 1, 2), 6); + VERIFY_IS_EQUAL(c(0, 1, 3), 7); + VERIFY_IS_EQUAL(c(1, 0, 0), 10); + VERIFY_IS_EQUAL(c(1, 0, 1), 11); + VERIFY_IS_EQUAL(c(1, 0, 2), 12); + VERIFY_IS_EQUAL(c(1, 0, 3), 13); + VERIFY_IS_EQUAL(c(1, 1, 0), 14); + VERIFY_IS_EQUAL(c(1, 1, 1), 15); + VERIFY_IS_EQUAL(c(1, 1, 2), 16); + VERIFY_IS_EQUAL(c(1, 1, 3), 17); + VERIFY_IS_EQUAL(c(2, 0, 0), 20); + VERIFY_IS_EQUAL(c(2, 0, 1), 21); + VERIFY_IS_EQUAL(c(2, 0, 2), 22); + VERIFY_IS_EQUAL(c(2, 0, 3), 23); + VERIFY_IS_EQUAL(c(2, 1, 0), 24); + VERIFY_IS_EQUAL(c(2, 1, 1), 25); + VERIFY_IS_EQUAL(c(2, 1, 2), 26); + VERIFY_IS_EQUAL(c(2, 1, 3), 27); +#endif // EIGEN_HAS_VARIADIC_TEMPLATES +} void test_cxx11_tensor_assign() { @@ -294,4 +366,5 @@ void test_cxx11_tensor_assign() CALL_SUBTEST(test_same_type()); CALL_SUBTEST(test_auto_resize()); CALL_SUBTEST(test_compound_assign()); + CALL_SUBTEST(test_std_initializers_tensor()); } diff --git a/unsupported/test/cxx11_tensor_broadcasting.cpp b/unsupported/test/cxx11_tensor_broadcasting.cpp index 9663912a4..f0792bdcf 100644 --- a/unsupported/test/cxx11_tensor_broadcasting.cpp +++ b/unsupported/test/cxx11_tensor_broadcasting.cpp @@ -13,9 +13,10 @@ using Eigen::Tensor; +template static void test_simple_broadcasting() { - Tensor tensor(2,3,5,7); + Tensor tensor(2,3,5,7); tensor.setRandom(); array broadcasts; broadcasts[0] = 1; @@ -23,7 +24,7 @@ static void test_simple_broadcasting() broadcasts[2] = 1; broadcasts[3] = 1; - Tensor no_broadcast; + Tensor no_broadcast; no_broadcast = tensor.broadcast(broadcasts); VERIFY_IS_EQUAL(no_broadcast.dimension(0), 2); @@ -45,7 +46,7 @@ static void test_simple_broadcasting() broadcasts[1] = 3; broadcasts[2] = 1; broadcasts[3] = 4; - Tensor broadcast; + Tensor broadcast; broadcast = tensor.broadcast(broadcasts); VERIFY_IS_EQUAL(broadcast.dimension(0), 4); @@ -65,16 +66,17 @@ static void test_simple_broadcasting() } +template static void test_vectorized_broadcasting() { - Tensor tensor(8,3,5); + Tensor tensor(8,3,5); tensor.setRandom(); array broadcasts; broadcasts[0] = 2; broadcasts[1] = 3; broadcasts[2] = 4; - Tensor broadcast; + Tensor broadcast; broadcast = tensor.broadcast(broadcasts); VERIFY_IS_EQUAL(broadcast.dimension(0), 16); @@ -107,8 +109,78 @@ static void test_vectorized_broadcasting() } +template +static void test_static_broadcasting() +{ + Tensor tensor(8,3,5); + tensor.setRandom(); + Eigen::IndexList, Eigen::type2index<3>, Eigen::type2index<4>> broadcasts; + + Tensor broadcast; + broadcast = tensor.broadcast(broadcasts); + + VERIFY_IS_EQUAL(broadcast.dimension(0), 16); + VERIFY_IS_EQUAL(broadcast.dimension(1), 9); + VERIFY_IS_EQUAL(broadcast.dimension(2), 20); + + for (int i = 0; i < 16; ++i) { + for (int j = 0; j < 9; ++j) { + for (int k = 0; k < 20; ++k) { + VERIFY_IS_EQUAL(tensor(i%8,j%3,k%5), broadcast(i,j,k)); + } + } + } + + tensor.resize(11,3,5); + tensor.setRandom(); + broadcast = tensor.broadcast(broadcasts); + + VERIFY_IS_EQUAL(broadcast.dimension(0), 22); + VERIFY_IS_EQUAL(broadcast.dimension(1), 9); + VERIFY_IS_EQUAL(broadcast.dimension(2), 20); + + for (int i = 0; i < 22; ++i) { + for (int j = 0; j < 9; ++j) { + for (int k = 0; k < 20; ++k) { + VERIFY_IS_EQUAL(tensor(i%11,j%3,k%5), broadcast(i,j,k)); + } + } + } +} + + +template +static void test_fixed_size_broadcasting() +{ + // Need to add a [] operator to the Size class for this to work +#if 0 + Tensor t1(10); + t1.setRandom(); + TensorFixedSize, DataLayout> t2; + t2 = t2.constant(20.0f); + + Tensor t3 = t1 + t2.broadcast(Eigen::array{{10}}); + for (int i = 0; i < 10; ++i) { + VERIFY_IS_APPROX(t3(i), t1(i) + t2(0)); + } + + TensorMap, DataLayout> > t4(t2.data(), {{1}}); + Tensor t5 = t1 + t4.broadcast(Eigen::array{{10}}); + for (int i = 0; i < 10; ++i) { + VERIFY_IS_APPROX(t5(i), t1(i) + t2(0)); + } +#endif +} + + void test_cxx11_tensor_broadcasting() { - CALL_SUBTEST(test_simple_broadcasting()); - CALL_SUBTEST(test_vectorized_broadcasting()); + CALL_SUBTEST(test_simple_broadcasting()); + CALL_SUBTEST(test_simple_broadcasting()); + CALL_SUBTEST(test_vectorized_broadcasting()); + CALL_SUBTEST(test_vectorized_broadcasting()); + CALL_SUBTEST(test_static_broadcasting()); + CALL_SUBTEST(test_static_broadcasting()); + CALL_SUBTEST(test_fixed_size_broadcasting()); + CALL_SUBTEST(test_fixed_size_broadcasting()); } diff --git a/unsupported/test/cxx11_tensor_chipping.cpp b/unsupported/test/cxx11_tensor_chipping.cpp index 0027b2888..0de7bbac6 100644 --- a/unsupported/test/cxx11_tensor_chipping.cpp +++ b/unsupported/test/cxx11_tensor_chipping.cpp @@ -13,18 +13,20 @@ using Eigen::Tensor; - +template static void test_simple_chip() { - Tensor tensor(2,3,5,7,11); + Tensor tensor(2,3,5,7,11); tensor.setRandom(); - Tensor chip1; - chip1 = tensor.chip<0>(1); + Tensor chip1; + chip1 = tensor.template chip<0>(1); + VERIFY_IS_EQUAL(chip1.dimension(0), 3); VERIFY_IS_EQUAL(chip1.dimension(1), 5); VERIFY_IS_EQUAL(chip1.dimension(2), 7); VERIFY_IS_EQUAL(chip1.dimension(3), 11); + for (int i = 0; i < 3; ++i) { for (int j = 0; j < 5; ++j) { for (int k = 0; k < 7; ++k) { @@ -35,7 +37,7 @@ static void test_simple_chip() } } - Tensor chip2 = tensor.chip<1>(1); + Tensor chip2 = tensor.template chip<1>(1); VERIFY_IS_EQUAL(chip2.dimension(0), 2); VERIFY_IS_EQUAL(chip2.dimension(1), 5); VERIFY_IS_EQUAL(chip2.dimension(2), 7); @@ -50,7 +52,7 @@ static void test_simple_chip() } } - Tensor chip3 = tensor.chip<2>(2); + Tensor chip3 = tensor.template chip<2>(2); VERIFY_IS_EQUAL(chip3.dimension(0), 2); VERIFY_IS_EQUAL(chip3.dimension(1), 3); VERIFY_IS_EQUAL(chip3.dimension(2), 7); @@ -65,7 +67,7 @@ static void test_simple_chip() } } - Tensor chip4(tensor.chip<3>(5)); + Tensor chip4(tensor.template chip<3>(5)); VERIFY_IS_EQUAL(chip4.dimension(0), 2); VERIFY_IS_EQUAL(chip4.dimension(1), 3); VERIFY_IS_EQUAL(chip4.dimension(2), 5); @@ -80,7 +82,7 @@ static void test_simple_chip() } } - Tensor chip5(tensor.chip<4>(7)); + Tensor chip5(tensor.template chip<4>(7)); VERIFY_IS_EQUAL(chip5.dimension(0), 2); VERIFY_IS_EQUAL(chip5.dimension(1), 3); VERIFY_IS_EQUAL(chip5.dimension(2), 5); @@ -96,14 +98,97 @@ static void test_simple_chip() } } +template +static void test_dynamic_chip() +{ + Tensor tensor(2,3,5,7,11); + tensor.setRandom(); + + Tensor chip1; + chip1 = tensor.chip(1, 0); + VERIFY_IS_EQUAL(chip1.dimension(0), 3); + VERIFY_IS_EQUAL(chip1.dimension(1), 5); + VERIFY_IS_EQUAL(chip1.dimension(2), 7); + VERIFY_IS_EQUAL(chip1.dimension(3), 11); + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 5; ++j) { + for (int k = 0; k < 7; ++k) { + for (int l = 0; l < 11; ++l) { + VERIFY_IS_EQUAL(chip1(i,j,k,l), tensor(1,i,j,k,l)); + } + } + } + } + + Tensor chip2 = tensor.chip(1, 1); + VERIFY_IS_EQUAL(chip2.dimension(0), 2); + VERIFY_IS_EQUAL(chip2.dimension(1), 5); + VERIFY_IS_EQUAL(chip2.dimension(2), 7); + VERIFY_IS_EQUAL(chip2.dimension(3), 11); + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + for (int l = 0; l < 11; ++l) { + VERIFY_IS_EQUAL(chip2(i,j,k,l), tensor(i,1,j,k,l)); + } + } + } + } + + Tensor chip3 = tensor.chip(2, 2); + VERIFY_IS_EQUAL(chip3.dimension(0), 2); + VERIFY_IS_EQUAL(chip3.dimension(1), 3); + VERIFY_IS_EQUAL(chip3.dimension(2), 7); + VERIFY_IS_EQUAL(chip3.dimension(3), 11); + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + for (int l = 0; l < 11; ++l) { + VERIFY_IS_EQUAL(chip3(i,j,k,l), tensor(i,j,2,k,l)); + } + } + } + } + + Tensor chip4(tensor.chip(5, 3)); + VERIFY_IS_EQUAL(chip4.dimension(0), 2); + VERIFY_IS_EQUAL(chip4.dimension(1), 3); + VERIFY_IS_EQUAL(chip4.dimension(2), 5); + VERIFY_IS_EQUAL(chip4.dimension(3), 11); + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 7; ++l) { + VERIFY_IS_EQUAL(chip4(i,j,k,l), tensor(i,j,k,5,l)); + } + } + } + } + + Tensor chip5(tensor.chip(7, 4)); + VERIFY_IS_EQUAL(chip5.dimension(0), 2); + VERIFY_IS_EQUAL(chip5.dimension(1), 3); + VERIFY_IS_EQUAL(chip5.dimension(2), 5); + VERIFY_IS_EQUAL(chip5.dimension(3), 7); + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 7; ++l) { + VERIFY_IS_EQUAL(chip5(i,j,k,l), tensor(i,j,k,l,7)); + } + } + } + } +} +template static void test_chip_in_expr() { - Tensor input1(2,3,5,7,11); + Tensor input1(2,3,5,7,11); input1.setRandom(); - Tensor input2(3,5,7,11); + Tensor input2(3,5,7,11); input2.setRandom(); - Tensor result = input1.chip<0>(0) + input2; + Tensor result = input1.template chip<0>(0) + input2; for (int i = 0; i < 3; ++i) { for (int j = 0; j < 5; ++j) { for (int k = 0; k < 7; ++k) { @@ -115,9 +200,9 @@ static void test_chip_in_expr() { } } - Tensor input3(3,7,11); + Tensor input3(3,7,11); input3.setRandom(); - Tensor result2 = input1.chip<0>(0).chip<1>(2) + input3; + Tensor result2 = input1.template chip<0>(0).template chip<1>(2) + input3; for (int i = 0; i < 3; ++i) { for (int j = 0; j < 7; ++j) { for (int k = 0; k < 11; ++k) { @@ -128,16 +213,16 @@ static void test_chip_in_expr() { } } - +template static void test_chip_as_lvalue() { - Tensor input1(2,3,5,7,11); + Tensor input1(2,3,5,7,11); input1.setRandom(); - Tensor input2(3,5,7,11); + Tensor input2(3,5,7,11); input2.setRandom(); - Tensor tensor = input1; - tensor.chip<0>(1) = input2; + Tensor tensor = input1; + tensor.template chip<0>(1) = input2; for (int i = 0; i < 2; ++i) { for (int j = 0; j < 3; ++j) { for (int k = 0; k < 5; ++k) { @@ -154,10 +239,10 @@ static void test_chip_as_lvalue() } } - Tensor input3(2,5,7,11); + Tensor input3(2,5,7,11); input3.setRandom(); tensor = input1; - tensor.chip<1>(1) = input3; + tensor.template chip<1>(1) = input3; for (int i = 0; i < 2; ++i) { for (int j = 0; j < 3; ++j) { for (int k = 0; k < 5; ++k) { @@ -174,10 +259,10 @@ static void test_chip_as_lvalue() } } - Tensor input4(2,3,7,11); + Tensor input4(2,3,7,11); input4.setRandom(); tensor = input1; - tensor.chip<2>(3) = input4; + tensor.template chip<2>(3) = input4; for (int i = 0; i < 2; ++i) { for (int j = 0; j < 3; ++j) { for (int k = 0; k < 5; ++k) { @@ -194,10 +279,10 @@ static void test_chip_as_lvalue() } } - Tensor input5(2,3,5,11); + Tensor input5(2,3,5,11); input5.setRandom(); tensor = input1; - tensor.chip<3>(4) = input5; + tensor.template chip<3>(4) = input5; for (int i = 0; i < 2; ++i) { for (int j = 0; j < 3; ++j) { for (int k = 0; k < 5; ++k) { @@ -214,10 +299,10 @@ static void test_chip_as_lvalue() } } - Tensor input6(2,3,5,7); + Tensor input6(2,3,5,7); input6.setRandom(); tensor = input1; - tensor.chip<4>(5) = input6; + tensor.template chip<4>(5) = input6; for (int i = 0; i < 2; ++i) { for (int j = 0; j < 3; ++j) { for (int k = 0; k < 5; ++k) { @@ -235,47 +320,57 @@ static void test_chip_as_lvalue() } } - +template static void test_chip_raw_data() { - Tensor tensor(2,3,5,7,11); + Tensor tensor(2,3,5,7,11); tensor.setRandom(); - typedef TensorEvaluator(3)), DefaultDevice> Evaluator4; - auto chip = Evaluator4(tensor.chip<4>(3), DefaultDevice()); + typedef TensorEvaluator(3)), DefaultDevice> Evaluator4; + auto chip = Evaluator4(tensor.template chip<4>(3), DefaultDevice()); for (int i = 0; i < 2; ++i) { for (int j = 0; j < 3; ++j) { for (int k = 0; k < 5; ++k) { for (int l = 0; l < 7; ++l) { - int chip_index = i + 2 * (j + 3 * (k + 5 * l)); + int chip_index; + if (DataLayout == ColMajor) { + chip_index = i + 2 * (j + 3 * (k + 5 * l)); + } else { + chip_index = 11 * (l + 7 * (k + 5 * (j + 3 * i))); + } VERIFY_IS_EQUAL(chip.data()[chip_index], tensor(i,j,k,l,3)); } } } } - typedef TensorEvaluator(0)), DefaultDevice> Evaluator0; - auto chip0 = Evaluator0(tensor.chip<0>(0), DefaultDevice()); + typedef TensorEvaluator(0)), DefaultDevice> Evaluator0; + auto chip0 = Evaluator0(tensor.template chip<0>(0), DefaultDevice()); VERIFY_IS_EQUAL(chip0.data(), static_cast(0)); - typedef TensorEvaluator(0)), DefaultDevice> Evaluator1; - auto chip1 = Evaluator1(tensor.chip<1>(0), DefaultDevice()); + typedef TensorEvaluator(0)), DefaultDevice> Evaluator1; + auto chip1 = Evaluator1(tensor.template chip<1>(0), DefaultDevice()); VERIFY_IS_EQUAL(chip1.data(), static_cast(0)); - typedef TensorEvaluator(0)), DefaultDevice> Evaluator2; - auto chip2 = Evaluator2(tensor.chip<2>(0), DefaultDevice()); + typedef TensorEvaluator(0)), DefaultDevice> Evaluator2; + auto chip2 = Evaluator2(tensor.template chip<2>(0), DefaultDevice()); VERIFY_IS_EQUAL(chip2.data(), static_cast(0)); - typedef TensorEvaluator(0)), DefaultDevice> Evaluator3; - auto chip3 = Evaluator3(tensor.chip<3>(0), DefaultDevice()); + typedef TensorEvaluator(0)), DefaultDevice> Evaluator3; + auto chip3 = Evaluator3(tensor.template chip<3>(0), DefaultDevice()); VERIFY_IS_EQUAL(chip3.data(), static_cast(0)); } - void test_cxx11_tensor_chipping() { - CALL_SUBTEST(test_simple_chip()); - CALL_SUBTEST(test_chip_in_expr()); - CALL_SUBTEST(test_chip_as_lvalue()); - CALL_SUBTEST(test_chip_raw_data()); + CALL_SUBTEST(test_simple_chip()); + CALL_SUBTEST(test_simple_chip()); + CALL_SUBTEST(test_dynamic_chip()); + CALL_SUBTEST(test_dynamic_chip()); + CALL_SUBTEST(test_chip_in_expr()); + CALL_SUBTEST(test_chip_in_expr()); + CALL_SUBTEST(test_chip_as_lvalue()); + CALL_SUBTEST(test_chip_as_lvalue()); + CALL_SUBTEST(test_chip_raw_data()); + CALL_SUBTEST(test_chip_raw_data()); } diff --git a/unsupported/test/cxx11_tensor_concatenation.cpp b/unsupported/test/cxx11_tensor_concatenation.cpp index 8fd4f5f80..9fdf33c16 100644 --- a/unsupported/test/cxx11_tensor_concatenation.cpp +++ b/unsupported/test/cxx11_tensor_concatenation.cpp @@ -13,15 +13,16 @@ using Eigen::Tensor; +template static void test_dimension_failures() { - Tensor left(2, 3, 1); - Tensor right(3, 3, 1); + Tensor left(2, 3, 1); + Tensor right(3, 3, 1); left.setRandom(); right.setRandom(); // Okay; other dimensions are equal. - Tensor concatenation = left.concatenate(right, 0); + Tensor concatenation = left.concatenate(right, 0); // Dimension mismatches. VERIFY_RAISES_ASSERT(concatenation = left.concatenate(right, 1)); @@ -32,33 +33,35 @@ static void test_dimension_failures() VERIFY_RAISES_ASSERT(concatenation = left.concatenate(right, -1)); } +template static void test_static_dimension_failure() { - Tensor left(2, 3); - Tensor right(2, 3, 1); + Tensor left(2, 3); + Tensor right(2, 3, 1); #ifdef CXX11_TENSOR_CONCATENATION_STATIC_DIMENSION_FAILURE // Technically compatible, but we static assert that the inputs have same // NumDims. - Tensor concatenation = left.concatenate(right, 0); + Tensor concatenation = left.concatenate(right, 0); #endif // This can be worked around in this case. - Tensor concatenation = left + Tensor concatenation = left .reshape(Tensor::Dimensions{{2, 3, 1}}) .concatenate(right, 0); - Tensor alternative = left + Tensor alternative = left .concatenate(right.reshape(Tensor::Dimensions{{2, 3}}), 0); } +template static void test_simple_concatenation() { - Tensor left(2, 3, 1); - Tensor right(2, 3, 1); + Tensor left(2, 3, 1); + Tensor right(2, 3, 1); left.setRandom(); right.setRandom(); - Tensor concatenation = left.concatenate(right, 0); + Tensor concatenation = left.concatenate(right, 0); VERIFY_IS_EQUAL(concatenation.dimension(0), 4); VERIFY_IS_EQUAL(concatenation.dimension(1), 3); VERIFY_IS_EQUAL(concatenation.dimension(2), 1); @@ -103,8 +106,11 @@ static void test_simple_concatenation() void test_cxx11_tensor_concatenation() { - CALL_SUBTEST(test_dimension_failures()); - CALL_SUBTEST(test_static_dimension_failure()); - CALL_SUBTEST(test_simple_concatenation()); + CALL_SUBTEST(test_dimension_failures()); + CALL_SUBTEST(test_dimension_failures()); + CALL_SUBTEST(test_static_dimension_failure()); + CALL_SUBTEST(test_static_dimension_failure()); + CALL_SUBTEST(test_simple_concatenation()); + CALL_SUBTEST(test_simple_concatenation()); // CALL_SUBTEST(test_vectorized_concatenation()); } diff --git a/unsupported/test/cxx11_tensor_contract_cuda.cpp b/unsupported/test/cxx11_tensor_contract_cuda.cpp new file mode 100644 index 000000000..9599607c6 --- /dev/null +++ b/unsupported/test/cxx11_tensor_contract_cuda.cpp @@ -0,0 +1,121 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// Copyright (C) 2014 Navdeep Jaitly +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#define EIGEN_TEST_NO_LONGDOUBLE +#define EIGEN_TEST_NO_COMPLEX +#define EIGEN_TEST_FUNC cxx11_tensor_cuda +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int +#define EIGEN_USE_GPU + + +#include "main.h" +#include + +using Eigen::Tensor; +typedef Tensor::DimensionPair DimPair; + +template +static void test_cuda_contraction(int m_size, int k_size, int n_size) +{ + cout<<"Calling with ("< t_left(Eigen::array(m_size, k_size)); + Tensor t_right(Eigen::array(k_size, n_size)); + Tensor t_result(Eigen::array(m_size, n_size)); + Tensor t_result_gpu(Eigen::array(m_size, n_size)); + Eigen::array dims(DimPair(1, 0)); + + t_left.setRandom(); + t_right.setRandom(); + + std::size_t t_left_bytes = t_left.size() * sizeof(float); + std::size_t t_right_bytes = t_right.size() * sizeof(float); + std::size_t t_result_bytes = t_result.size() * sizeof(float); + + float* d_t_left; + float* d_t_right; + float* d_t_result; + + cudaMalloc((void**)(&d_t_left), t_left_bytes); + cudaMalloc((void**)(&d_t_right), t_right_bytes); + cudaMalloc((void**)(&d_t_result), t_result_bytes); + + cudaMemcpy(d_t_left, t_left.data(), t_left_bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_t_right, t_right.data(), t_right_bytes, cudaMemcpyHostToDevice); + + cudaStream_t stream; + assert(cudaStreamCreate(&stream) == cudaSuccess); + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap > + gpu_t_left(d_t_left, Eigen::array(m_size, k_size)); + Eigen::TensorMap > + gpu_t_right(d_t_right, Eigen::array(k_size, n_size)); + Eigen::TensorMap > + gpu_t_result(d_t_result, Eigen::array(m_size, n_size)); + + + gpu_t_result.device(gpu_device) = gpu_t_left.contract(gpu_t_right, dims); + t_result = t_left.contract(t_right, dims); + + cudaMemcpy(t_result_gpu.data(), d_t_result, t_result_bytes, cudaMemcpyDeviceToHost); + for (size_t i = 0; i < t_result.dimensions().TotalSize(); i++) { + if (fabs(t_result.data()[i] - t_result_gpu.data()[i]) >= 1e-4) { + cout << "mismatch detected at index " << i << ": " << t_result.data()[i] + << " vs " << t_result_gpu.data()[i] << endl; + assert(false); + } + } + + cudaFree((void*)d_t_left); + cudaFree((void*)d_t_right); + cudaFree((void*)d_t_result); +} + + +void test_cxx11_tensor_cuda() +{ + cout<<"Calling contraction tests"<(128, 128, 128)); + CALL_SUBTEST(test_cuda_contraction(128, 128, 128)); + for (int k = 32; k < 256; k++) { + CALL_SUBTEST(test_cuda_contraction(128, k, 128)); + CALL_SUBTEST(test_cuda_contraction(128, k, 128)); + } + for (int k = 32; k < 256; k++) { + CALL_SUBTEST(test_cuda_contraction(128, 128, k)); + CALL_SUBTEST(test_cuda_contraction(128, 128, k)); + } + for (int k = 32; k < 256; k++) { + CALL_SUBTEST(test_cuda_contraction(k, 128, 128)); + CALL_SUBTEST(test_cuda_contraction(k, 128, 128)); + } + + int m_sizes[] = {31, 39, 63, 64, 65, + 127, 129, 255, 257, 511, + 512, 513, 1023, 1024, 1025 }; + int n_sizes[] = {31, 39, 63, 64, 65, + 127, 129, 255, 257, 511, + 512, 513, 1023, 1024, 1025 }; + + int k_sizes[] = { 31, 39, 63, 64, 65, + 95, 96, 127, 129, 255, + 257, 511, 512, 513, 1023, + 1024, 1025}; + + for (int i = 0; i <15; i++) + for (int j = 0; j < 15; j++) + for (int k = 0; k < 17; k++) { + CALL_SUBTEST(test_cuda_contraction(m_sizes[i], n_sizes[j], k_sizes[k])); + CALL_SUBTEST(test_cuda_contraction(m_sizes[i], n_sizes[j], k_sizes[k])); + } +} diff --git a/unsupported/test/cxx11_tensor_contraction.cpp b/unsupported/test/cxx11_tensor_contraction.cpp index 17bd335f7..6124818fd 100644 --- a/unsupported/test/cxx11_tensor_contraction.cpp +++ b/unsupported/test/cxx11_tensor_contraction.cpp @@ -16,18 +16,18 @@ using Eigen::Tensor; typedef Tensor::DimensionPair DimPair; - +template static void test_evals() { - Tensor mat1(2, 3); - Tensor mat2(2, 3); - Tensor mat3(3, 2); + Tensor mat1(2, 3); + Tensor mat2(2, 3); + Tensor mat3(3, 2); mat1.setRandom(); mat2.setRandom(); mat3.setRandom(); - Tensor mat4(3,3); + Tensor mat4(3,3); mat4.setZero(); Eigen::array dims3({{DimPair(0, 0)}}); typedef TensorEvaluator Evaluator; @@ -47,7 +47,7 @@ static void test_evals() VERIFY_IS_APPROX(mat4(2,1), mat1(0,2)*mat2(0,1) + mat1(1,2)*mat2(1,1)); VERIFY_IS_APPROX(mat4(2,2), mat1(0,2)*mat2(0,2) + mat1(1,2)*mat2(1,2)); - Tensor mat5(2,2); + Tensor mat5(2,2); mat5.setZero(); Eigen::array dims4({{DimPair(1, 1)}}); typedef TensorEvaluator Evaluator2; @@ -62,7 +62,7 @@ static void test_evals() VERIFY_IS_APPROX(mat5(1,0), mat1(1,0)*mat2(0,0) + mat1(1,1)*mat2(0,1) + mat1(1,2)*mat2(0,2)); VERIFY_IS_APPROX(mat5(1,1), mat1(1,0)*mat2(1,0) + mat1(1,1)*mat2(1,1) + mat1(1,2)*mat2(1,2)); - Tensor mat6(2,2); + Tensor mat6(2,2); mat6.setZero(); Eigen::array dims6({{DimPair(1, 0)}}); typedef TensorEvaluator Evaluator3; @@ -78,16 +78,16 @@ static void test_evals() VERIFY_IS_APPROX(mat6(1,1), mat1(1,0)*mat3(0,1) + mat1(1,1)*mat3(1,1) + mat1(1,2)*mat3(2,1)); } - +template static void test_scalar() { - Tensor vec1({6}); - Tensor vec2({6}); + Tensor vec1({6}); + Tensor vec2({6}); vec1.setRandom(); vec2.setRandom(); - Tensor scalar(1); + Tensor scalar(1); scalar.setZero(); Eigen::array dims({{DimPair(0, 0)}}); typedef TensorEvaluator Evaluator; @@ -102,16 +102,16 @@ static void test_scalar() VERIFY_IS_APPROX(scalar(0), expected); } - +template static void test_multidims() { - Tensor mat1(2, 2, 2); - Tensor mat2(2, 2, 2, 2); + Tensor mat1(2, 2, 2); + Tensor mat2(2, 2, 2, 2); mat1.setRandom(); mat2.setRandom(); - Tensor mat3(2, 2, 2); + Tensor mat3(2, 2, 2); mat3.setZero(); Eigen::array dims({{DimPair(1, 2), DimPair(2, 3)}}); typedef TensorEvaluator Evaluator; @@ -140,15 +140,15 @@ static void test_multidims() mat1(1,0,1)*mat2(1,1,0,1) + mat1(1,1,1)*mat2(1,1,1,1)); } - +template static void test_holes() { - Tensor t1(2, 5, 7, 3); - Tensor t2(2, 7, 11, 13, 3); + Tensor t1(2, 5, 7, 3); + Tensor t2(2, 7, 11, 13, 3); t1.setRandom(); t2.setRandom(); Eigen::array dims({{DimPair(0, 0), DimPair(3, 4)}}); - Tensor result = t1.contract(t2, dims); + Tensor result = t1.contract(t2, dims); VERIFY_IS_EQUAL(result.dimension(0), 5); VERIFY_IS_EQUAL(result.dimension(1), 7); VERIFY_IS_EQUAL(result.dimension(2), 7); @@ -174,16 +174,16 @@ static void test_holes() { } } - +template static void test_full_redux() { - Tensor t1(2, 2); - Tensor t2(2, 2, 2); + Tensor t1(2, 2); + Tensor t2(2, 2, 2); t1.setRandom(); t2.setRandom(); Eigen::array dims({{DimPair(0, 0), DimPair(1, 1)}}); - Tensor result = t1.contract(t2, dims); + Tensor result = t1.contract(t2, dims); VERIFY_IS_EQUAL(result.dimension(0), 2); VERIFY_IS_APPROX(result(0), t1(0, 0) * t2(0, 0, 0) + t1(1, 0) * t2(1, 0, 0) + t1(0, 1) * t2(0, 1, 0) + t1(1, 1) * t2(1, 1, 0)); @@ -200,13 +200,13 @@ static void test_full_redux() + t1(0, 1) * t2(1, 0, 1) + t1(1, 1) * t2(1, 1, 1)); } - +template static void test_contraction_of_contraction() { - Tensor t1(2, 2); - Tensor t2(2, 2); - Tensor t3(2, 2); - Tensor t4(2, 2); + Tensor t1(2, 2); + Tensor t2(2, 2); + Tensor t3(2, 2); + Tensor t4(2, 2); t1.setRandom(); t2.setRandom(); t3.setRandom(); @@ -216,30 +216,32 @@ static void test_contraction_of_contraction() auto contract1 = t1.contract(t2, dims); auto diff = t3 - contract1; auto contract2 = t1.contract(t4, dims); - Tensor result = contract2.contract(diff, dims); + Tensor result = contract2.contract(diff, dims); + VERIFY_IS_EQUAL(result.dimension(0), 2); VERIFY_IS_EQUAL(result.dimension(1), 2); - Eigen::Map m1(t1.data(), 2, 2); - Eigen::Map m2(t2.data(), 2, 2); - Eigen::Map m3(t3.data(), 2, 2); - Eigen::Map m4(t4.data(), 2, 2); - Eigen::MatrixXf expected = (m1 * m4) * (m3 - m1 * m2); + Eigen::Map> + m1(t1.data(), 2, 2), m2(t2.data(), 2, 2), m3(t3.data(), 2, 2), + m4(t4.data(), 2, 2); + Eigen::Matrix + expected = (m1 * m4) * (m3 - m1 * m2); + VERIFY_IS_APPROX(result(0, 0), expected(0, 0)); VERIFY_IS_APPROX(result(0, 1), expected(0, 1)); VERIFY_IS_APPROX(result(1, 0), expected(1, 0)); VERIFY_IS_APPROX(result(1, 1), expected(1, 1)); } - +template static void test_expr() { - Tensor mat1(2, 3); - Tensor mat2(3, 2); + Tensor mat1(2, 3); + Tensor mat2(3, 2); mat1.setRandom(); mat2.setRandom(); - Tensor mat3(2,2); + Tensor mat3(2,2); Eigen::array dims({{DimPair(1, 0)}}); mat3 = mat1.contract(mat2, dims); @@ -250,16 +252,16 @@ static void test_expr() VERIFY_IS_APPROX(mat3(1,1), mat1(1,0)*mat2(0,1) + mat1(1,1)*mat2(1,1) + mat1(1,2)*mat2(2,1)); } - +template static void test_out_of_order_contraction() { - Tensor mat1(2, 2, 2); - Tensor mat2(2, 2, 2); + Tensor mat1(2, 2, 2); + Tensor mat2(2, 2, 2); mat1.setRandom(); mat2.setRandom(); - Tensor mat3(2, 2); + Tensor mat3(2, 2); Eigen::array dims({{DimPair(2, 0), DimPair(0, 2)}}); mat3 = mat1.contract(mat2, dims); @@ -295,18 +297,18 @@ static void test_out_of_order_contraction() } - +template static void test_consistency() { // this does something like testing (A*B)^T = (B^T * A^T) - Tensor mat1(4, 3, 5); - Tensor mat2(3, 2, 1, 5, 4); + Tensor mat1(4, 3, 5); + Tensor mat2(3, 2, 1, 5, 4); mat1.setRandom(); mat2.setRandom(); - Tensor mat3(5, 2, 1, 5); - Tensor mat4(2, 1, 5, 5); + Tensor mat3(5, 2, 1, 5); + Tensor mat4(2, 1, 5, 5); // contract on dimensions of size 4 and 3 Eigen::array dims1({{DimPair(0, 4), DimPair(1, 0)}}); @@ -316,27 +318,40 @@ static void test_consistency() mat4 = mat2.contract(mat1, dims2); // check that these are equal except for ordering of dimensions - for (size_t i = 0; i < 5; i++) { - for (size_t j = 0; j < 10; j++) { - VERIFY_IS_APPROX(mat3.data()[i + 5 * j], mat4.data()[j + 10 * i]); + if (DataLayout == ColMajor) { + for (size_t i = 0; i < 5; i++) { + for (size_t j = 0; j < 10; j++) { + VERIFY_IS_APPROX(mat3.data()[i + 5 * j], mat4.data()[j + 10 * i]); + } + } + } else { + // Row major + for (size_t i = 0; i < 5; i++) { + for (size_t j = 0; j < 10; j++) { + VERIFY_IS_APPROX(mat3.data()[10 * i + j], mat4.data()[i + 5 * j]); + } } } } - +template static void test_large_contraction() { - Tensor t_left(30, 50, 8, 31); - Tensor t_right(8, 31, 7, 20, 10); - Tensor t_result(30, 50, 7, 20, 10); + Tensor t_left(30, 50, 8, 31); + Tensor t_right(8, 31, 7, 20, 10); + Tensor t_result(30, 50, 7, 20, 10); t_left.setRandom(); t_right.setRandom(); - typedef Map MapXf; + // Add a little offset so that the results won't be close to zero. + t_left += t_left.constant(1.0f); + t_right += t_right.constant(1.0f); + + typedef Map> MapXf; MapXf m_left(t_left.data(), 1500, 248); MapXf m_right(t_right.data(), 248, 1400); - MatrixXf m_result(1500, 1400); + Eigen::Matrix m_result(1500, 1400); // this contraction should be equivalent to a single matrix multiplication Eigen::array dims({{DimPair(2, 0), DimPair(3, 1)}}); @@ -351,20 +366,20 @@ static void test_large_contraction() } } - +template static void test_matrix_vector() { - Tensor t_left(30, 50); - Tensor t_right(50); - Tensor t_result(30); + Tensor t_left(30, 50); + Tensor t_right(50); + Tensor t_result(30); t_left.setRandom(); t_right.setRandom(); - typedef Map> MapXf; + typedef Map> MapXf; MapXf m_left(t_left.data(), 30, 50); MapXf m_right(t_right.data(), 50, 1); - Eigen::Matrix m_result(30, 1); + Eigen::Matrix m_result(30, 1); // this contraction should be equivalent to a single matrix multiplication Eigen::array dims{{DimPair(1, 0)}}; @@ -379,18 +394,19 @@ static void test_matrix_vector() } +template static void test_tensor_vector() { - Tensor t_left(7, 13, 17); - Tensor t_right(1, 7); - typedef typename Tensor::DimensionPair DimensionPair; + Tensor t_left(7, 13, 17); + Tensor t_right(1, 7); + typedef typename Tensor::DimensionPair DimensionPair; Eigen::array dim_pair01{{{0, 1}}}; - Tensor t_result = t_left.contract(t_right, dim_pair01); + Tensor t_result = t_left.contract(t_right, dim_pair01); - typedef Map> MapXf; + typedef Map> MapXf; MapXf m_left(t_left.data(), 7, 13*17); MapXf m_right(t_right.data(), 1, 7); - Eigen::Matrix m_result = m_left.transpose() * m_right.transpose(); + Eigen::Matrix m_result = m_left.transpose() * m_right.transpose(); for (size_t i = 0; i < t_result.dimensions().TotalSize(); i++) { VERIFY_IS_APPROX(t_result(i), m_result(i, 0)); @@ -398,18 +414,63 @@ static void test_tensor_vector() } +template +static void test_small_blocking_factors() +{ + Tensor t_left(30, 5, 3, 31); + Tensor t_right(3, 31, 7, 20, 1); + t_left.setRandom(); + t_right.setRandom(); + + // Add a little offset so that the results won't be close to zero. + t_left += t_left.constant(1.0f); + t_right += t_right.constant(1.0f); + + // Force the cache sizes, which results in smaller blocking factors. + Eigen::setCpuCacheSizes(896, 1920, 2944); + + // this contraction should be equivalent to a single matrix multiplication + Eigen::array dims({{DimPair(2, 0), DimPair(3, 1)}}); + Tensor t_result; + t_result = t_left.contract(t_right, dims); + + // compute result using a simple eigen matrix product + Map> m_left(t_left.data(), 150, 93); + Map> m_right(t_right.data(), 93, 140); + Eigen::Matrix m_result = m_left * m_right; + + for (size_t i = 0; i < t_result.dimensions().TotalSize(); i++) { + VERIFY_IS_APPROX(t_result.data()[i], m_result.data()[i]); + } +} + + void test_cxx11_tensor_contraction() { - CALL_SUBTEST(test_evals()); - CALL_SUBTEST(test_scalar()); - CALL_SUBTEST(test_multidims()); - CALL_SUBTEST(test_holes()); - CALL_SUBTEST(test_full_redux()); - CALL_SUBTEST(test_contraction_of_contraction()); - CALL_SUBTEST(test_expr()); - CALL_SUBTEST(test_out_of_order_contraction()); - CALL_SUBTEST(test_consistency()); - CALL_SUBTEST(test_large_contraction()); - CALL_SUBTEST(test_matrix_vector()); - CALL_SUBTEST(test_tensor_vector()); + CALL_SUBTEST(test_evals()); + CALL_SUBTEST(test_evals()); + CALL_SUBTEST(test_scalar()); + CALL_SUBTEST(test_scalar()); + CALL_SUBTEST(test_multidims()); + CALL_SUBTEST(test_multidims()); + CALL_SUBTEST(test_holes()); + CALL_SUBTEST(test_holes()); + CALL_SUBTEST(test_full_redux()); + CALL_SUBTEST(test_full_redux()); + CALL_SUBTEST(test_contraction_of_contraction()); + CALL_SUBTEST(test_contraction_of_contraction()); + CALL_SUBTEST(test_expr()); + CALL_SUBTEST(test_expr()); + CALL_SUBTEST(test_out_of_order_contraction()); + CALL_SUBTEST(test_out_of_order_contraction()); + CALL_SUBTEST(test_consistency()); + CALL_SUBTEST(test_consistency()); + CALL_SUBTEST(test_large_contraction()); + CALL_SUBTEST(test_large_contraction()); + CALL_SUBTEST(test_matrix_vector()); + CALL_SUBTEST(test_matrix_vector()); + CALL_SUBTEST(test_tensor_vector()); + CALL_SUBTEST(test_tensor_vector()); + CALL_SUBTEST(test_small_blocking_factors()); + CALL_SUBTEST(test_small_blocking_factors()); } diff --git a/unsupported/test/cxx11_tensor_cuda.cpp b/unsupported/test/cxx11_tensor_cuda.cpp new file mode 100644 index 000000000..059d23de1 --- /dev/null +++ b/unsupported/test/cxx11_tensor_cuda.cpp @@ -0,0 +1,474 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +// TODO(mdevin): Free the cuda memory. + +#define EIGEN_TEST_NO_LONGDOUBLE +#define EIGEN_TEST_NO_COMPLEX +#define EIGEN_TEST_FUNC cxx11_tensor_cuda +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int +#define EIGEN_USE_GPU + + +#include "main.h" +#include + +using Eigen::Tensor; + +void test_cuda_elementwise_small() { + Tensor in1(Eigen::array(2)); + Tensor in2(Eigen::array(2)); + Tensor out(Eigen::array(2)); + in1.setRandom(); + in2.setRandom(); + + std::size_t in1_bytes = in1.size() * sizeof(float); + std::size_t in2_bytes = in2.size() * sizeof(float); + std::size_t out_bytes = out.size() * sizeof(float); + + float* d_in1; + float* d_in2; + float* d_out; + cudaMalloc((void**)(&d_in1), in1_bytes); + cudaMalloc((void**)(&d_in2), in2_bytes); + cudaMalloc((void**)(&d_out), out_bytes); + + cudaMemcpy(d_in1, in1.data(), in1_bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_in2, in2.data(), in2_bytes, cudaMemcpyHostToDevice); + + cudaStream_t stream; + assert(cudaStreamCreate(&stream) == cudaSuccess); + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap, Eigen::Aligned> gpu_in1( + d_in1, Eigen::array(2)); + Eigen::TensorMap, Eigen::Aligned> gpu_in2( + d_in2, Eigen::array(2)); + Eigen::TensorMap, Eigen::Aligned> gpu_out( + d_out, Eigen::array(2)); + + gpu_out.device(gpu_device) = gpu_in1 + gpu_in2; + + assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, + gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 2; ++i) { + VERIFY_IS_APPROX( + out(Eigen::array(i)), + in1(Eigen::array(i)) + in2(Eigen::array(i))); + } +} + +void test_cuda_elementwise() +{ + Tensor in1(Eigen::array(72,53,97)); + Tensor in2(Eigen::array(72,53,97)); + Tensor in3(Eigen::array(72,53,97)); + Tensor out(Eigen::array(72,53,97)); + in1.setRandom(); + in2.setRandom(); + in3.setRandom(); + + std::size_t in1_bytes = in1.size() * sizeof(float); + std::size_t in2_bytes = in2.size() * sizeof(float); + std::size_t in3_bytes = in3.size() * sizeof(float); + std::size_t out_bytes = out.size() * sizeof(float); + + float* d_in1; + float* d_in2; + float* d_in3; + float* d_out; + cudaMalloc((void**)(&d_in1), in1_bytes); + cudaMalloc((void**)(&d_in2), in2_bytes); + cudaMalloc((void**)(&d_in3), in3_bytes); + cudaMalloc((void**)(&d_out), out_bytes); + + cudaMemcpy(d_in1, in1.data(), in1_bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_in2, in2.data(), in2_bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_in3, in3.data(), in3_bytes, cudaMemcpyHostToDevice); + + cudaStream_t stream; + assert(cudaStreamCreate(&stream) == cudaSuccess); + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap > gpu_in1(d_in1, Eigen::array(72,53,97)); + Eigen::TensorMap > gpu_in2(d_in2, Eigen::array(72,53,97)); + Eigen::TensorMap > gpu_in3(d_in3, Eigen::array(72,53,97)); + Eigen::TensorMap > gpu_out(d_out, Eigen::array(72,53,97)); + + gpu_out.device(gpu_device) = gpu_in1 + gpu_in2 * gpu_in3; + + assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 72; ++i) { + for (int j = 0; j < 53; ++j) { + for (int k = 0; k < 97; ++k) { + VERIFY_IS_APPROX(out(Eigen::array(i,j,k)), in1(Eigen::array(i,j,k)) + in2(Eigen::array(i,j,k)) * in3(Eigen::array(i,j,k))); + } + } + } +} + + +void test_cuda_reduction() +{ + Tensor in1(Eigen::array(72,53,97,113)); + Tensor out(Eigen::array(72,97)); + in1.setRandom(); + + std::size_t in1_bytes = in1.size() * sizeof(float); + std::size_t out_bytes = out.size() * sizeof(float); + + float* d_in1; + float* d_out; + cudaMalloc((void**)(&d_in1), in1_bytes); + cudaMalloc((void**)(&d_out), out_bytes); + + cudaMemcpy(d_in1, in1.data(), in1_bytes, cudaMemcpyHostToDevice); + + cudaStream_t stream; + assert(cudaStreamCreate(&stream) == cudaSuccess); + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap > gpu_in1(d_in1, Eigen::array(72,53,97,113)); + Eigen::TensorMap > gpu_out(d_out, Eigen::array(72,97)); + + array reduction_axis; + reduction_axis[0] = 1; + reduction_axis[1] = 3; + + gpu_out.device(gpu_device) = gpu_in1.maximum(reduction_axis); + + assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 72; ++i) { + for (int j = 0; j < 97; ++j) { + float expected = 0; + for (int k = 0; k < 53; ++k) { + for (int l = 0; l < 113; ++l) { + expected = + std::max(expected, in1(Eigen::array(i, k, j, l))); + } + } + VERIFY_IS_APPROX(out(Eigen::array(i,j)), expected); + } + } +} + +template +static void test_cuda_contraction() +{ + // with these dimensions, the output has 300 * 140 elements, which is + // more than 30 * 1024, which is the number of threads in blocks on + // a 15 SM GK110 GPU + Tensor t_left(Eigen::array(6, 50, 3, 31)); + Tensor t_right(Eigen::array(3, 31, 7, 20, 1)); + Tensor t_result(Eigen::array(6, 50, 7, 20, 1)); + + t_left.setRandom(); + t_right.setRandom(); + + std::size_t t_left_bytes = t_left.size() * sizeof(float); + std::size_t t_right_bytes = t_right.size() * sizeof(float); + std::size_t t_result_bytes = t_result.size() * sizeof(float); + + float* d_t_left; + float* d_t_right; + float* d_t_result; + + cudaMalloc((void**)(&d_t_left), t_left_bytes); + cudaMalloc((void**)(&d_t_right), t_right_bytes); + cudaMalloc((void**)(&d_t_result), t_result_bytes); + + cudaMemcpy(d_t_left, t_left.data(), t_left_bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_t_right, t_right.data(), t_right_bytes, cudaMemcpyHostToDevice); + + cudaStream_t stream; + assert(cudaStreamCreate(&stream) == cudaSuccess); + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap > + gpu_t_left(d_t_left, Eigen::array(6, 50, 3, 31)); + Eigen::TensorMap > + gpu_t_right(d_t_right, Eigen::array(3, 31, 7, 20, 1)); + Eigen::TensorMap > + gpu_t_result(d_t_result, Eigen::array(6, 50, 7, 20, 1)); + + typedef Eigen::Map > MapXf; + MapXf m_left(t_left.data(), 300, 93); + MapXf m_right(t_right.data(), 93, 140); + Eigen::Matrix m_result(300, 140); + + typedef Tensor::DimensionPair DimPair; + Eigen::array dims; + dims[0] = DimPair(2, 0); + dims[1] = DimPair(3, 1); + + m_result = m_left * m_right; + gpu_t_result.device(gpu_device) = gpu_t_left.contract(gpu_t_right, dims); + + cudaMemcpy(t_result.data(), d_t_result, t_result_bytes, cudaMemcpyDeviceToHost); + + for (size_t i = 0; i < t_result.dimensions().TotalSize(); i++) { + if (fabs(t_result.data()[i] - m_result.data()[i]) >= 1e-4) { + cout << "mismatch detected at index " << i << ": " << t_result.data()[i] << " vs " << m_result.data()[i] << endl; + assert(false); + } + } +} + +static void test_cuda_convolution_1d() +{ + Tensor input(Eigen::array(74,37,11,137)); + Tensor kernel(Eigen::array(4)); + Tensor out(Eigen::array(74,34,11,137)); + input = input.constant(10.0f) + input.random(); + kernel = kernel.constant(7.0f) + kernel.random(); + + std::size_t input_bytes = input.size() * sizeof(float); + std::size_t kernel_bytes = kernel.size() * sizeof(float); + std::size_t out_bytes = out.size() * sizeof(float); + + float* d_input; + float* d_kernel; + float* d_out; + cudaMalloc((void**)(&d_input), input_bytes); + cudaMalloc((void**)(&d_kernel), kernel_bytes); + cudaMalloc((void**)(&d_out), out_bytes); + + cudaMemcpy(d_input, input.data(), input_bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_kernel, kernel.data(), kernel_bytes, cudaMemcpyHostToDevice); + + cudaStream_t stream; + assert(cudaStreamCreate(&stream) == cudaSuccess); + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap > gpu_input(d_input, Eigen::array(74,37,11,137)); + Eigen::TensorMap > gpu_kernel(d_kernel, Eigen::array(4)); + Eigen::TensorMap > gpu_out(d_out, Eigen::array(74,34,11,137)); + + Eigen::array dims(1); + gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims); + + assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 74; ++i) { + for (int j = 0; j < 34; ++j) { + for (int k = 0; k < 11; ++k) { + for (int l = 0; l < 137; ++l) { + const float result = out(Eigen::array(i,j,k,l)); + const float expected = input(Eigen::array(i,j+0,k,l)) * kernel(Eigen::array(0)) + + input(Eigen::array(i,j+1,k,l)) * kernel(Eigen::array(1)) + + input(Eigen::array(i,j+2,k,l)) * kernel(Eigen::array(2)) + + input(Eigen::array(i,j+3,k,l)) * kernel(Eigen::array(3)); + VERIFY_IS_APPROX(result, expected); + } + } + } + } +} + + +static void test_cuda_convolution_2d() +{ + Tensor input(Eigen::array(74,37,11,137)); + Tensor kernel(Eigen::array(3,4)); + Tensor out(Eigen::array(74,35,8,137)); + input = input.constant(10.0f) + input.random(); + kernel = kernel.constant(7.0f) + kernel.random(); + + std::size_t input_bytes = input.size() * sizeof(float); + std::size_t kernel_bytes = kernel.size() * sizeof(float); + std::size_t out_bytes = out.size() * sizeof(float); + + float* d_input; + float* d_kernel; + float* d_out; + cudaMalloc((void**)(&d_input), input_bytes); + cudaMalloc((void**)(&d_kernel), kernel_bytes); + cudaMalloc((void**)(&d_out), out_bytes); + + cudaMemcpy(d_input, input.data(), input_bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_kernel, kernel.data(), kernel_bytes, cudaMemcpyHostToDevice); + + cudaStream_t stream; + assert(cudaStreamCreate(&stream) == cudaSuccess); + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap > gpu_input(d_input, Eigen::array(74,37,11,137)); + Eigen::TensorMap > gpu_kernel(d_kernel, Eigen::array(3,4)); + Eigen::TensorMap > gpu_out(d_out, Eigen::array(74,35,8,137)); + + Eigen::array dims(1,2); + gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims); + + assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 74; ++i) { + for (int j = 0; j < 35; ++j) { + for (int k = 0; k < 8; ++k) { + for (int l = 0; l < 137; ++l) { + const float result = out(Eigen::array(i,j,k,l)); + const float expected = input(Eigen::array(i,j+0,k+0,l)) * kernel(Eigen::array(0,0)) + + input(Eigen::array(i,j+1,k+0,l)) * kernel(Eigen::array(1,0)) + + input(Eigen::array(i,j+2,k+0,l)) * kernel(Eigen::array(2,0)) + + input(Eigen::array(i,j+0,k+1,l)) * kernel(Eigen::array(0,1)) + + input(Eigen::array(i,j+1,k+1,l)) * kernel(Eigen::array(1,1)) + + input(Eigen::array(i,j+2,k+1,l)) * kernel(Eigen::array(2,1)) + + input(Eigen::array(i,j+0,k+2,l)) * kernel(Eigen::array(0,2)) + + input(Eigen::array(i,j+1,k+2,l)) * kernel(Eigen::array(1,2)) + + input(Eigen::array(i,j+2,k+2,l)) * kernel(Eigen::array(2,2)) + + input(Eigen::array(i,j+0,k+3,l)) * kernel(Eigen::array(0,3)) + + input(Eigen::array(i,j+1,k+3,l)) * kernel(Eigen::array(1,3)) + + input(Eigen::array(i,j+2,k+3,l)) * kernel(Eigen::array(2,3)); + VERIFY_IS_APPROX(result, expected); + } + } + } + } +} + + +static void test_cuda_convolution_3d() +{ + Tensor input(Eigen::array(74,37,11,137,17)); + Tensor kernel(Eigen::array(3,4,2)); + Tensor out(Eigen::array(74,35,8,136,17)); + input = input.constant(10.0f) + input.random(); + kernel = kernel.constant(7.0f) + kernel.random(); + + std::size_t input_bytes = input.size() * sizeof(float); + std::size_t kernel_bytes = kernel.size() * sizeof(float); + std::size_t out_bytes = out.size() * sizeof(float); + + float* d_input; + float* d_kernel; + float* d_out; + cudaMalloc((void**)(&d_input), input_bytes); + cudaMalloc((void**)(&d_kernel), kernel_bytes); + cudaMalloc((void**)(&d_out), out_bytes); + + cudaMemcpy(d_input, input.data(), input_bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_kernel, kernel.data(), kernel_bytes, cudaMemcpyHostToDevice); + + cudaStream_t stream; + assert(cudaStreamCreate(&stream) == cudaSuccess); + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap > gpu_input(d_input, Eigen::array(74,37,11,137,17)); + Eigen::TensorMap > gpu_kernel(d_kernel, Eigen::array(3,4,2)); + Eigen::TensorMap > gpu_out(d_out, Eigen::array(74,35,8,136,17)); + + Eigen::array dims(1,2,3); + gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims); + + assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 74; ++i) { + for (int j = 0; j < 35; ++j) { + for (int k = 0; k < 8; ++k) { + for (int l = 0; l < 136; ++l) { + for (int m = 0; m < 17; ++m) { + const float result = out(Eigen::array(i,j,k,l,m)); + const float expected = input(Eigen::array(i,j+0,k+0,l+0,m)) * kernel(Eigen::array(0,0,0)) + + input(Eigen::array(i,j+1,k+0,l+0,m)) * kernel(Eigen::array(1,0,0)) + + input(Eigen::array(i,j+2,k+0,l+0,m)) * kernel(Eigen::array(2,0,0)) + + input(Eigen::array(i,j+0,k+1,l+0,m)) * kernel(Eigen::array(0,1,0)) + + input(Eigen::array(i,j+1,k+1,l+0,m)) * kernel(Eigen::array(1,1,0)) + + input(Eigen::array(i,j+2,k+1,l+0,m)) * kernel(Eigen::array(2,1,0)) + + input(Eigen::array(i,j+0,k+2,l+0,m)) * kernel(Eigen::array(0,2,0)) + + input(Eigen::array(i,j+1,k+2,l+0,m)) * kernel(Eigen::array(1,2,0)) + + input(Eigen::array(i,j+2,k+2,l+0,m)) * kernel(Eigen::array(2,2,0)) + + input(Eigen::array(i,j+0,k+3,l+0,m)) * kernel(Eigen::array(0,3,0)) + + input(Eigen::array(i,j+1,k+3,l+0,m)) * kernel(Eigen::array(1,3,0)) + + input(Eigen::array(i,j+2,k+3,l+0,m)) * kernel(Eigen::array(2,3,0)) + + input(Eigen::array(i,j+0,k+0,l+1,m)) * kernel(Eigen::array(0,0,1)) + + input(Eigen::array(i,j+1,k+0,l+1,m)) * kernel(Eigen::array(1,0,1)) + + input(Eigen::array(i,j+2,k+0,l+1,m)) * kernel(Eigen::array(2,0,1)) + + input(Eigen::array(i,j+0,k+1,l+1,m)) * kernel(Eigen::array(0,1,1)) + + input(Eigen::array(i,j+1,k+1,l+1,m)) * kernel(Eigen::array(1,1,1)) + + input(Eigen::array(i,j+2,k+1,l+1,m)) * kernel(Eigen::array(2,1,1)) + + input(Eigen::array(i,j+0,k+2,l+1,m)) * kernel(Eigen::array(0,2,1)) + + input(Eigen::array(i,j+1,k+2,l+1,m)) * kernel(Eigen::array(1,2,1)) + + input(Eigen::array(i,j+2,k+2,l+1,m)) * kernel(Eigen::array(2,2,1)) + + input(Eigen::array(i,j+0,k+3,l+1,m)) * kernel(Eigen::array(0,3,1)) + + input(Eigen::array(i,j+1,k+3,l+1,m)) * kernel(Eigen::array(1,3,1)) + + input(Eigen::array(i,j+2,k+3,l+1,m)) * kernel(Eigen::array(2,3,1)); + VERIFY_IS_APPROX(result, expected); + } + } + } + } + } +} + +static float* CudaCopyFloat(float* data, int size) { + const int nbytes = size * sizeof(float); + float* result = NULL; + if (cudaMalloc((void**)(&result), nbytes) != cudaSuccess) { + return NULL; + } else { + if (data != NULL) { + cudaMemcpy(result, data, nbytes, cudaMemcpyHostToDevice); + } + return result; + } +} + +static void test_cuda_constant_broadcast() +{ + cudaStream_t stream; + assert(cudaStreamCreate(&stream) == cudaSuccess); + Eigen::GpuDevice gpu_device(&stream); + + Tensor t1(10); + for (int i = 0; i < 10; ++i) { + t1(i) = 10.0f * i; + } + float* t1_cuda = CudaCopyFloat(t1.data(), t1.size()); + Eigen::TensorMap > t1_gpu(t1_cuda, 10); + + Tensor t2(1); + t2 = t2.constant(20.0f); + float* t2_cuda = CudaCopyFloat(t2.data(), t2.size()); + Eigen::TensorMap > > t2_gpu(t2_cuda, 1); + + float* t3_cuda = CudaCopyFloat(NULL, 10); + Eigen::TensorMap > t3_gpu(t3_cuda, 10); + + t3_gpu.device(gpu_device) = + t1_gpu + t2_gpu.broadcast(Eigen::array(10)); + + Eigen::Tensor t3(10); + cudaMemcpy(t3.data(), t3_gpu.data(), 10 * sizeof(float), + cudaMemcpyDeviceToHost); + + for (int i = 0; i < 10; ++i) { + VERIFY_IS_APPROX(t3(i), t1(i) + t2(0)); + } +} + +void test_cxx11_tensor_cuda() +{ + CALL_SUBTEST(test_cuda_elementwise_small()); + CALL_SUBTEST(test_cuda_elementwise()); + CALL_SUBTEST(test_cuda_reduction()); + CALL_SUBTEST(test_cuda_contraction()); + CALL_SUBTEST(test_cuda_contraction()); + CALL_SUBTEST(test_cuda_convolution_1d()); + CALL_SUBTEST(test_cuda_convolution_2d()); + CALL_SUBTEST(test_cuda_convolution_3d()); + CALL_SUBTEST(test_cuda_constant_broadcast()); +} diff --git a/unsupported/test/cxx11_tensor_device.cpp b/unsupported/test/cxx11_tensor_device.cpp index 26465ee11..f2d7e4ce6 100644 --- a/unsupported/test/cxx11_tensor_device.cpp +++ b/unsupported/test/cxx11_tensor_device.cpp @@ -22,23 +22,23 @@ using Eigen::RowMajor; // Context for evaluation on cpu struct CPUContext { - CPUContext(const Eigen::Tensor& in1, Eigen::Tensor& in2, Eigen::Tensor& out) : in1_(in1), in2_(in2), out_(out), kernel_1d_(2), kernel_2d_(Eigen::array(2,2)), kernel_3d_(Eigen::array(2,2,2)) { + CPUContext(const Eigen::Tensor& in1, Eigen::Tensor& in2, Eigen::Tensor& out) : in1_(in1), in2_(in2), out_(out), kernel_1d_(2), kernel_2d_(2,2), kernel_3d_(2,2,2) { kernel_1d_(0) = 3.14f; kernel_1d_(1) = 2.7f; - kernel_2d_(Eigen::array(0,0)) = 3.14f; - kernel_2d_(Eigen::array(1,0)) = 2.7f; - kernel_2d_(Eigen::array(0,1)) = 0.2f; - kernel_2d_(Eigen::array(1,1)) = 7.0f; - - kernel_3d_(Eigen::array(0,0,0)) = 3.14f; - kernel_3d_(Eigen::array(0,1,0)) = 2.7f; - kernel_3d_(Eigen::array(0,0,1)) = 0.2f; - kernel_3d_(Eigen::array(0,1,1)) = 7.0f; - kernel_3d_(Eigen::array(1,0,0)) = -1.0f; - kernel_3d_(Eigen::array(1,1,0)) = -0.3f; - kernel_3d_(Eigen::array(1,0,1)) = -0.7f; - kernel_3d_(Eigen::array(1,1,1)) = -0.5f; + kernel_2d_(0,0) = 3.14f; + kernel_2d_(1,0) = 2.7f; + kernel_2d_(0,1) = 0.2f; + kernel_2d_(1,1) = 7.0f; + + kernel_3d_(0,0,0) = 3.14f; + kernel_3d_(0,1,0) = 2.7f; + kernel_3d_(0,0,1) = 0.2f; + kernel_3d_(0,1,1) = 7.0f; + kernel_3d_(1,0,0) = -1.0f; + kernel_3d_(1,1,0) = -0.3f; + kernel_3d_(1,0,1) = -0.7f; + kernel_3d_(1,1,1) = -0.5f; } const Eigen::DefaultDevice& device() const { return cpu_device_; } @@ -93,8 +93,8 @@ struct GPUContext { const Eigen::TensorMap >& in2() const { return in2_; } Eigen::TensorMap >& out() { return out_; } Eigen::TensorMap > kernel1d() const { return Eigen::TensorMap >(kernel_1d_, 2); } - Eigen::TensorMap > kernel2d() const { return Eigen::TensorMap >(kernel_2d_, Eigen::array(2, 2)); } - Eigen::TensorMap > kernel3d() const { return Eigen::TensorMap >(kernel_3d_, Eigen::array(2, 2, 2)); } + Eigen::TensorMap > kernel2d() const { return Eigen::TensorMap >(kernel_2d_, 2, 2); } + Eigen::TensorMap > kernel3d() const { return Eigen::TensorMap >(kernel_3d_, 2, 2, 2); } private: const Eigen::TensorMap >& in1_; @@ -150,8 +150,8 @@ static void test_contraction(Context* context) template static void test_1d_convolution(Context* context) { - Eigen::DSizes indices(Eigen::array(0,0,0)); - Eigen::DSizes sizes(Eigen::array(40,49,70)); + Eigen::DSizes indices(0,0,0); + Eigen::DSizes sizes(40,49,70); Eigen::array dims(1); context->out().slice(indices, sizes).device(context->device()) = context->in1().convolve(context->kernel1d(), dims); @@ -160,8 +160,8 @@ static void test_1d_convolution(Context* context) template static void test_2d_convolution(Context* context) { - Eigen::DSizes indices(Eigen::array(0,0,0)); - Eigen::DSizes sizes(Eigen::array(40,49,69)); + Eigen::DSizes indices(0,0,0); + Eigen::DSizes sizes(40,49,69); Eigen::array dims(1,2); context->out().slice(indices, sizes).device(context->device()) = context->in1().convolve(context->kernel2d(), dims); @@ -170,8 +170,8 @@ static void test_2d_convolution(Context* context) template static void test_3d_convolution(Context* context) { - Eigen::DSizes indices(Eigen::array(0,0,0)); - Eigen::DSizes sizes(Eigen::array(39,49,69)); + Eigen::DSizes indices(0,0,0); + Eigen::DSizes sizes(39,49,69); Eigen::array dims(0,1,2); context->out().slice(indices, sizes).device(context->device()) = context->in1().convolve(context->kernel3d(), dims); @@ -179,9 +179,9 @@ static void test_3d_convolution(Context* context) static void test_cpu() { - Eigen::Tensor in1(Eigen::array(40,50,70)); - Eigen::Tensor in2(Eigen::array(40,50,70)); - Eigen::Tensor out(Eigen::array(40,50,70)); + Eigen::Tensor in1(40,50,70); + Eigen::Tensor in2(40,50,70); + Eigen::Tensor out(40,50,70); in1 = in1.random() + in1.constant(10.0f); in2 = in2.random() + in2.constant(10.0f); @@ -191,7 +191,7 @@ static void test_cpu() { for (int i = 0; i < 40; ++i) { for (int j = 0; j < 50; ++j) { for (int k = 0; k < 70; ++k) { - VERIFY_IS_APPROX(out(Eigen::array(i,j,k)), in1(Eigen::array(i,j,k)) + in2(Eigen::array(i,j,k)) * 3.14f + 2.718f); + VERIFY_IS_APPROX(out(i,j,k), in1(i,j,k) + in2(i,j,k) * 3.14f + 2.718f); } } } @@ -200,7 +200,7 @@ static void test_cpu() { for (int i = 0; i < 40; ++i) { for (int j = 0; j < 50; ++j) { for (int k = 0; k < 70; ++k) { - VERIFY_IS_APPROX(out(Eigen::array(i,j,k)), (in1(Eigen::array(i,j,k)) + in2(Eigen::array(i,j,k))) * 3.14f + 2.718f); + VERIFY_IS_APPROX(out(i,j,k), (in1(i,j,k) + in2(i,j,k)) * 3.14f + 2.718f); } } } @@ -209,7 +209,7 @@ static void test_cpu() { for (int i = 0; i < 40; ++i) { for (int j = 0; j < 50; ++j) { for (int k = 0; k < 70; ++k) { - VERIFY_IS_APPROX(out(Eigen::array(i,j,k)), in1(Eigen::array(i,j,k)) + in2(Eigen::array(i,j,k)) * 3.14f + 2.718f); + VERIFY_IS_APPROX(out(i,j,k), in1(i,j,k) + in2(i,j,k) * 3.14f + 2.718f); } } } @@ -217,11 +217,11 @@ static void test_cpu() { test_contraction(&context); for (int i = 0; i < 40; ++i) { for (int j = 0; j < 40; ++j) { - const float result = out(Eigen::array(i,j,0)); + const float result = out(i,j,0); float expected = 0; for (int k = 0; k < 50; ++k) { for (int l = 0; l < 70; ++l) { - expected += in1(Eigen::array(i, k, l)) * in2(Eigen::array(j, k, l)); + expected += in1(i, k, l) * in2(j, k, l); } } VERIFY_IS_APPROX(expected, result); @@ -232,7 +232,7 @@ static void test_cpu() { for (int i = 0; i < 40; ++i) { for (int j = 0; j < 49; ++j) { for (int k = 0; k < 70; ++k) { - VERIFY_IS_APPROX(out(Eigen::array(i,j,k)), (in1(Eigen::array(i,j,k)) * 3.14f + in1(Eigen::array(i,j+1,k)) * 2.7f)); + VERIFY_IS_APPROX(out(i,j,k), (in1(i,j,k) * 3.14f + in1(i,j+1,k) * 2.7f)); } } } @@ -241,9 +241,9 @@ static void test_cpu() { for (int i = 0; i < 40; ++i) { for (int j = 0; j < 49; ++j) { for (int k = 0; k < 69; ++k) { - const float result = out(Eigen::array(i,j,k)); - const float expected = (in1(Eigen::array(i,j,k)) * 3.14f + in1(Eigen::array(i,j+1,k)) * 2.7f) + - (in1(Eigen::array(i,j,k+1)) * 0.2f + in1(Eigen::array(i,j+1,k+1)) * 7.0f); + const float result = out(i,j,k); + const float expected = (in1(i,j,k) * 3.14f + in1(i,j+1,k) * 2.7f) + + (in1(i,j,k+1) * 0.2f + in1(i,j+1,k+1) * 7.0f); if (fabs(expected) < 1e-4 && fabs(result) < 1e-4) { continue; } @@ -256,11 +256,11 @@ static void test_cpu() { for (int i = 0; i < 39; ++i) { for (int j = 0; j < 49; ++j) { for (int k = 0; k < 69; ++k) { - const float result = out(Eigen::array(i,j,k)); - const float expected = (in1(Eigen::array(i,j,k)) * 3.14f + in1(Eigen::array(i,j+1,k)) * 2.7f + - in1(Eigen::array(i,j,k+1)) * 0.2f + in1(Eigen::array(i,j+1,k+1)) * 7.0f) + - (in1(Eigen::array(i+1,j,k)) * -1.0f + in1(Eigen::array(i+1,j+1,k)) * -0.3f + - in1(Eigen::array(i+1,j,k+1)) * -0.7f + in1(Eigen::array(i+1,j+1,k+1)) * -0.5f); + const float result = out(i,j,k); + const float expected = (in1(i,j,k) * 3.14f + in1(i,j+1,k) * 2.7f + + in1(i,j,k+1) * 0.2f + in1(i,j+1,k+1) * 7.0f) + + (in1(i+1,j,k) * -1.0f + in1(i+1,j+1,k) * -0.3f + + in1(i+1,j,k+1) * -0.7f + in1(i+1,j+1,k+1) * -0.5f); if (fabs(expected) < 1e-4 && fabs(result) < 1e-4) { continue; } @@ -271,9 +271,9 @@ static void test_cpu() { } static void test_gpu() { - Eigen::Tensor in1(Eigen::array(40,50,70)); - Eigen::Tensor in2(Eigen::array(40,50,70)); - Eigen::Tensor out(Eigen::array(40,50,70)); + Eigen::Tensor in1(40,50,70); + Eigen::Tensor in2(40,50,70); + Eigen::Tensor out(40,50,70); in1 = in1.random() + in1.constant(10.0f); in2 = in2.random() + in2.constant(10.0f); @@ -291,9 +291,9 @@ static void test_gpu() { cudaMemcpy(d_in1, in1.data(), in1_bytes, cudaMemcpyHostToDevice); cudaMemcpy(d_in2, in2.data(), in2_bytes, cudaMemcpyHostToDevice); - Eigen::TensorMap > gpu_in1(d_in1, Eigen::array(40,50,70)); - Eigen::TensorMap > gpu_in2(d_in2, Eigen::array(40,50,70)); - Eigen::TensorMap > gpu_out(d_out, Eigen::array(40,50,70)); + Eigen::TensorMap > gpu_in1(d_in1, 40,50,70); + Eigen::TensorMap > gpu_in2(d_in2, 40,50,70); + Eigen::TensorMap > gpu_out(d_out, 40,50,70); GPUContext context(gpu_in1, gpu_in2, gpu_out); test_contextual_eval(&context); @@ -301,7 +301,7 @@ static void test_gpu() { for (int i = 0; i < 40; ++i) { for (int j = 0; j < 50; ++j) { for (int k = 0; k < 70; ++k) { - VERIFY_IS_APPROX(out(Eigen::array(i,j,k)), in1(Eigen::array(i,j,k)) + in2(Eigen::array(i,j,k)) * 3.14f + 2.718f); + VERIFY_IS_APPROX(out(i,j,k), in1(i,j,k) + in2(i,j,k) * 3.14f + 2.718f); } } } @@ -311,7 +311,7 @@ static void test_gpu() { for (int i = 0; i < 40; ++i) { for (int j = 0; j < 50; ++j) { for (int k = 0; k < 70; ++k) { - VERIFY_IS_APPROX(out(Eigen::array(i,j,k)), (in1(Eigen::array(i,j,k)) + in2(Eigen::array(i,j,k))) * 3.14f + 2.718f); + VERIFY_IS_APPROX(out(i,j,k), (in1(i,j,k) + in2(i,j,k)) * 3.14f + 2.718f); } } } @@ -321,7 +321,7 @@ static void test_gpu() { for (int i = 0; i < 40; ++i) { for (int j = 0; j < 50; ++j) { for (int k = 0; k < 70; ++k) { - VERIFY_IS_APPROX(out(Eigen::array(i,j,k)), in1(Eigen::array(i,j,k)) + in2(Eigen::array(i,j,k)) * 3.14f + 2.718f); + VERIFY_IS_APPROX(out(i,j,k), in1(i,j,k) + in2(i,j,k) * 3.14f + 2.718f); } } } @@ -330,11 +330,11 @@ static void test_gpu() { assert(cudaMemcpy(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost) == cudaSuccess); for (int i = 0; i < 40; ++i) { for (int j = 0; j < 40; ++j) { - const float result = out(Eigen::array(i,j,0)); + const float result = out(i,j,0); float expected = 0; for (int k = 0; k < 50; ++k) { for (int l = 0; l < 70; ++l) { - expected += in1(Eigen::array(i, k, l)) * in2(Eigen::array(j, k, l)); + expected += in1(i, k, l) * in2(j, k, l); } } VERIFY_IS_APPROX(expected, result); @@ -347,7 +347,7 @@ static void test_gpu() { for (int i = 0; i < 40; ++i) { for (int j = 0; j < 49; ++j) { for (int k = 0; k < 70; ++k) { - VERIFY_IS_APPROX(out(Eigen::array(i,j,k)), (in1(Eigen::array(i,j,k)) * 3.14f + in1(Eigen::array(i,j+1,k)) * 2.7f)); + VERIFY_IS_APPROX(out(i,j,k), (in1(i,j,k) * 3.14f + in1(i,j+1,k) * 2.7f)); } } } @@ -358,9 +358,9 @@ static void test_gpu() { for (int i = 0; i < 40; ++i) { for (int j = 0; j < 49; ++j) { for (int k = 0; k < 69; ++k) { - const float result = out(Eigen::array(i,j,k)); - const float expected = (in1(Eigen::array(i,j,k)) * 3.14f + in1(Eigen::array(i,j+1,k)) * 2.7f + - in1(Eigen::array(i,j,k+1)) * 0.2f + in1(Eigen::array(i,j+1,k+1)) * 7.0f); + const float result = out(i,j,k); + const float expected = (in1(i,j,k) * 3.14f + in1(i,j+1,k) * 2.7f + + in1(i,j,k+1) * 0.2f + in1(i,j+1,k+1) * 7.0f); VERIFY_IS_APPROX(expected, result); } } @@ -372,11 +372,11 @@ static void test_gpu() { for (int i = 0; i < 39; ++i) { for (int j = 0; j < 49; ++j) { for (int k = 0; k < 69; ++k) { - const float result = out(Eigen::array(i,j,k)); - const float expected = (in1(Eigen::array(i,j,k)) * 3.14f + in1(Eigen::array(i,j+1,k)) * 2.7f + - in1(Eigen::array(i,j,k+1)) * 0.2f + in1(Eigen::array(i,j+1,k+1)) * 7.0f + - in1(Eigen::array(i+1,j,k)) * -1.0f + in1(Eigen::array(i+1,j+1,k)) * -0.3f + - in1(Eigen::array(i+1,j,k+1)) * -0.7f + in1(Eigen::array(i+1,j+1,k+1)) * -0.5f); + const float result = out(i,j,k); + const float expected = (in1(i,j,k) * 3.14f + in1(i,j+1,k) * 2.7f + + in1(i,j,k+1) * 0.2f + in1(i,j+1,k+1) * 7.0f + + in1(i+1,j,k) * -1.0f + in1(i+1,j+1,k) * -0.3f + + in1(i+1,j,k+1) * -0.7f + in1(i+1,j+1,k+1) * -0.5f); VERIFY_IS_APPROX(expected, result); } } diff --git a/unsupported/test/cxx11_tensor_dimension.cpp b/unsupported/test/cxx11_tensor_dimension.cpp index c806b623f..0cc4e86f7 100644 --- a/unsupported/test/cxx11_tensor_dimension.cpp +++ b/unsupported/test/cxx11_tensor_dimension.cpp @@ -16,12 +16,15 @@ using Eigen::Tensor; static void test_dynamic_size() { - Eigen::DSizes dimensions(Eigen::array{{2,3,7}}); + Eigen::DSizes dimensions(2,3,7); VERIFY_IS_EQUAL((int)Eigen::internal::array_get<0>(dimensions), 2); VERIFY_IS_EQUAL((int)Eigen::internal::array_get<1>(dimensions), 3); VERIFY_IS_EQUAL((int)Eigen::internal::array_get<2>(dimensions), 7); VERIFY_IS_EQUAL(dimensions.TotalSize(), (size_t)2*3*7); + VERIFY_IS_EQUAL((int)dimensions[0], 2); + VERIFY_IS_EQUAL((int)dimensions[1], 3); + VERIFY_IS_EQUAL((int)dimensions[2], 7); } static void test_fixed_size() @@ -37,9 +40,9 @@ static void test_fixed_size() static void test_match() { - Eigen::DSizes dyn(Eigen::array{{2,3,7}}); + Eigen::DSizes dyn(2,3,7); Eigen::Sizes<2,3,7> stat; - VERIFY_IS_EQUAL(Eigen::internal::dimensions_match(dyn, stat), true); + VERIFY_IS_EQUAL(Eigen::dimensions_match(dyn, stat), true); } diff --git a/unsupported/test/cxx11_tensor_expr.cpp b/unsupported/test/cxx11_tensor_expr.cpp index e85fcbfa9..792fdeade 100644 --- a/unsupported/test/cxx11_tensor_expr.cpp +++ b/unsupported/test/cxx11_tensor_expr.cpp @@ -125,6 +125,12 @@ static void test_3d() mat7 = mat1.cwiseMax(mat5 * 2.0f).exp(); Tensor mat8(2,3,7); mat8 = (-mat2).exp() * 3.14f; + Tensor mat9(2,3,7); + mat9 = mat2 + 3.14f; + Tensor mat10(2,3,7); + mat10 = mat2 - 3.14f; + Tensor mat11(2,3,7); + mat11 = mat2 / 3.14f; val = 1.0; for (int i = 0; i < 2; ++i) { @@ -136,6 +142,9 @@ static void test_3d() VERIFY_IS_APPROX(mat6(i,j,k), sqrtf(val) * 3.14f); VERIFY_IS_APPROX(mat7(i,j,k), expf((std::max)(val, mat5(i,j,k) * 2.0f))); VERIFY_IS_APPROX(mat8(i,j,k), expf(-val) * 3.14f); + VERIFY_IS_APPROX(mat9(i,j,k), val + 3.14f); + VERIFY_IS_APPROX(mat10(i,j,k), val - 3.14f); + VERIFY_IS_APPROX(mat11(i,j,k), val / 3.14f); val += 1.0; } } @@ -172,6 +181,36 @@ static void test_constants() } } +static void test_boolean() +{ + Tensor vec(6); + std::copy_n(std::begin({0, 1, 2, 3, 4, 5}), 6, vec.data()); + + // Test ||. + Tensor bool1 = vec < vec.constant(1) || vec > vec.constant(4); + VERIFY_IS_EQUAL(bool1[0], true); + VERIFY_IS_EQUAL(bool1[1], false); + VERIFY_IS_EQUAL(bool1[2], false); + VERIFY_IS_EQUAL(bool1[3], false); + VERIFY_IS_EQUAL(bool1[4], false); + VERIFY_IS_EQUAL(bool1[5], true); + + // Test &&, including cast of operand vec. + Tensor bool2 = vec.cast() && vec < vec.constant(4); + VERIFY_IS_EQUAL(bool2[0], false); + VERIFY_IS_EQUAL(bool2[1], true); + VERIFY_IS_EQUAL(bool2[2], true); + VERIFY_IS_EQUAL(bool2[3], true); + VERIFY_IS_EQUAL(bool2[4], false); + VERIFY_IS_EQUAL(bool2[5], false); + + // Compilation tests: + // Test Tensor against results of cast or comparison; verifies that + // CoeffReturnType is set to match Op return type of bool for Unary and Binary + // Ops. + Tensor bool3 = vec.cast() && bool2; + bool3 = vec < vec.constant(4) && bool2; +} static void test_functors() { @@ -258,6 +297,7 @@ void test_cxx11_tensor_expr() CALL_SUBTEST(test_2d()); CALL_SUBTEST(test_3d()); CALL_SUBTEST(test_constants()); + CALL_SUBTEST(test_boolean()); CALL_SUBTEST(test_functors()); CALL_SUBTEST(test_type_casting()); CALL_SUBTEST(test_select()); diff --git a/unsupported/test/cxx11_tensor_forced_eval.cpp b/unsupported/test/cxx11_tensor_forced_eval.cpp index 529584a7b..ad9de867d 100644 --- a/unsupported/test/cxx11_tensor_forced_eval.cpp +++ b/unsupported/test/cxx11_tensor_forced_eval.cpp @@ -45,7 +45,34 @@ static void test_simple() } +static void test_const() +{ + MatrixXf input(3,3); + input.setRandom(); + MatrixXf output = input; + output.rowwise() -= input.colwise().maxCoeff(); + + Eigen::array depth_dim; + depth_dim[0] = 0; + Tensor::Dimensions dims2d; + dims2d[0] = 1; + dims2d[1] = 3; + Eigen::array bcast; + bcast[0] = 3; + bcast[1] = 1; + const TensorMap> input_tensor(input.data(), 3, 3); + Tensor output_tensor= (input_tensor - input_tensor.maximum(depth_dim).eval().reshape(dims2d).broadcast(bcast)); + + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 3; ++j) { + VERIFY_IS_APPROX(output(i, j), output_tensor(i, j)); + } + } +} + + void test_cxx11_tensor_forced_eval() { CALL_SUBTEST(test_simple()); + CALL_SUBTEST(test_const()); } diff --git a/unsupported/test/cxx11_tensor_image_patch.cpp b/unsupported/test/cxx11_tensor_image_patch.cpp index 55d35eac0..26854f5a4 100644 --- a/unsupported/test/cxx11_tensor_image_patch.cpp +++ b/unsupported/test/cxx11_tensor_image_patch.cpp @@ -28,6 +28,9 @@ static void test_simple_patch() VERIFY_IS_EQUAL(single_pixel_patch.dimension(4), 7); for (int i = 0; i < tensor.size(); ++i) { + if (tensor.data()[i] != single_pixel_patch.data()[i]) { + std::cout << "Mismatch detected at index " << i << " : " << tensor.data()[i] << " vs " << single_pixel_patch.data()[i] << std::endl; + } VERIFY_IS_EQUAL(single_pixel_patch.data()[i], tensor.data()[i]); } @@ -51,6 +54,9 @@ static void test_simple_patch() if (r-1+i >= 0 && c-2+j >= 0 && r-1+i < 3 && c-2+j < 5) { expected = tensor(d, r-1+i, c-2+j, b); } + if (entire_image_patch(d, r, c, patchId, b) != expected) { + std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl; + } VERIFY_IS_EQUAL(entire_image_patch(d, r, c, patchId, b), expected); } } @@ -68,6 +74,11 @@ static void test_simple_patch() VERIFY_IS_EQUAL(twod_patch.dimension(3), 3*5); VERIFY_IS_EQUAL(twod_patch.dimension(4), 7); + // Based on the calculation described in TensorTraits.h, padding happens to be 0. + int row_padding = 0; + int col_padding = 0; + int stride = 1; + for (int i = 0; i < 3; ++i) { for (int j = 0; j < 5; ++j) { int patchId = i+3*j; @@ -76,8 +87,13 @@ static void test_simple_patch() for (int d = 0; d < 2; ++d) { for (int b = 0; b < 7; ++b) { float expected = 0.0f; - if (r-1+i >= 0 && c-1+j >= 0 && r-1+i < 3 && c-1+j < 5) { - expected = tensor(d, r-1+i, c-1+j, b); + int row_offset = r*stride + i - row_padding; + int col_offset = c*stride + j - col_padding; + if (row_offset >= 0 && col_offset >= 0 && row_offset < tensor.dimension(1) && col_offset < tensor.dimension(2)) { + expected = tensor(d, row_offset, col_offset, b); + } + if (twod_patch(d, r, c, patchId, b) != expected) { + std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl; } VERIFY_IS_EQUAL(twod_patch(d, r, c, patchId, b), expected); } @@ -88,6 +104,156 @@ static void test_simple_patch() } } +// Verifies VALID padding (no padding) with incrementing values. +static void test_patch_padding_valid() +{ + int input_depth = 3; + int input_rows = 3; + int input_cols = 3; + int input_batches = 1; + int ksize = 2; // Corresponds to the Rows and Cols for tensor.extract_image_patches<>. + int stride = 2; // Only same stride is supported. + Tensor tensor(input_depth, input_rows, input_cols, input_batches); + // Initializes tensor with incrementing numbers. + for (int i = 0; i < tensor.size(); ++i) { + tensor.data()[i] = i + 1; + } + Tensor result = tensor.extract_image_patches(ksize, ksize, stride, stride, PADDING_VALID); + + VERIFY_IS_EQUAL(result.dimension(0), input_depth); // depth + VERIFY_IS_EQUAL(result.dimension(1), ksize); // kernel rows + VERIFY_IS_EQUAL(result.dimension(2), ksize); // kernel cols + VERIFY_IS_EQUAL(result.dimension(3), 1); // number of patches + VERIFY_IS_EQUAL(result.dimension(4), input_batches); // number of batches + + // No padding is carried out. + int row_padding = 0; + int col_padding = 0; + + for (int i = 0; (i+stride+ksize-1) < input_rows; i += stride) { // input rows + for (int j = 0; (j+stride+ksize-1) < input_cols; j += stride) { // input cols + int patchId = i+input_rows*j; + for (int r = 0; r < ksize; ++r) { // patch rows + for (int c = 0; c < ksize; ++c) { // patch cols + for (int d = 0; d < input_depth; ++d) { // depth + for (int b = 0; b < input_batches; ++b) { // batch + float expected = 0.0f; + int row_offset = r + i - row_padding; + int col_offset = c + j - col_padding; + if (row_offset >= 0 && col_offset >= 0 && row_offset < input_rows && col_offset < input_cols) { + expected = tensor(d, row_offset, col_offset, b); + } + if (result(d, r, c, patchId, b) != expected) { + std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl; + } + VERIFY_IS_EQUAL(result(d, r, c, patchId, b), expected); + } + } + } + } + } + } +} + +// Verifies VALID padding (no padding) with the same value. +static void test_patch_padding_valid_same_value() +{ + int input_depth = 1; + int input_rows = 5; + int input_cols = 5; + int input_batches = 2; + int ksize = 3; // Corresponds to the Rows and Cols for tensor.extract_image_patches<>. + int stride = 2; // Only same stride is supported. + Tensor tensor(input_depth, input_rows, input_cols, input_batches); + tensor = tensor.constant(11.0f); + Tensor result = tensor.extract_image_patches(ksize, ksize, stride, stride, PADDING_VALID); + + VERIFY_IS_EQUAL(result.dimension(0), input_depth); // depth + VERIFY_IS_EQUAL(result.dimension(1), ksize); // kernel rows + VERIFY_IS_EQUAL(result.dimension(2), ksize); // kernel cols + VERIFY_IS_EQUAL(result.dimension(3), 4); // number of patches + VERIFY_IS_EQUAL(result.dimension(4), input_batches); // number of batches + + // No padding is carried out. + int row_padding = 0; + int col_padding = 0; + + for (int i = 0; (i+stride+ksize-1) <= input_rows; i += stride) { // input rows + for (int j = 0; (j+stride+ksize-1) <= input_cols; j += stride) { // input cols + int patchId = i+input_rows*j; + for (int r = 0; r < ksize; ++r) { // patch rows + for (int c = 0; c < ksize; ++c) { // patch cols + for (int d = 0; d < input_depth; ++d) { // depth + for (int b = 0; b < input_batches; ++b) { // batch + float expected = 0.0f; + int row_offset = r + i - row_padding; + int col_offset = c + j - col_padding; + if (row_offset >= 0 && col_offset >= 0 && row_offset < input_rows && col_offset < input_cols) { + expected = tensor(d, row_offset, col_offset, b); + } + if (result(d, r, c, patchId, b) != expected) { + std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl; + } + VERIFY_IS_EQUAL(result(d, r, c, patchId, b), expected); + } + } + } + } + } + } +} + +// Verifies SAME padding. +static void test_patch_padding_same() +{ + int input_depth = 3; + int input_rows = 4; + int input_cols = 2; + int input_batches = 1; + int ksize = 2; // Corresponds to the Rows and Cols for tensor.extract_image_patches<>. + int stride = 2; // Only same stride is supported. + Tensor tensor(input_depth, input_rows, input_cols, input_batches); + // Initializes tensor with incrementing numbers. + for (int i = 0; i < tensor.size(); ++i) { + tensor.data()[i] = i + 1; + } + Tensor result = tensor.extract_image_patches(ksize, ksize, stride, stride, PADDING_SAME); + + VERIFY_IS_EQUAL(result.dimension(0), input_depth); // depth + VERIFY_IS_EQUAL(result.dimension(1), ksize); // kernel rows + VERIFY_IS_EQUAL(result.dimension(2), ksize); // kernel cols + VERIFY_IS_EQUAL(result.dimension(3), 2); // number of patches + VERIFY_IS_EQUAL(result.dimension(4), input_batches); // number of batches + + // Based on the calculation described in TensorTraits.h, padding happens to be + // 0. + int row_padding = 0; + int col_padding = 0; + + for (int i = 0; (i+stride+ksize-1) <= input_rows; i += stride) { // input rows + for (int j = 0; (j+stride+ksize-1) <= input_cols; j += stride) { // input cols + int patchId = i+input_rows*j; + for (int r = 0; r < ksize; ++r) { // patch rows + for (int c = 0; c < ksize; ++c) { // patch cols + for (int d = 0; d < input_depth; ++d) { // depth + for (int b = 0; b < input_batches; ++b) { // batch + float expected = 0.0f; + int row_offset = r*stride + i - row_padding; + int col_offset = c*stride + j - col_padding; + if (row_offset >= 0 && col_offset >= 0 && row_offset < input_rows && col_offset < input_cols) { + expected = tensor(d, row_offset, col_offset, b); + } + if (result(d, r, c, patchId, b) != expected) { + std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl; + } + VERIFY_IS_EQUAL(result(d, r, c, patchId, b), expected); + } + } + } + } + } + } +} static void test_patch_no_extra_dim() { @@ -103,6 +269,9 @@ static void test_patch_no_extra_dim() VERIFY_IS_EQUAL(single_pixel_patch.dimension(3), 3*5); for (int i = 0; i < tensor.size(); ++i) { + if (tensor.data()[i] != single_pixel_patch.data()[i]) { + std::cout << "Mismatch detected at index " << i << " : " << tensor.data()[i] << " vs " << single_pixel_patch.data()[i] << std::endl; + } VERIFY_IS_EQUAL(single_pixel_patch.data()[i], tensor.data()[i]); } @@ -124,6 +293,9 @@ static void test_patch_no_extra_dim() if (r-1+i >= 0 && c-2+j >= 0 && r-1+i < 3 && c-2+j < 5) { expected = tensor(d, r-1+i, c-2+j); } + if (entire_image_patch(d, r, c, patchId) != expected) { + std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << std::endl; + } VERIFY_IS_EQUAL(entire_image_patch(d, r, c, patchId), expected); } } @@ -139,6 +311,11 @@ static void test_patch_no_extra_dim() VERIFY_IS_EQUAL(twod_patch.dimension(2), 2); VERIFY_IS_EQUAL(twod_patch.dimension(3), 3*5); + // Based on the calculation described in TensorTraits.h, padding happens to be 0. + int row_padding = 0; + int col_padding = 0; + int stride = 1; + for (int i = 0; i < 3; ++i) { for (int j = 0; j < 5; ++j) { int patchId = i+3*j; @@ -146,8 +323,13 @@ static void test_patch_no_extra_dim() for (int c = 0; c < 2; ++c) { for (int d = 0; d < 2; ++d) { float expected = 0.0f; - if (r-1+i >= 0 && c-1+j >= 0 && r-1+i < 3 && c-1+j < 5) { - expected = tensor(d, r-1+i, c-1+j); + int row_offset = r*stride + i - row_padding; + int col_offset = c*stride + j - col_padding; + if (row_offset >= 0 && col_offset >= 0 && row_offset < tensor.dimension(1) && col_offset < tensor.dimension(2)) { + expected = tensor(d, row_offset, col_offset); + } + if (twod_patch(d, r, c, patchId) != expected) { + std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << std::endl; } VERIFY_IS_EQUAL(twod_patch(d, r, c, patchId), expected); } @@ -181,6 +363,9 @@ static void test_imagenet_patches() if (r-5+i >= 0 && c-5+j >= 0 && r-5+i < 128 && c-5+j < 128) { expected = l_in(d, r-5+i, c-5+j, b); } + if (l_out(d, r, c, patchId, b) != expected) { + std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl; + } VERIFY_IS_EQUAL(l_out(d, r, c, patchId, b), expected); } } @@ -208,6 +393,9 @@ static void test_imagenet_patches() if (r-4+i >= 0 && c-4+j >= 0 && r-4+i < 64 && c-4+j < 64) { expected = l_in(d, r-4+i, c-4+j, b); } + if (l_out(d, r, c, patchId, b) != expected) { + std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl; + } VERIFY_IS_EQUAL(l_out(d, r, c, patchId, b), expected); } } @@ -235,6 +423,9 @@ static void test_imagenet_patches() if (r-3+i >= 0 && c-3+j >= 0 && r-3+i < 16 && c-3+j < 16) { expected = l_in(d, r-3+i, c-3+j, b); } + if (l_out(d, r, c, patchId, b) != expected) { + std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl; + } VERIFY_IS_EQUAL(l_out(d, r, c, patchId, b), expected); } } @@ -262,6 +453,9 @@ static void test_imagenet_patches() if (r-1+i >= 0 && c-1+j >= 0 && r-1+i < 13 && c-1+j < 13) { expected = l_in(d, r-1+i, c-1+j, b); } + if (l_out(d, r, c, patchId, b) != expected) { + std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl; + } VERIFY_IS_EQUAL(l_out(d, r, c, patchId, b), expected); } } @@ -271,10 +465,12 @@ static void test_imagenet_patches() } } - void test_cxx11_tensor_image_patch() { CALL_SUBTEST(test_simple_patch()); CALL_SUBTEST(test_patch_no_extra_dim()); + CALL_SUBTEST(test_patch_padding_valid()); + CALL_SUBTEST(test_patch_padding_valid_same_value()); + CALL_SUBTEST(test_patch_padding_same()); CALL_SUBTEST(test_imagenet_patches()); } diff --git a/unsupported/test/cxx11_tensor_map.cpp b/unsupported/test/cxx11_tensor_map.cpp index 478c20306..9cf2eb150 100644 --- a/unsupported/test/cxx11_tensor_map.cpp +++ b/unsupported/test/cxx11_tensor_map.cpp @@ -29,6 +29,7 @@ static void test_1d() vec1(4) = 23; vec2(4) = 4; vec1(5) = 42; vec2(5) = 5; + VERIFY_IS_EQUAL(vec1.rank(), 1); VERIFY_IS_EQUAL(vec1.size(), 6); VERIFY_IS_EQUAL(vec1.dimension(0), 6); @@ -69,10 +70,12 @@ static void test_2d() TensorMap> mat3(mat1.data(), 2, 3); TensorMap> mat4(mat2.data(), 2, 3); + VERIFY_IS_EQUAL(mat3.rank(), 2); VERIFY_IS_EQUAL(mat3.size(), 6); VERIFY_IS_EQUAL(mat3.dimension(0), 2); VERIFY_IS_EQUAL(mat3.dimension(1), 3); + VERIFY_IS_EQUAL(mat4.rank(), 2); VERIFY_IS_EQUAL(mat4.size(), 6); VERIFY_IS_EQUAL(mat4.dimension(0), 2); VERIFY_IS_EQUAL(mat4.dimension(1), 3); @@ -109,13 +112,15 @@ static void test_3d() } TensorMap> mat3(mat1.data(), 2, 3, 7); - TensorMap> mat4(mat2.data(), 2, 3, 7); + TensorMap> mat4(mat2.data(), array{{2, 3, 7}}); + VERIFY_IS_EQUAL(mat3.rank(), 3); VERIFY_IS_EQUAL(mat3.size(), 2*3*7); VERIFY_IS_EQUAL(mat3.dimension(0), 2); VERIFY_IS_EQUAL(mat3.dimension(1), 3); VERIFY_IS_EQUAL(mat3.dimension(2), 7); + VERIFY_IS_EQUAL(mat4.rank(), 3); VERIFY_IS_EQUAL(mat4.size(), 2*3*7); VERIFY_IS_EQUAL(mat4.dimension(0), 2); VERIFY_IS_EQUAL(mat4.dimension(1), 3); diff --git a/unsupported/test/cxx11_tensor_morphing.cpp b/unsupported/test/cxx11_tensor_morphing.cpp index 78b0dade0..b4b0a55b6 100644 --- a/unsupported/test/cxx11_tensor_morphing.cpp +++ b/unsupported/test/cxx11_tensor_morphing.cpp @@ -89,19 +89,19 @@ static void test_reshape_as_lvalue() } } - +template static void test_simple_slice() { - Tensor tensor(2,3,5,7,11); + Tensor tensor(2,3,5,7,11); tensor.setRandom(); - Tensor slice1(1,1,1,1,1); + Tensor slice1(1,1,1,1,1); Eigen::DSizes indices(1,2,3,4,5); Eigen::DSizes sizes(1,1,1,1,1); slice1 = tensor.slice(indices, sizes); VERIFY_IS_EQUAL(slice1(0,0,0,0,0), tensor(1,2,3,4,5)); - Tensor slice2(1,1,2,2,3); + Tensor slice2(1,1,2,2,3); Eigen::DSizes indices2(1,1,3,4,5); Eigen::DSizes sizes2(1,1,2,2,3); slice2 = tensor.slice(indices2, sizes2); @@ -114,7 +114,7 @@ static void test_simple_slice() } } - +// TODO(andydavis) Add RowMajor support when TensorContract supports RowMajor. static void test_slice_in_expr() { MatrixXf m1(7,7); MatrixXf m2(3,3); @@ -141,21 +141,28 @@ static void test_slice_in_expr() { VERIFY_IS_APPROX(res(i,j), m3(i,j)); } } -} + // Take an arbitrary slice of an arbitrarily sized tensor. + TensorMap> tensor4(m1.data(), 7, 7); + Tensor tensor6 = tensor4.reshape(DSizes(7*7)).exp().slice(DSizes(0), DSizes(35)); + for (int i = 0; i < 35; ++i) { + VERIFY_IS_APPROX(tensor6(i), expf(tensor4.data()[i])); + } +} +template static void test_slice_as_lvalue() { - Tensor tensor1(2,2,7); + Tensor tensor1(2,2,7); tensor1.setRandom(); - Tensor tensor2(2,2,7); + Tensor tensor2(2,2,7); tensor2.setRandom(); - Tensor tensor3(4,3,5); + Tensor tensor3(4,3,5); tensor3.setRandom(); - Tensor tensor4(4,3,2); + Tensor tensor4(4,3,2); tensor4.setRandom(); - Tensor result(4,5,7); + Tensor result(4,5,7); Eigen::DSizes sizes12(2,2,7); Eigen::DSizes first_slice(0,0,0); result.slice(first_slice, sizes12) = tensor1; @@ -190,10 +197,10 @@ static void test_slice_as_lvalue() } } - +template static void test_slice_raw_data() { - Tensor tensor(3,5,7,11); + Tensor tensor(3,5,7,11); tensor.setRandom(); Eigen::DSizes offsets(1,2,3,4); @@ -203,40 +210,78 @@ static void test_slice_raw_data() VERIFY_IS_EQUAL(slice1.dimensions().TotalSize(), 1ul); VERIFY_IS_EQUAL(slice1.data()[0], tensor(1,2,3,4)); - extents = Eigen::DSizes(2,1,1,1); - auto slice2 = SliceEvaluator(tensor.slice(offsets, extents), DefaultDevice()); - VERIFY_IS_EQUAL(slice2.dimensions().TotalSize(), 2ul); - VERIFY_IS_EQUAL(slice2.data()[0], tensor(1,2,3,4)); - VERIFY_IS_EQUAL(slice2.data()[1], tensor(2,2,3,4)); + if (DataLayout == ColMajor) { + extents = Eigen::DSizes(2,1,1,1); + auto slice2 = SliceEvaluator(tensor.slice(offsets, extents), DefaultDevice()); + VERIFY_IS_EQUAL(slice2.dimensions().TotalSize(), 2ul); + VERIFY_IS_EQUAL(slice2.data()[0], tensor(1,2,3,4)); + VERIFY_IS_EQUAL(slice2.data()[1], tensor(2,2,3,4)); + } else { + extents = Eigen::DSizes(1,1,1,2); + auto slice2 = SliceEvaluator(tensor.slice(offsets, extents), DefaultDevice()); + VERIFY_IS_EQUAL(slice2.dimensions().TotalSize(), 2ul); + VERIFY_IS_EQUAL(slice2.data()[0], tensor(1,2,3,4)); + VERIFY_IS_EQUAL(slice2.data()[1], tensor(1,2,3,5)); + } extents = Eigen::DSizes(1,2,1,1); auto slice3 = SliceEvaluator(tensor.slice(offsets, extents), DefaultDevice()); VERIFY_IS_EQUAL(slice3.dimensions().TotalSize(), 2ul); VERIFY_IS_EQUAL(slice3.data(), static_cast(0)); - offsets = Eigen::DSizes(0,2,3,4); - extents = Eigen::DSizes(3,2,1,1); - auto slice4 = SliceEvaluator(tensor.slice(offsets, extents), DefaultDevice()); - VERIFY_IS_EQUAL(slice4.dimensions().TotalSize(), 6ul); - for (int i = 0; i < 3; ++i) { - for (int j = 0; j < 2; ++j) { - VERIFY_IS_EQUAL(slice4.data()[i+3*j], tensor(i,2+j,3,4)); + if (DataLayout == ColMajor) { + offsets = Eigen::DSizes(0,2,3,4); + extents = Eigen::DSizes(3,2,1,1); + auto slice4 = SliceEvaluator(tensor.slice(offsets, extents), DefaultDevice()); + VERIFY_IS_EQUAL(slice4.dimensions().TotalSize(), 6ul); + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 2; ++j) { + VERIFY_IS_EQUAL(slice4.data()[i+3*j], tensor(i,2+j,3,4)); + } + } + } else { + offsets = Eigen::DSizes(1,2,3,0); + extents = Eigen::DSizes(1,1,2,11); + auto slice4 = SliceEvaluator(tensor.slice(offsets, extents), DefaultDevice()); + VERIFY_IS_EQUAL(slice4.dimensions().TotalSize(), 22ul); + for (int l = 0; l < 11; ++l) { + for (int k = 0; k < 2; ++k) { + VERIFY_IS_EQUAL(slice4.data()[l+11*k], tensor(1,2,3+k,l)); + } } } - offsets = Eigen::DSizes(0,0,0,4); - extents = Eigen::DSizes(3,5,7,2); - auto slice5 = SliceEvaluator(tensor.slice(offsets, extents), DefaultDevice()); - VERIFY_IS_EQUAL(slice5.dimensions().TotalSize(), 210ul); - for (int i = 0; i < 3; ++i) { - for (int j = 0; j < 5; ++j) { + if (DataLayout == ColMajor) { + offsets = Eigen::DSizes(0,0,0,4); + extents = Eigen::DSizes(3,5,7,2); + auto slice5 = SliceEvaluator(tensor.slice(offsets, extents), DefaultDevice()); + VERIFY_IS_EQUAL(slice5.dimensions().TotalSize(), 210ul); + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 5; ++j) { + for (int k = 0; k < 7; ++k) { + for (int l = 0; l < 2; ++l) { + int slice_index = i + 3 * (j + 5 * (k + 7 * l)); + VERIFY_IS_EQUAL(slice5.data()[slice_index], tensor(i,j,k,l+4)); + } + } + } + } + } else { + offsets = Eigen::DSizes(1,0,0,0); + extents = Eigen::DSizes(2,5,7,11); + auto slice5 = SliceEvaluator(tensor.slice(offsets, extents), DefaultDevice()); + VERIFY_IS_EQUAL(slice5.dimensions().TotalSize(), 770ul); + for (int l = 0; l < 11; ++l) { for (int k = 0; k < 7; ++k) { - for (int l = 0; l < 2; ++l) { - int slice_index = i + 3 * (j + 5 * (k + 7 * l)); - VERIFY_IS_EQUAL(slice5.data()[slice_index], tensor(i,j,k,l+4)); + for (int j = 0; j < 5; ++j) { + for (int i = 0; i < 2; ++i) { + int slice_index = l + 11 * (k + 7 * (j + 5 * i)); + VERIFY_IS_EQUAL(slice5.data()[slice_index], tensor(i+1,j,k,l)); + } } } } + } offsets = Eigen::DSizes(0,0,0,0); @@ -247,14 +292,38 @@ static void test_slice_raw_data() } +static void test_composition() +{ + Eigen::Tensor matrix(7, 11); + matrix.setRandom(); + + const DSizes newDims{{1, 1, 11}}; + Eigen::Tensor tensor = + matrix.slice(DSizes(2, 0), DSizes(1, 11)).reshape(newDims); + + VERIFY_IS_EQUAL(tensor.dimensions().TotalSize(), 11ul); + VERIFY_IS_EQUAL(tensor.dimension(0), 1); + VERIFY_IS_EQUAL(tensor.dimension(1), 1); + VERIFY_IS_EQUAL(tensor.dimension(2), 11); + for (int i = 0; i < 11; ++i) { + VERIFY_IS_EQUAL(tensor(0,0,i), matrix(2,i)); + } +} + + void test_cxx11_tensor_morphing() { CALL_SUBTEST(test_simple_reshape()); CALL_SUBTEST(test_reshape_in_expr()); CALL_SUBTEST(test_reshape_as_lvalue()); - CALL_SUBTEST(test_simple_slice()); + CALL_SUBTEST(test_simple_slice()); + CALL_SUBTEST(test_simple_slice()); CALL_SUBTEST(test_slice_in_expr()); - CALL_SUBTEST(test_slice_as_lvalue()); - CALL_SUBTEST(test_slice_raw_data()); + CALL_SUBTEST(test_slice_as_lvalue()); + CALL_SUBTEST(test_slice_as_lvalue()); + CALL_SUBTEST(test_slice_raw_data()); + CALL_SUBTEST(test_slice_raw_data()); + + CALL_SUBTEST(test_composition()); } diff --git a/unsupported/test/cxx11_tensor_of_strings.cpp b/unsupported/test/cxx11_tensor_of_strings.cpp index 0ffa341c4..8d05d154e 100644 --- a/unsupported/test/cxx11_tensor_of_strings.cpp +++ b/unsupported/test/cxx11_tensor_of_strings.cpp @@ -8,19 +8,18 @@ // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. #include "main.h" -#include + #include -using std::string; using Eigen::Tensor; using Eigen::TensorMap; static void test_assign() { - string data1[6]; - TensorMap> mat1(data1, 2, 3); - string data2[6]; - const TensorMap> mat2(data2, 2, 3); + std::string data1[6]; + TensorMap> mat1(data1, 2, 3); + std::string data2[6]; + const TensorMap> mat2(data2, 2, 3); for (int i = 0; i < 6; ++i) { std::ostringstream s1; @@ -31,16 +30,16 @@ static void test_assign() data2[i] = s2.str(); } - Tensor rslt1; + Tensor rslt1; rslt1 = mat1; - Tensor rslt2; + Tensor rslt2; rslt2 = mat2; - Tensor rslt3 = mat1; - Tensor rslt4 = mat2; + Tensor rslt3 = mat1; + Tensor rslt4 = mat2; - Tensor rslt5(mat1); - Tensor rslt6(mat2); + Tensor rslt5(mat1); + Tensor rslt6(mat2); for (int i = 0; i < 2; ++i) { for (int j = 0; j < 3; ++j) { @@ -57,8 +56,8 @@ static void test_assign() static void test_concat() { - Tensor t1(2, 3); - Tensor t2(2, 3); + Tensor t1(2, 3); + Tensor t2(2, 3); for (int i = 0; i < 2; ++i) { for (int j = 0; j < 3; ++j) { @@ -71,7 +70,7 @@ static void test_concat() } } - Tensor result = t1.concatenate(t2, 1); + Tensor result = t1.concatenate(t2, 1); VERIFY_IS_EQUAL(result.dimension(0), 2); VERIFY_IS_EQUAL(result.dimension(1), 6); @@ -86,7 +85,7 @@ static void test_concat() static void test_slices() { - Tensor data(2, 6); + Tensor data(2, 6); for (int i = 0; i < 2; ++i) { for (int j = 0; j < 3; ++j) { std::ostringstream s1; @@ -99,8 +98,8 @@ static void test_slices() const Eigen::DSizes first_half{{0, 0}}; const Eigen::DSizes second_half{{0, 3}}; - Tensor t1 = data.slice(first_half, half_size); - Tensor t2 = data.slice(second_half, half_size); + Tensor t1 = data.slice(first_half, half_size); + Tensor t2 = data.slice(second_half, half_size); for (int i = 0; i < 2; ++i) { for (int j = 0; j < 3; ++j) { @@ -113,8 +112,8 @@ static void test_slices() static void test_additions() { - Tensor data1(3); - Tensor data2(3); + Tensor data1(3); + Tensor data2(3); for (int i = 0; i < 3; ++i) { data1(i) = "abc"; std::ostringstream s1; @@ -122,16 +121,26 @@ static void test_additions() data2(i) = s1.str(); } - Tensor sum = data1 + data2; + Tensor sum = data1 + data2; for (int i = 0; i < 3; ++i) { std::ostringstream concat; concat << "abc" << i; - string expected = concat.str(); + std::string expected = concat.str(); VERIFY_IS_EQUAL(sum(i), expected); } } +static void test_initialization() +{ + Tensor a(2, 3); + a.setConstant(std::string("foo")); + for (int i = 0; i < 2*3; ++i) { + VERIFY_IS_EQUAL(a(i), std::string("foo")); + } +} + + void test_cxx11_tensor_of_strings() { // Beware: none of this is likely to ever work on a GPU. @@ -139,4 +148,5 @@ void test_cxx11_tensor_of_strings() CALL_SUBTEST(test_concat()); CALL_SUBTEST(test_slices()); CALL_SUBTEST(test_additions()); + CALL_SUBTEST(test_initialization()); } diff --git a/unsupported/test/cxx11_tensor_padding.cpp b/unsupported/test/cxx11_tensor_padding.cpp index 6f74216dd..ffa19896e 100644 --- a/unsupported/test/cxx11_tensor_padding.cpp +++ b/unsupported/test/cxx11_tensor_padding.cpp @@ -13,9 +13,10 @@ using Eigen::Tensor; +template static void test_simple_padding() { - Tensor tensor(2,3,5,7); + Tensor tensor(2,3,5,7); tensor.setRandom(); array, 4> paddings; @@ -24,7 +25,7 @@ static void test_simple_padding() paddings[2] = std::make_pair(3, 4); paddings[3] = std::make_pair(0, 0); - Tensor padded; + Tensor padded; padded = tensor.pad(paddings); VERIFY_IS_EQUAL(padded.dimension(0), 2+0); @@ -47,9 +48,10 @@ static void test_simple_padding() } } +template static void test_padded_expr() { - Tensor tensor(2,3,5,7); + Tensor tensor(2,3,5,7); tensor.setRandom(); array, 4> paddings; @@ -62,17 +64,19 @@ static void test_padded_expr() reshape_dims[0] = 12; reshape_dims[1] = 84; - Tensor result; + Tensor result; result = tensor.pad(paddings).reshape(reshape_dims); for (int i = 0; i < 2; ++i) { for (int j = 0; j < 6; ++j) { for (int k = 0; k < 12; ++k) { for (int l = 0; l < 7; ++l) { + const float result_value = DataLayout == ColMajor ? + result(i+2*j,k+12*l) : result(j+6*i,l+7*k); if (j >= 2 && j < 5 && k >= 3 && k < 8) { - VERIFY_IS_EQUAL(result(i+2*j,k+12*l), tensor(i,j-2,k-3,l)); + VERIFY_IS_EQUAL(result_value, tensor(i,j-2,k-3,l)); } else { - VERIFY_IS_EQUAL(result(i+2*j,k+12*l), 0.0f); + VERIFY_IS_EQUAL(result_value, 0.0f); } } } @@ -80,9 +84,10 @@ static void test_padded_expr() } } - void test_cxx11_tensor_padding() { - CALL_SUBTEST(test_simple_padding()); - CALL_SUBTEST(test_padded_expr()); + CALL_SUBTEST(test_simple_padding()); + CALL_SUBTEST(test_simple_padding()); + CALL_SUBTEST(test_padded_expr()); + CALL_SUBTEST(test_padded_expr()); } diff --git a/unsupported/test/cxx11_tensor_patch.cpp b/unsupported/test/cxx11_tensor_patch.cpp index e2ba5bfd8..0ee7b46d4 100644 --- a/unsupported/test/cxx11_tensor_patch.cpp +++ b/unsupported/test/cxx11_tensor_patch.cpp @@ -36,6 +36,23 @@ static void test_simple_patch() VERIFY_IS_EQUAL(tensor.data()[i], no_patch.data()[i]); } + patch_dims[0] = 2; + patch_dims[1] = 3; + patch_dims[2] = 5; + patch_dims[3] = 7; + Tensor single_patch; + single_patch = tensor.extract_patches(patch_dims); + + VERIFY_IS_EQUAL(single_patch.dimension(0), 2); + VERIFY_IS_EQUAL(single_patch.dimension(1), 3); + VERIFY_IS_EQUAL(single_patch.dimension(2), 5); + VERIFY_IS_EQUAL(single_patch.dimension(3), 7); + VERIFY_IS_EQUAL(single_patch.dimension(4), 1); + + for (int i = 0; i < tensor.size(); ++i) { + VERIFY_IS_EQUAL(tensor.data()[i], single_patch.data()[i]); + } + patch_dims[0] = 1; patch_dims[1] = 2; patch_dims[2] = 2; diff --git a/unsupported/test/cxx11_tensor_reduction.cpp b/unsupported/test/cxx11_tensor_reduction.cpp index da9885166..99e19eba4 100644 --- a/unsupported/test/cxx11_tensor_reduction.cpp +++ b/unsupported/test/cxx11_tensor_reduction.cpp @@ -13,15 +13,15 @@ using Eigen::Tensor; -static void test_simple_reductions() -{ - Tensor tensor(2,3,5,7); +template +static void test_simple_reductions() { + Tensor tensor(2, 3, 5, 7); tensor.setRandom(); array reduction_axis; reduction_axis[0] = 1; reduction_axis[1] = 3; - Tensor result = tensor.sum(reduction_axis); + Tensor result = tensor.sum(reduction_axis); VERIFY_IS_EQUAL(result.dimension(0), 2); VERIFY_IS_EQUAL(result.dimension(1), 5); for (int i = 0; i < 2; ++i) { @@ -36,6 +36,53 @@ static void test_simple_reductions() } } + { + Tensor sum1 = tensor.sum(); + VERIFY_IS_EQUAL(sum1.dimension(0), 1); + + array reduction_axis; + reduction_axis[0] = 0; + reduction_axis[1] = 1; + reduction_axis[2] = 2; + reduction_axis[3] = 3; + Tensor sum2 = tensor.sum(reduction_axis); + VERIFY_IS_EQUAL(sum2.dimension(0), 1); + + VERIFY_IS_APPROX(sum1(0), sum2(0)); + } + + reduction_axis[0] = 0; + reduction_axis[1] = 2; + result = tensor.prod(reduction_axis); + VERIFY_IS_EQUAL(result.dimension(0), 3); + VERIFY_IS_EQUAL(result.dimension(1), 7); + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 7; ++j) { + float prod = 1.0f; + for (int k = 0; k < 2; ++k) { + for (int l = 0; l < 5; ++l) { + prod *= tensor(k, i, l, j); + } + } + VERIFY_IS_APPROX(result(i, j), prod); + } + } + + { + Tensor prod1 = tensor.prod(); + VERIFY_IS_EQUAL(prod1.dimension(0), 1); + + array reduction_axis; + reduction_axis[0] = 0; + reduction_axis[1] = 1; + reduction_axis[2] = 2; + reduction_axis[3] = 3; + Tensor prod2 = tensor.prod(reduction_axis); + VERIFY_IS_EQUAL(prod2.dimension(0), 1); + + VERIFY_IS_APPROX(prod1(0), prod2(0)); + } + reduction_axis[0] = 0; reduction_axis[1] = 2; result = tensor.maximum(reduction_axis); @@ -53,6 +100,21 @@ static void test_simple_reductions() } } + { + Tensor max1 = tensor.maximum(); + VERIFY_IS_EQUAL(max1.dimension(0), 1); + + array reduction_axis; + reduction_axis[0] = 0; + reduction_axis[1] = 1; + reduction_axis[2] = 2; + reduction_axis[3] = 3; + Tensor max2 = tensor.maximum(reduction_axis); + VERIFY_IS_EQUAL(max2.dimension(0), 1); + + VERIFY_IS_APPROX(max1(0), max2(0)); + } + reduction_axis[0] = 0; reduction_axis[1] = 1; result = tensor.minimum(reduction_axis); @@ -63,24 +125,72 @@ static void test_simple_reductions() float min_val = (std::numeric_limits::max)(); for (int k = 0; k < 2; ++k) { for (int l = 0; l < 3; ++l) { - min_val = (std::min)(min_val, tensor(k, l, i, j)); + min_val = (std::min)(min_val, tensor(k, l, i, j)); } } VERIFY_IS_APPROX(result(i, j), min_val); } } -} + { + Tensor min1 = tensor.minimum(); + VERIFY_IS_EQUAL(min1.dimension(0), 1); + + array reduction_axis; + reduction_axis[0] = 0; + reduction_axis[1] = 1; + reduction_axis[2] = 2; + reduction_axis[3] = 3; + Tensor min2 = tensor.minimum(reduction_axis); + VERIFY_IS_EQUAL(min2.dimension(0), 1); -static void test_full_reductions() -{ - Tensor tensor(2,3); + VERIFY_IS_APPROX(min1(0), min2(0)); + } + + reduction_axis[0] = 0; + reduction_axis[1] = 1; + result = tensor.mean(reduction_axis); + VERIFY_IS_EQUAL(result.dimension(0), 5); + VERIFY_IS_EQUAL(result.dimension(1), 7); + for (int i = 0; i < 5; ++i) { + for (int j = 0; j < 7; ++j) { + float sum = 0.0f; + int count = 0; + for (int k = 0; k < 2; ++k) { + for (int l = 0; l < 3; ++l) { + sum += tensor(k, l, i, j); + ++count; + } + } + VERIFY_IS_APPROX(result(i, j), sum / count); + } + } + + { + Tensor mean1 = tensor.mean(); + VERIFY_IS_EQUAL(mean1.dimension(0), 1); + + array reduction_axis; + reduction_axis[0] = 0; + reduction_axis[1] = 1; + reduction_axis[2] = 2; + reduction_axis[3] = 3; + Tensor mean2 = tensor.mean(reduction_axis); + VERIFY_IS_EQUAL(mean2.dimension(0), 1); + + VERIFY_IS_APPROX(mean1(0), mean2(0)); + } +} + +template +static void test_full_reductions() { + Tensor tensor(2, 3); tensor.setRandom(); array reduction_axis; reduction_axis[0] = 0; reduction_axis[1] = 1; - Tensor result = tensor.sum(reduction_axis); + Tensor result = tensor.sum(reduction_axis); VERIFY_IS_EQUAL(result.dimension(0), 1); float sum = 0.0f; @@ -103,30 +213,26 @@ static void test_full_reductions() VERIFY_IS_APPROX(result(0), sqrtf(sum)); } - struct UserReducer { - UserReducer(float offset) : offset_(offset), sum_(0.0f) {} - void reduce(const float val) { - sum_ += val * val; - } - float finalize() const { - return 1.0f / (sum_ + offset_); - } + static const bool PacketAccess = false; + UserReducer(float offset) : offset_(offset) {} + void reduce(const float val, float* accum) { *accum += val * val; } + float initialize() const { return 0; } + float finalize(const float accum) const { return 1.0f / (accum + offset_); } private: - float offset_; - float sum_; + const float offset_; }; -static void test_user_defined_reductions() -{ - Tensor tensor(5,7); +template +static void test_user_defined_reductions() { + Tensor tensor(5, 7); tensor.setRandom(); array reduction_axis; reduction_axis[0] = 1; UserReducer reducer(10.0f); - Tensor result = tensor.reduce(reduction_axis, reducer); + Tensor result = tensor.reduce(reduction_axis, reducer); VERIFY_IS_EQUAL(result.dimension(0), 5); for (int i = 0; i < 5; ++i) { float expected = 10.0f; @@ -138,22 +244,24 @@ static void test_user_defined_reductions() } } - -static void test_tensor_maps() -{ - int inputs[2*3*5*7]; - TensorMap > tensor_map(inputs, 2,3,5,7); - TensorMap > tensor_map_const(inputs, 2,3,5,7); - const TensorMap > tensor_map_const_const(inputs, 2,3,5,7); +template +static void test_tensor_maps() { + int inputs[2 * 3 * 5 * 7]; + TensorMap > tensor_map(inputs, 2, 3, 5, 7); + TensorMap > tensor_map_const(inputs, 2, 3, 5, + 7); + const TensorMap > tensor_map_const_const( + inputs, 2, 3, 5, 7); tensor_map.setRandom(); array reduction_axis; reduction_axis[0] = 1; reduction_axis[1] = 3; - Tensor result = tensor_map.sum(reduction_axis); - Tensor result2 = tensor_map_const.sum(reduction_axis); - Tensor result3 = tensor_map_const_const.sum(reduction_axis); + Tensor result = tensor_map.sum(reduction_axis); + Tensor result2 = tensor_map_const.sum(reduction_axis); + Tensor result3 = + tensor_map_const_const.sum(reduction_axis); for (int i = 0; i < 2; ++i) { for (int j = 0; j < 5; ++j) { @@ -170,11 +278,110 @@ static void test_tensor_maps() } } +template +static void test_static_dims() { + Tensor in(72, 53, 97, 113); + Tensor out(72, 97); + in.setRandom(); + +#if __cplusplus <= 199711L + array reduction_axis; + reduction_axis[0] = 1; + reduction_axis[1] = 3; +#else + Eigen::IndexList, Eigen::type2index<3> > reduction_axis; +#endif + + out = in.maximum(reduction_axis); + + for (int i = 0; i < 72; ++i) { + for (int j = 0; j < 97; ++j) { + float expected = -1e10f; + for (int k = 0; k < 53; ++k) { + for (int l = 0; l < 113; ++l) { + expected = (std::max)(expected, in(i, k, j, l)); + } + } + VERIFY_IS_APPROX(out(i, j), expected); + } + } +} + +template +static void test_innermost_last_dims() { + Tensor in(72, 53, 97, 113); + Tensor out(97, 113); + in.setRandom(); + +// Reduce on the innermost dimensions. +#if __cplusplus <= 199711L + array reduction_axis; + reduction_axis[0] = 0; + reduction_axis[1] = 1; +#else + // This triggers the use of packets for ColMajor. + Eigen::IndexList, Eigen::type2index<1> > reduction_axis; +#endif + + out = in.maximum(reduction_axis); + + for (int i = 0; i < 97; ++i) { + for (int j = 0; j < 113; ++j) { + float expected = -1e10f; + for (int k = 0; k < 53; ++k) { + for (int l = 0; l < 72; ++l) { + expected = (std::max)(expected, in(l, k, i, j)); + } + } + VERIFY_IS_APPROX(out(i, j), expected); + } + } +} + +template +static void test_innermost_first_dims() { + Tensor in(72, 53, 97, 113); + Tensor out(72, 53); + in.setRandom(); + +// Reduce on the innermost dimensions. +#if __cplusplus <= 199711L + array reduction_axis; + reduction_axis[0] = 2; + reduction_axis[1] = 3; +#else + // This triggers the use of packets for RowMajor. + Eigen::IndexList, Eigen::type2index<3>> reduction_axis; +#endif + + out = in.maximum(reduction_axis); + + for (int i = 0; i < 72; ++i) { + for (int j = 0; j < 53; ++j) { + float expected = -1e10f; + for (int k = 0; k < 97; ++k) { + for (int l = 0; l < 113; ++l) { + expected = (std::max)(expected, in(i, j, k, l)); + } + } + VERIFY_IS_APPROX(out(i, j), expected); + } + } +} -void test_cxx11_tensor_reduction() -{ - CALL_SUBTEST(test_simple_reductions()); - CALL_SUBTEST(test_full_reductions()); - CALL_SUBTEST(test_user_defined_reductions()); - CALL_SUBTEST(test_tensor_maps()); +void test_cxx11_tensor_reduction() { + CALL_SUBTEST(test_simple_reductions()); + CALL_SUBTEST(test_simple_reductions()); + CALL_SUBTEST(test_full_reductions()); + CALL_SUBTEST(test_full_reductions()); + CALL_SUBTEST(test_user_defined_reductions()); + CALL_SUBTEST(test_user_defined_reductions()); + CALL_SUBTEST(test_tensor_maps()); + CALL_SUBTEST(test_tensor_maps()); + CALL_SUBTEST(test_static_dims()); + CALL_SUBTEST(test_static_dims()); + CALL_SUBTEST(test_innermost_last_dims()); + CALL_SUBTEST(test_innermost_last_dims()); + CALL_SUBTEST(test_innermost_first_dims()); + CALL_SUBTEST(test_innermost_first_dims()); } diff --git a/unsupported/test/cxx11_tensor_shuffling.cpp b/unsupported/test/cxx11_tensor_shuffling.cpp index 39c623499..ec623e1f9 100644 --- a/unsupported/test/cxx11_tensor_shuffling.cpp +++ b/unsupported/test/cxx11_tensor_shuffling.cpp @@ -14,9 +14,10 @@ using Eigen::Tensor; using Eigen::array; +template static void test_simple_shuffling() { - Tensor tensor(2,3,5,7); + Tensor tensor(2,3,5,7); tensor.setRandom(); array shuffles; shuffles[0] = 0; @@ -24,7 +25,7 @@ static void test_simple_shuffling() shuffles[2] = 2; shuffles[3] = 3; - Tensor no_shuffle; + Tensor no_shuffle; no_shuffle = tensor.shuffle(shuffles); VERIFY_IS_EQUAL(no_shuffle.dimension(0), 2); @@ -46,7 +47,7 @@ static void test_simple_shuffling() shuffles[1] = 3; shuffles[2] = 1; shuffles[3] = 0; - Tensor shuffle; + Tensor shuffle; shuffle = tensor.shuffle(shuffles); VERIFY_IS_EQUAL(shuffle.dimension(0), 5); @@ -66,9 +67,10 @@ static void test_simple_shuffling() } +template static void test_expr_shuffling() { - Tensor tensor(2,3,5,7); + Tensor tensor(2,3,5,7); tensor.setRandom(); array shuffles; @@ -76,10 +78,10 @@ static void test_expr_shuffling() shuffles[1] = 3; shuffles[2] = 1; shuffles[3] = 0; - Tensor expected; + Tensor expected; expected = tensor.shuffle(shuffles); - Tensor result(5,7,3,2); + Tensor result(5,7,3,2); array src_slice_dim{{2,3,1,7}}; array src_slice_start{{0,0,0,0}}; @@ -128,16 +130,17 @@ static void test_expr_shuffling() } +template static void test_shuffling_as_value() { - Tensor tensor(2,3,5,7); + Tensor tensor(2,3,5,7); tensor.setRandom(); array shuffles; shuffles[2] = 0; shuffles[3] = 1; shuffles[1] = 2; shuffles[0] = 3; - Tensor shuffle(5,7,3,2); + Tensor shuffle(5,7,3,2); shuffle.shuffle(shuffles) = tensor; VERIFY_IS_EQUAL(shuffle.dimension(0), 5); @@ -158,7 +161,10 @@ static void test_shuffling_as_value() void test_cxx11_tensor_shuffling() { - CALL_SUBTEST(test_simple_shuffling()); - CALL_SUBTEST(test_expr_shuffling()); - CALL_SUBTEST(test_shuffling_as_value()); + CALL_SUBTEST(test_simple_shuffling()); + CALL_SUBTEST(test_simple_shuffling()); + CALL_SUBTEST(test_expr_shuffling()); + CALL_SUBTEST(test_expr_shuffling()); + CALL_SUBTEST(test_shuffling_as_value()); + CALL_SUBTEST(test_shuffling_as_value()); } diff --git a/unsupported/test/cxx11_tensor_simple.cpp b/unsupported/test/cxx11_tensor_simple.cpp index a70591c82..23855fca0 100644 --- a/unsupported/test/cxx11_tensor_simple.cpp +++ b/unsupported/test/cxx11_tensor_simple.cpp @@ -32,6 +32,7 @@ static void test_1d() vec1(5) = 42; vec2(5) = 5; vec3(5) = 0; vec4.setZero(); + VERIFY_IS_EQUAL((vec1.rank()), 1); VERIFY_IS_EQUAL((vec1.size()), 6); VERIFY_IS_EQUAL((vec1.dimensions()[0]), 6); @@ -99,10 +100,12 @@ static void test_2d() mat2(1,1) = 4; mat2(1,2) = 5; + VERIFY_IS_EQUAL((mat1.rank()), 2); VERIFY_IS_EQUAL((mat1.size()), 6); VERIFY_IS_EQUAL((mat1.dimensions()[0]), 2); VERIFY_IS_EQUAL((mat1.dimensions()[1]), 3); + VERIFY_IS_EQUAL((mat2.rank()), 2); VERIFY_IS_EQUAL((mat2.size()), 6); VERIFY_IS_EQUAL((mat2.dimensions()[0]), 2); VERIFY_IS_EQUAL((mat2.dimensions()[1]), 3); diff --git a/unsupported/test/cxx11_tensor_striding.cpp b/unsupported/test/cxx11_tensor_striding.cpp index 502569d1d..1feb39dca 100644 --- a/unsupported/test/cxx11_tensor_striding.cpp +++ b/unsupported/test/cxx11_tensor_striding.cpp @@ -13,9 +13,10 @@ using Eigen::Tensor; +template static void test_simple_striding() { - Tensor tensor(2,3,5,7); + Tensor tensor(2,3,5,7); tensor.setRandom(); array strides; strides[0] = 1; @@ -23,7 +24,7 @@ static void test_simple_striding() strides[2] = 1; strides[3] = 1; - Tensor no_stride; + Tensor no_stride; no_stride = tensor.stride(strides); VERIFY_IS_EQUAL(no_stride.dimension(0), 2); @@ -45,7 +46,7 @@ static void test_simple_striding() strides[1] = 4; strides[2] = 2; strides[3] = 3; - Tensor stride; + Tensor stride; stride = tensor.stride(strides); VERIFY_IS_EQUAL(stride.dimension(0), 1); @@ -65,7 +66,36 @@ static void test_simple_striding() } +template +static void test_striding_as_lvalue() +{ + Tensor tensor(2,3,5,7); + tensor.setRandom(); + array strides; + strides[0] = 2; + strides[1] = 4; + strides[2] = 2; + strides[3] = 3; + + Tensor result(3, 12, 10, 21); + result.stride(strides) = tensor; + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 7; ++l) { + VERIFY_IS_EQUAL(tensor(i,j,k,l), result(2*i,4*j,2*k,3*l)); + } + } + } + } +} + + void test_cxx11_tensor_striding() { - CALL_SUBTEST(test_simple_striding()); + CALL_SUBTEST(test_simple_striding()); + CALL_SUBTEST(test_simple_striding()); + CALL_SUBTEST(test_striding_as_lvalue()); + CALL_SUBTEST(test_striding_as_lvalue()); } diff --git a/unsupported/test/cxx11_tensor_thread_pool.cpp b/unsupported/test/cxx11_tensor_thread_pool.cpp index f0de61f8b..e25912279 100644 --- a/unsupported/test/cxx11_tensor_thread_pool.cpp +++ b/unsupported/test/cxx11_tensor_thread_pool.cpp @@ -9,11 +9,11 @@ #define EIGEN_USE_THREADS -#include + #include "main.h" +#include #include - using Eigen::Tensor; static void test_multithread_elementwise() @@ -60,12 +60,12 @@ static void test_multithread_compound_assignment() } } - +template static void test_multithread_contraction() { - Tensor t_left(30, 50, 37, 31); - Tensor t_right(37, 31, 70, 2, 10); - Tensor t_result(30, 50, 70, 2, 10); + Tensor t_left(30, 50, 37, 31); + Tensor t_right(37, 31, 70, 2, 10); + Tensor t_result(30, 50, 70, 2, 10); t_left.setRandom(); t_right.setRandom(); @@ -74,11 +74,10 @@ static void test_multithread_contraction() typedef Tensor::DimensionPair DimPair; Eigen::array dims({{DimPair(2, 0), DimPair(3, 1)}}); - - typedef Map MapXf; + typedef Map> MapXf; MapXf m_left(t_left.data(), 1500, 1147); MapXf m_right(t_right.data(), 1147, 1400); - MatrixXf m_result(1500, 1400); + Matrix m_result(1500, 1400); Eigen::ThreadPoolDevice thread_pool_device(4); @@ -95,12 +94,12 @@ static void test_multithread_contraction() } } - +template static void test_contraction_corner_cases() { - Tensor t_left(32, 500); - Tensor t_right(32, 28*28); - Tensor t_result(500, 28*28); + Tensor t_left(32, 500); + Tensor t_right(32, 28*28); + Tensor t_result(500, 28*28); t_left = (t_left.constant(-0.5f) + t_left.random()) * 2.0f; t_right = (t_right.constant(-0.6f) + t_right.random()) * 2.0f; @@ -110,10 +109,10 @@ static void test_contraction_corner_cases() typedef Tensor::DimensionPair DimPair; Eigen::array dims{{DimPair(0, 0)}}; - typedef Map MapXf; + typedef Map> MapXf; MapXf m_left(t_left.data(), 32, 500); MapXf m_right(t_right.data(), 32, 28*28); - MatrixXf m_result(500, 28*28); + Matrix m_result(500, 28*28); Eigen::ThreadPoolDevice thread_pool_device(12); @@ -181,18 +180,18 @@ static void test_contraction_corner_cases() } } - +template static void test_multithread_contraction_agrees_with_singlethread() { int contract_size = internal::random(1, 5000); - Tensor left(internal::random(1, 80), - contract_size, - internal::random(1, 100)); + Tensor left(internal::random(1, 80), + contract_size, + internal::random(1, 100)); - Tensor right(internal::random(1, 25), - internal::random(1, 37), - contract_size, - internal::random(1, 51)); + Tensor right(internal::random(1, 25), + internal::random(1, 37), + contract_size, + internal::random(1, 51)); left.setRandom(); right.setRandom(); @@ -206,13 +205,13 @@ static void test_multithread_contraction_agrees_with_singlethread() { Eigen::ThreadPoolDevice thread_pool_device(internal::random(2, 11)); - Tensor st_result; + Tensor st_result; st_result = left.contract(right, dims); - Tensor tp_result(st_result.dimensions()); + Tensor tp_result(st_result.dimensions()); tp_result.device(thread_pool_device) = left.contract(right, dims); - VERIFY(internal::dimensions_match(st_result.dimensions(), tp_result.dimensions())); + VERIFY(dimensions_match(st_result.dimensions(), tp_result.dimensions())); for (ptrdiff_t i = 0; i < st_result.size(); i++) { // if both of the values are very small, then do nothing (because the test will fail // due to numerical precision issues when values are small) @@ -241,17 +240,30 @@ static void test_memcpy() { } +static void test_multithread_random() +{ + Eigen::ThreadPoolDevice device(2); + Tensor t(1 << 20); + t.device(device) = t.random>(); +} + + void test_cxx11_tensor_thread_pool() { CALL_SUBTEST(test_multithread_elementwise()); CALL_SUBTEST(test_multithread_compound_assignment()); - CALL_SUBTEST(test_multithread_contraction()); + CALL_SUBTEST(test_multithread_contraction()); + CALL_SUBTEST(test_multithread_contraction()); - CALL_SUBTEST(test_multithread_contraction_agrees_with_singlethread()); + CALL_SUBTEST(test_multithread_contraction_agrees_with_singlethread()); + CALL_SUBTEST(test_multithread_contraction_agrees_with_singlethread()); // Exercise various cases that have been problematic in the past. - CALL_SUBTEST(test_contraction_corner_cases()); + CALL_SUBTEST(test_contraction_corner_cases()); + CALL_SUBTEST(test_contraction_corner_cases()); CALL_SUBTEST(test_memcpy()); + + CALL_SUBTEST(test_multithread_random()); } -- cgit v1.2.3 From c03c73c9b7032f984bcd6c52d9ca3a430ce19c69 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 6 Feb 2015 14:26:12 +0100 Subject: Fix clang compilation --- unsupported/test/cxx11_tensor_thread_pool.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'unsupported/test/cxx11_tensor_thread_pool.cpp') diff --git a/unsupported/test/cxx11_tensor_thread_pool.cpp b/unsupported/test/cxx11_tensor_thread_pool.cpp index e25912279..f49523683 100644 --- a/unsupported/test/cxx11_tensor_thread_pool.cpp +++ b/unsupported/test/cxx11_tensor_thread_pool.cpp @@ -15,6 +15,7 @@ #include using Eigen::Tensor; +using std::isnan; static void test_multithread_elementwise() { -- cgit v1.2.3 From 4716c2c6666eb7018dac2e2ed050ead45c8933e1 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 10 Feb 2015 12:06:19 -0800 Subject: Fixed compilation error --- unsupported/test/cxx11_tensor_thread_pool.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'unsupported/test/cxx11_tensor_thread_pool.cpp') diff --git a/unsupported/test/cxx11_tensor_thread_pool.cpp b/unsupported/test/cxx11_tensor_thread_pool.cpp index f49523683..6fe65c7f9 100644 --- a/unsupported/test/cxx11_tensor_thread_pool.cpp +++ b/unsupported/test/cxx11_tensor_thread_pool.cpp @@ -15,7 +15,7 @@ #include using Eigen::Tensor; -using std::isnan; + static void test_multithread_elementwise() { @@ -122,7 +122,7 @@ static void test_contraction_corner_cases() m_result = m_left.transpose() * m_right; for (ptrdiff_t i = 0; i < t_result.size(); i++) { - assert(!isnan(t_result.data()[i])); + assert(!std::isnan(t_result.data()[i])); if (fabs(t_result.data()[i] - m_result.data()[i]) >= 1e-4) { std::cout << "mismatch detected at index " << i << " : " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl; assert(false); @@ -137,7 +137,7 @@ static void test_contraction_corner_cases() new(&m_left) MapXf(t_left.data(), 32, 1); m_result = m_left.transpose() * m_right; for (ptrdiff_t i = 0; i < t_result.size(); i++) { - assert(!isnan(t_result.data()[i])); + assert(!std::isnan(t_result.data()[i])); if (fabs(t_result.data()[i] - m_result.data()[i]) >= 1e-4) { std::cout << "mismatch detected: " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl; assert(false); @@ -155,7 +155,7 @@ static void test_contraction_corner_cases() new(&m_right) MapXf(t_right.data(), 32, 4); m_result = m_left.transpose() * m_right; for (ptrdiff_t i = 0; i < t_result.size(); i++) { - assert(!isnan(t_result.data()[i])); + assert(!std::isnan(t_result.data()[i])); if (fabs(t_result.data()[i] - m_result.data()[i]) >= 1e-4) { std::cout << "mismatch detected: " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl; assert(false); @@ -173,7 +173,7 @@ static void test_contraction_corner_cases() new(&m_right) MapXf(t_right.data(), 32, 4); m_result = m_left.transpose() * m_right; for (ptrdiff_t i = 0; i < t_result.size(); i++) { - assert(!isnan(t_result.data()[i])); + assert(!std::isnan(t_result.data()[i])); if (fabs(t_result.data()[i] - m_result.data()[i]) >= 1e-4) { std::cout << "mismatch detected: " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl; assert(false); -- cgit v1.2.3