aboutsummaryrefslogtreecommitdiffhomepage
path: root/unsupported/test
diff options
context:
space:
mode:
authorGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2014-05-16 15:08:05 -0700
committerGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2014-05-16 15:08:05 -0700
commit7402fea0a8e63e3ea248257047c584afee8f8bde (patch)
tree429aee7ea314c579ed62c1c5e1ff84850b14370a /unsupported/test
parent0320f7e3a71406b9a03d1bab0d168fd76e63d457 (diff)
Vectorized the evaluation of tensor expression (using SSE, AVX, NEON, ...)
Added the ability to parallelize the evaluation of a tensor expression over multiple cpu cores. Added the ability to offload the evaluation of a tensor expression to a GPU.
Diffstat (limited to 'unsupported/test')
-rw-r--r--unsupported/test/CMakeLists.txt3
-rw-r--r--unsupported/test/cxx11_tensor_device.cpp126
-rw-r--r--unsupported/test/cxx11_tensor_fixed_size.cpp28
-rw-r--r--unsupported/test/cxx11_tensor_thread_pool.cpp37
4 files changed, 194 insertions, 0 deletions
diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt
index 31583d3ca..abc3375e5 100644
--- a/unsupported/test/CMakeLists.txt
+++ b/unsupported/test/CMakeLists.txt
@@ -104,4 +104,7 @@ if(EIGEN_TEST_CXX11)
ei_add_test(cxx11_tensor_assign "-std=c++0x")
ei_add_test(cxx11_tensor_expr "-std=c++0x")
ei_add_test(cxx11_tensor_map "-std=c++0x")
+ ei_add_test(cxx11_tensor_device "-std=c++0x")
+# ei_add_test(cxx11_tensor_fixed_size "-std=c++0x")
+ ei_add_test(cxx11_tensor_thread_pool "-std=c++0x")
endif()
diff --git a/unsupported/test/cxx11_tensor_device.cpp b/unsupported/test/cxx11_tensor_device.cpp
new file mode 100644
index 000000000..9eb1d0420
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_device.cpp
@@ -0,0 +1,126 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_TEST_NO_LONGDOUBLE
+#define EIGEN_TEST_NO_COMPLEX
+#define EIGEN_TEST_FUNC cxx11_tensor_device
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
+#define EIGEN_USE_GPU
+
+
+#include "main.h"
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+using Eigen::RowMajor;
+
+// Context for evaluation on cpu
+struct CPUContext {
+ CPUContext(const Eigen::Tensor<float, 3>& in1, Eigen::Tensor<float, 3>& in2, Eigen::Tensor<float, 3>& out) : in1_(in1), in2_(in2), out_(out) { }
+
+ const Eigen::Tensor<float, 3>& in1() const { return in1_; }
+ const Eigen::Tensor<float, 3>& in2() const { return in2_; }
+ Eigen::TensorDevice<Eigen::Tensor<float, 3>, Eigen::DefaultDevice> out() { return TensorDevice<Eigen::Tensor<float, 3>, Eigen::DefaultDevice>(cpu_device_, out_); }
+
+ private:
+ const Eigen::Tensor<float, 3>& in1_;
+ const Eigen::Tensor<float, 3>& in2_;
+ Eigen::Tensor<float, 3>& out_;
+
+ Eigen::DefaultDevice cpu_device_;
+};
+
+
+// Context for evaluation on GPU
+struct GPUContext {
+ GPUContext(const Eigen::TensorMap<Eigen::Tensor<float, 3> >& in1, Eigen::TensorMap<Eigen::Tensor<float, 3> >& in2, Eigen::TensorMap<Eigen::Tensor<float, 3> >& out) : in1_(in1), in2_(in2), out_(out) { }
+
+ const Eigen::TensorMap<Eigen::Tensor<float, 3> >& in1() const { return in1_; }
+ const Eigen::TensorMap<Eigen::Tensor<float, 3> >& in2() const { return in2_; }
+ Eigen::TensorDevice<Eigen::TensorMap<Eigen::Tensor<float, 3> >, Eigen::GpuDevice> out() { return TensorDevice<Eigen::TensorMap<Eigen::Tensor<float, 3> >, Eigen::GpuDevice>(gpu_device_, out_); }
+
+ private:
+ const Eigen::TensorMap<Eigen::Tensor<float, 3> >& in1_;
+ const Eigen::TensorMap<Eigen::Tensor<float, 3> >& in2_;
+ Eigen::TensorMap<Eigen::Tensor<float, 3> >& out_;
+ Eigen::GpuDevice gpu_device_;
+};
+
+
+// The actual expression to evaluate
+template <typename Context>
+static void test_contextual_eval(Context* context)
+{
+ context->out() = context->in1() + context->in2() * 3.14f;
+}
+
+static void test_cpu() {
+ Eigen::Tensor<float, 3> in1(Eigen::array<int, 3>(2,3,7));
+ Eigen::Tensor<float, 3> in2(Eigen::array<int, 3>(2,3,7));
+ Eigen::Tensor<float, 3> out(Eigen::array<int, 3>(2,3,7));
+
+ in1.setRandom();
+ in2.setRandom();
+ CPUContext context(in1, in2, out);
+ test_contextual_eval(&context);
+
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ VERIFY_IS_APPROX(out(Eigen::array<int, 3>(i,j,k)), in1(Eigen::array<int, 3>(i,j,k)) + in2(Eigen::array<int, 3>(i,j,k)) * 3.14f);
+ }
+ }
+ }
+}
+
+static void test_gpu() {
+ Eigen::Tensor<float, 3> in1(Eigen::array<int, 3>(2,3,7));
+ Eigen::Tensor<float, 3> in2(Eigen::array<int, 3>(2,3,7));
+ Eigen::Tensor<float, 3> out(Eigen::array<int, 3>(2,3,7));
+ in1.setRandom();
+ in2.setRandom();
+
+ std::size_t in1_bytes = in1.size() * sizeof(float);
+ std::size_t in2_bytes = in2.size() * sizeof(float);
+ std::size_t out_bytes = out.size() * sizeof(float);
+
+ float* d_in1;
+ float* d_in2;
+ float* d_out;
+ cudaMalloc((void**)(&d_in1), in1_bytes);
+ cudaMalloc((void**)(&d_in2), in2_bytes);
+ cudaMalloc((void**)(&d_out), out_bytes);
+
+ cudaMemcpy(d_in1, in1.data(), in1_bytes, cudaMemcpyHostToDevice);
+ cudaMemcpy(d_in2, in2.data(), in2_bytes, cudaMemcpyHostToDevice);
+
+ Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in1(d_in1, Eigen::array<int, 3>(2,3,7));
+ Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in2(d_in2, Eigen::array<int, 3>(2,3,7));
+ Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_out(d_out, Eigen::array<int, 3>(2,3,7));
+
+ GPUContext context(gpu_in1, gpu_in2, gpu_out);
+ test_contextual_eval(&context);
+
+ cudaMemcpy(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost);
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ VERIFY_IS_APPROX(out(Eigen::array<int, 3>(i,j,k)), in1(Eigen::array<int, 3>(i,j,k)) + in2(Eigen::array<int, 3>(i,j,k)) * 3.14f);
+ }
+ }
+ }
+}
+
+
+
+void test_cxx11_tensor_device()
+{
+ CALL_SUBTEST(test_cpu());
+ CALL_SUBTEST(test_gpu());
+}
diff --git a/unsupported/test/cxx11_tensor_fixed_size.cpp b/unsupported/test/cxx11_tensor_fixed_size.cpp
index c1d74d881..214f6951d 100644
--- a/unsupported/test/cxx11_tensor_fixed_size.cpp
+++ b/unsupported/test/cxx11_tensor_fixed_size.cpp
@@ -159,9 +159,37 @@ static void test_3d()
}
+static void test_array()
+{
+ TensorFixedSize<float, Sizes<2, 3, 7> > mat1;
+ float val = 0.0;
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ mat1(array<ptrdiff_t, 3>(i,j,k)) = val;
+ val += 1.0;
+ }
+ }
+ }
+
+ TensorFixedSize<float, Sizes<2, 3, 7> > mat3;
+ mat3 = mat1.cwisePow(3.5f);
+
+ val = 0.0;
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ VERIFY_IS_APPROX(mat3(array<ptrdiff_t, 3>(i,j,k)), powf(val, 3.5f));
+ val += 1.0;
+ }
+ }
+ }
+}
+
void test_cxx11_tensor_fixed_size()
{
CALL_SUBTEST(test_1d());
CALL_SUBTEST(test_2d());
CALL_SUBTEST(test_3d());
+ CALL_SUBTEST(test_array());
}
diff --git a/unsupported/test/cxx11_tensor_thread_pool.cpp b/unsupported/test/cxx11_tensor_thread_pool.cpp
new file mode 100644
index 000000000..c9de71da3
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_thread_pool.cpp
@@ -0,0 +1,37 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_USE_THREADS
+
+
+#include "main.h"
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+
+void test_cxx11_tensor_thread_pool()
+{
+ Eigen::Tensor<float, 3> in1(Eigen::array<ptrdiff_t, 3>(2,3,7));
+ Eigen::Tensor<float, 3> in2(Eigen::array<ptrdiff_t, 3>(2,3,7));
+ Eigen::Tensor<float, 3> out(Eigen::array<ptrdiff_t, 3>(2,3,7));
+
+ in1.setRandom();
+ in2.setRandom();
+
+ Eigen::ThreadPoolDevice thread_pool_device(3);
+ out.device(thread_pool_device) = in1 + in2 * 3.14;
+
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ VERIFY_IS_APPROX(out(Eigen::array<ptrdiff_t, 3>(i,j,k)), in1(Eigen::array<ptrdiff_t, 3>(i,j,k)) + in2(Eigen::array<ptrdiff_t, 3>(i,j,k)) * 3.14f);
+ }
+ }
+ }
+}