diff options
author | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2014-09-04 20:27:28 -0700 |
---|---|---|
committer | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2014-09-04 20:27:28 -0700 |
commit | 1abe4ed14c0012d85e833c5f507f282cf26edc36 (patch) | |
tree | c9d8e8fc6f6fdcba6d3101a2e3baf5634ebffd8c /unsupported/test/cxx11_tensor_device.cpp | |
parent | d43f737b4ad52e84a3b4d954d9bfb4c40cf9e819 (diff) |
Created more regression tests
Diffstat (limited to 'unsupported/test/cxx11_tensor_device.cpp')
-rw-r--r-- | unsupported/test/cxx11_tensor_device.cpp | 279 |
1 files changed, 244 insertions, 35 deletions
diff --git a/unsupported/test/cxx11_tensor_device.cpp b/unsupported/test/cxx11_tensor_device.cpp index caf2e9735..f331cb481 100644 --- a/unsupported/test/cxx11_tensor_device.cpp +++ b/unsupported/test/cxx11_tensor_device.cpp @@ -22,17 +22,43 @@ using Eigen::RowMajor; // Context for evaluation on cpu struct CPUContext { - CPUContext(const Eigen::Tensor<float, 3>& in1, Eigen::Tensor<float, 3>& in2, Eigen::Tensor<float, 3>& out) : in1_(in1), in2_(in2), out_(out) { } + CPUContext(const Eigen::Tensor<float, 3>& in1, Eigen::Tensor<float, 3>& in2, Eigen::Tensor<float, 3>& out) : in1_(in1), in2_(in2), out_(out), kernel_1d_(2), kernel_2d_(Eigen::array<int, 2>(2,2)), kernel_3d_(Eigen::array<int, 3>(2,2,2)) { + kernel_1d_(0) = 3.14f; + kernel_1d_(1) = 2.7f; + + kernel_2d_(Eigen::array<int, 2>(0,0)) = 3.14f; + kernel_2d_(Eigen::array<int, 2>(1,0)) = 2.7f; + kernel_2d_(Eigen::array<int, 2>(0,1)) = 0.2f; + kernel_2d_(Eigen::array<int, 2>(1,1)) = 7.0f; + + kernel_3d_(Eigen::array<int, 3>(0,0,0)) = 3.14f; + kernel_3d_(Eigen::array<int, 3>(0,1,0)) = 2.7f; + kernel_3d_(Eigen::array<int, 3>(0,0,1)) = 0.2f; + kernel_3d_(Eigen::array<int, 3>(0,1,1)) = 7.0f; + kernel_3d_(Eigen::array<int, 3>(1,0,0)) = -1.0f; + kernel_3d_(Eigen::array<int, 3>(1,1,0)) = -0.3f; + kernel_3d_(Eigen::array<int, 3>(1,0,1)) = -0.7f; + kernel_3d_(Eigen::array<int, 3>(1,1,1)) = -0.5f; + } + + const Eigen::DefaultDevice& device() const { return cpu_device_; } const Eigen::Tensor<float, 3>& in1() const { return in1_; } const Eigen::Tensor<float, 3>& in2() const { return in2_; } - Eigen::TensorDevice<Eigen::Tensor<float, 3>, Eigen::DefaultDevice> out() { return TensorDevice<Eigen::Tensor<float, 3>, Eigen::DefaultDevice>(cpu_device_, out_); } + Eigen::Tensor<float, 3>& out() { return out_; } + const Eigen::Tensor<float, 1>& kernel1d() const { return kernel_1d_; } + const Eigen::Tensor<float, 2>& kernel2d() const { return kernel_2d_; } + const Eigen::Tensor<float, 3>& kernel3d() const { return kernel_3d_; } private: const Eigen::Tensor<float, 3>& in1_; const Eigen::Tensor<float, 3>& in2_; Eigen::Tensor<float, 3>& out_; + Eigen::Tensor<float, 1> kernel_1d_; + Eigen::Tensor<float, 2> kernel_2d_; + Eigen::Tensor<float, 3> kernel_3d_; + Eigen::DefaultDevice cpu_device_; }; @@ -40,19 +66,45 @@ struct CPUContext { // Context for evaluation on GPU struct GPUContext { GPUContext(const Eigen::TensorMap<Eigen::Tensor<float, 3> >& in1, Eigen::TensorMap<Eigen::Tensor<float, 3> >& in2, Eigen::TensorMap<Eigen::Tensor<float, 3> >& out) : in1_(in1), in2_(in2), out_(out), gpu_device_(&stream_) { - cudaStreamCreate(&stream_); + assert(cudaMalloc((void**)(&kernel_1d_), 2*sizeof(float)) == cudaSuccess); + float kernel_1d_val[] = {3.14f, 2.7f}; + assert(cudaMemcpy(kernel_1d_, kernel_1d_val, 2*sizeof(float), cudaMemcpyHostToDevice) == cudaSuccess); + + assert(cudaMalloc((void**)(&kernel_2d_), 4*sizeof(float)) == cudaSuccess); + float kernel_2d_val[] = {3.14f, 2.7f, 0.2f, 7.0f}; + assert(cudaMemcpy(kernel_2d_, kernel_2d_val, 4*sizeof(float), cudaMemcpyHostToDevice) == cudaSuccess); + + assert(cudaMalloc((void**)(&kernel_3d_), 8*sizeof(float)) == cudaSuccess); + float kernel_3d_val[] = {3.14f, -1.0f, 2.7f, -0.3f, 0.2f, -0.7f, 7.0f, -0.5f}; + assert(cudaMemcpy(kernel_3d_, kernel_3d_val, 8*sizeof(float), cudaMemcpyHostToDevice) == cudaSuccess); + + assert(cudaStreamCreate(&stream_) == cudaSuccess); } ~GPUContext() { - cudaStreamDestroy(stream_); + assert(cudaFree(kernel_1d_) == cudaSuccess); + assert(cudaFree(kernel_2d_) == cudaSuccess); + assert(cudaFree(kernel_3d_) == cudaSuccess); + assert(cudaStreamDestroy(stream_) == cudaSuccess); } + + const Eigen::GpuDevice& device() const { return gpu_device_; } + const Eigen::TensorMap<Eigen::Tensor<float, 3> >& in1() const { return in1_; } const Eigen::TensorMap<Eigen::Tensor<float, 3> >& in2() const { return in2_; } - Eigen::TensorDevice<Eigen::TensorMap<Eigen::Tensor<float, 3> >, Eigen::GpuDevice> out() { return TensorDevice<Eigen::TensorMap<Eigen::Tensor<float, 3> >, Eigen::GpuDevice>(gpu_device_, out_); } + Eigen::TensorMap<Eigen::Tensor<float, 3> >& out() { return out_; } + Eigen::TensorMap<Eigen::Tensor<float, 1> > kernel1d() const { return Eigen::TensorMap<Eigen::Tensor<float, 1> >(kernel_1d_, 2); } + Eigen::TensorMap<Eigen::Tensor<float, 2> > kernel2d() const { return Eigen::TensorMap<Eigen::Tensor<float, 2> >(kernel_2d_, Eigen::array<int, 2>(2, 2)); } + Eigen::TensorMap<Eigen::Tensor<float, 3> > kernel3d() const { return Eigen::TensorMap<Eigen::Tensor<float, 3> >(kernel_3d_, Eigen::array<int, 3>(2, 2, 2)); } private: const Eigen::TensorMap<Eigen::Tensor<float, 3> >& in1_; const Eigen::TensorMap<Eigen::Tensor<float, 3> >& in2_; Eigen::TensorMap<Eigen::Tensor<float, 3> >& out_; + + float* kernel_1d_; + float* kernel_2d_; + float* kernel_3d_; + cudaStream_t stream_; Eigen::GpuDevice gpu_device_; }; @@ -62,49 +114,151 @@ struct GPUContext { template <typename Context> static void test_contextual_eval(Context* context) { - context->out() = context->in1() + context->in2() * 3.14f + context->in1().constant(2.718f); + context->out().device(context->device()) = context->in1() + context->in2() * 3.14f + context->in1().constant(2.718f); } template <typename Context> static void test_forced_contextual_eval(Context* context) { - context->out() = (context->in1() + context->in2()).eval() * 3.14f + context->in1().constant(2.718f); + context->out().device(context->device()) = (context->in1() + context->in2()).eval() * 3.14f + context->in1().constant(2.718f); +} + +template <typename Context> +static void test_contraction(Context* context) +{ + Eigen::array<std::pair<int, int>, 2> dims; + dims[0] = std::make_pair(1, 1); + dims[1] = std::make_pair(2, 2); + + Eigen::array<int, 2> shape(40, 50*70); + + Eigen::DSizes<int, 2> indices(0,0); + Eigen::DSizes<int, 2> sizes(40,40); + + context->out().reshape(shape).slice(indices, sizes).device(context->device()) = context->in1().contract(context->in2(), dims); +} + + +template <typename Context> +static void test_1d_convolution(Context* context) +{ + Eigen::DSizes<int, 3> indices(Eigen::array<int, 3>(0,0,0)); + Eigen::DSizes<int, 3> sizes(Eigen::array<int, 3>(40,49,70)); + + Eigen::array<int, 1> dims(1); + context->out().slice(indices, sizes).device(context->device()) = context->in1().convolve(context->kernel1d(), dims); +} + +template <typename Context> +static void test_2d_convolution(Context* context) +{ + Eigen::DSizes<int, 3> indices(Eigen::array<int, 3>(0,0,0)); + Eigen::DSizes<int, 3> sizes(Eigen::array<int, 3>(40,49,69)); + + Eigen::array<int, 2> dims(1,2); + context->out().slice(indices, sizes).device(context->device()) = context->in1().convolve(context->kernel2d(), dims); +} + +template <typename Context> +static void test_3d_convolution(Context* context) +{ + Eigen::DSizes<int, 3> indices(Eigen::array<int, 3>(0,0,0)); + Eigen::DSizes<int, 3> sizes(Eigen::array<int, 3>(39,49,69)); + + Eigen::array<int, 3> dims(0,1,2); + context->out().slice(indices, sizes).device(context->device()) = context->in1().convolve(context->kernel3d(), dims); } + static void test_cpu() { - Eigen::Tensor<float, 3> in1(Eigen::array<int, 3>(2,3,7)); - Eigen::Tensor<float, 3> in2(Eigen::array<int, 3>(2,3,7)); - Eigen::Tensor<float, 3> out(Eigen::array<int, 3>(2,3,7)); + Eigen::Tensor<float, 3> in1(Eigen::array<int, 3>(40,50,70)); + Eigen::Tensor<float, 3> in2(Eigen::array<int, 3>(40,50,70)); + Eigen::Tensor<float, 3> out(Eigen::array<int, 3>(40,50,70)); - in1.setRandom(); - in2.setRandom(); + in1 = in1.random() + in1.constant(10.0f); + in2 = in2.random() + in2.constant(10.0f); CPUContext context(in1, in2, out); test_contextual_eval(&context); - for (int i = 0; i < 2; ++i) { - for (int j = 0; j < 3; ++j) { - for (int k = 0; k < 7; ++k) { + for (int i = 0; i < 40; ++i) { + for (int j = 0; j < 50; ++j) { + for (int k = 0; k < 70; ++k) { VERIFY_IS_APPROX(out(Eigen::array<int, 3>(i,j,k)), in1(Eigen::array<int, 3>(i,j,k)) + in2(Eigen::array<int, 3>(i,j,k)) * 3.14f + 2.718f); } } } test_forced_contextual_eval(&context); - for (int i = 0; i < 2; ++i) { - for (int j = 0; j < 3; ++j) { - for (int k = 0; k < 7; ++k) { + for (int i = 0; i < 40; ++i) { + for (int j = 0; j < 50; ++j) { + for (int k = 0; k < 70; ++k) { VERIFY_IS_APPROX(out(Eigen::array<int, 3>(i,j,k)), (in1(Eigen::array<int, 3>(i,j,k)) + in2(Eigen::array<int, 3>(i,j,k))) * 3.14f + 2.718f); } } } + + test_contraction(&context); + for (int i = 0; i < 40; ++i) { + for (int j = 0; j < 40; ++j) { + const float result = out(Eigen::array<int, 3>(i,j,0)); + float expected = 0; + for (int k = 0; k < 50; ++k) { + for (int l = 0; l < 70; ++l) { + expected += in1(Eigen::array<int, 3>(i, k, l)) * in2(Eigen::array<int, 3>(j, k, l)); + } + } + VERIFY_IS_APPROX(expected, result); + } + } + + test_1d_convolution(&context); + for (int i = 0; i < 40; ++i) { + for (int j = 0; j < 49; ++j) { + for (int k = 0; k < 70; ++k) { + VERIFY_IS_APPROX(out(Eigen::array<int, 3>(i,j,k)), (in1(Eigen::array<int, 3>(i,j,k)) * 3.14f + in1(Eigen::array<int, 3>(i,j+1,k)) * 2.7f)); + } + } + } + + test_2d_convolution(&context); + for (int i = 0; i < 40; ++i) { + for (int j = 0; j < 49; ++j) { + for (int k = 0; k < 69; ++k) { + const float result = out(Eigen::array<int, 3>(i,j,k)); + const float expected = (in1(Eigen::array<int, 3>(i,j,k)) * 3.14f + in1(Eigen::array<int, 3>(i,j+1,k)) * 2.7f) + + (in1(Eigen::array<int, 3>(i,j,k+1)) * 0.2f + in1(Eigen::array<int, 3>(i,j+1,k+1)) * 7.0f); + if (fabs(expected) < 1e-4 && fabs(result) < 1e-4) { + continue; + } + VERIFY_IS_APPROX(expected, result); + } + } + } + + test_3d_convolution(&context); + for (int i = 0; i < 39; ++i) { + for (int j = 0; j < 49; ++j) { + for (int k = 0; k < 69; ++k) { + const float result = out(Eigen::array<int, 3>(i,j,k)); + const float expected = (in1(Eigen::array<int, 3>(i,j,k)) * 3.14f + in1(Eigen::array<int, 3>(i,j+1,k)) * 2.7f + + in1(Eigen::array<int, 3>(i,j,k+1)) * 0.2f + in1(Eigen::array<int, 3>(i,j+1,k+1)) * 7.0f) + + (in1(Eigen::array<int, 3>(i+1,j,k)) * -1.0f + in1(Eigen::array<int, 3>(i+1,j+1,k)) * -0.3f + + in1(Eigen::array<int, 3>(i+1,j,k+1)) * -0.7f + in1(Eigen::array<int, 3>(i+1,j+1,k+1)) * -0.5f); + if (fabs(expected) < 1e-4 && fabs(result) < 1e-4) { + continue; + } + VERIFY_IS_APPROX(expected, result); + } + } + } } static void test_gpu() { - Eigen::Tensor<float, 3> in1(Eigen::array<int, 3>(2,3,7)); - Eigen::Tensor<float, 3> in2(Eigen::array<int, 3>(2,3,7)); - Eigen::Tensor<float, 3> out(Eigen::array<int, 3>(2,3,7)); - in1.setRandom(); - in2.setRandom(); + Eigen::Tensor<float, 3> in1(Eigen::array<int, 3>(40,50,70)); + Eigen::Tensor<float, 3> in2(Eigen::array<int, 3>(40,50,70)); + Eigen::Tensor<float, 3> out(Eigen::array<int, 3>(40,50,70)); + in1 = in1.random() + in1.constant(10.0f); + in2 = in2.random() + in2.constant(10.0f); std::size_t in1_bytes = in1.size() * sizeof(float); std::size_t in2_bytes = in2.size() * sizeof(float); @@ -120,32 +274,87 @@ static void test_gpu() { cudaMemcpy(d_in1, in1.data(), in1_bytes, cudaMemcpyHostToDevice); cudaMemcpy(d_in2, in2.data(), in2_bytes, cudaMemcpyHostToDevice); - Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in1(d_in1, Eigen::array<int, 3>(2,3,7)); - Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in2(d_in2, Eigen::array<int, 3>(2,3,7)); - Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_out(d_out, Eigen::array<int, 3>(2,3,7)); + Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in1(d_in1, Eigen::array<int, 3>(40,50,70)); + Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in2(d_in2, Eigen::array<int, 3>(40,50,70)); + Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_out(d_out, Eigen::array<int, 3>(40,50,70)); GPUContext context(gpu_in1, gpu_in2, gpu_out); test_contextual_eval(&context); - cudaMemcpy(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost); - for (int i = 0; i < 2; ++i) { - for (int j = 0; j < 3; ++j) { - for (int k = 0; k < 7; ++k) { + assert(cudaMemcpy(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost) == cudaSuccess); + for (int i = 0; i < 40; ++i) { + for (int j = 0; j < 50; ++j) { + for (int k = 0; k < 70; ++k) { VERIFY_IS_APPROX(out(Eigen::array<int, 3>(i,j,k)), in1(Eigen::array<int, 3>(i,j,k)) + in2(Eigen::array<int, 3>(i,j,k)) * 3.14f + 2.718f); } } } test_forced_contextual_eval(&context); - cudaMemcpy(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost); - for (int i = 0; i < 2; ++i) { - for (int j = 0; j < 3; ++j) { - for (int k = 0; k < 7; ++k) { + assert(cudaMemcpy(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost) == cudaSuccess); + for (int i = 0; i < 40; ++i) { + for (int j = 0; j < 50; ++j) { + for (int k = 0; k < 70; ++k) { VERIFY_IS_APPROX(out(Eigen::array<int, 3>(i,j,k)), (in1(Eigen::array<int, 3>(i,j,k)) + in2(Eigen::array<int, 3>(i,j,k))) * 3.14f + 2.718f); } } } -} + test_contraction(&context); + assert(cudaMemcpy(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost) == cudaSuccess); + for (int i = 0; i < 40; ++i) { + for (int j = 0; j < 40; ++j) { + const float result = out(Eigen::array<int, 3>(i,j,0)); + float expected = 0; + for (int k = 0; k < 50; ++k) { + for (int l = 0; l < 70; ++l) { + expected += in1(Eigen::array<int, 3>(i, k, l)) * in2(Eigen::array<int, 3>(j, k, l)); + } + } + VERIFY_IS_APPROX(expected, result); + } + } + + test_1d_convolution(&context); + assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, context.device().stream()) == cudaSuccess); + assert(cudaStreamSynchronize(context.device().stream()) == cudaSuccess); + for (int i = 0; i < 40; ++i) { + for (int j = 0; j < 49; ++j) { + for (int k = 0; k < 70; ++k) { + VERIFY_IS_APPROX(out(Eigen::array<int, 3>(i,j,k)), (in1(Eigen::array<int, 3>(i,j,k)) * 3.14f + in1(Eigen::array<int, 3>(i,j+1,k)) * 2.7f)); + } + } + } + + test_2d_convolution(&context); + assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, context.device().stream()) == cudaSuccess); + assert(cudaStreamSynchronize(context.device().stream()) == cudaSuccess); + for (int i = 0; i < 40; ++i) { + for (int j = 0; j < 49; ++j) { + for (int k = 0; k < 69; ++k) { + const float result = out(Eigen::array<int, 3>(i,j,k)); + const float expected = (in1(Eigen::array<int, 3>(i,j,k)) * 3.14f + in1(Eigen::array<int, 3>(i,j+1,k)) * 2.7f + + in1(Eigen::array<int, 3>(i,j,k+1)) * 0.2f + in1(Eigen::array<int, 3>(i,j+1,k+1)) * 7.0f); + VERIFY_IS_APPROX(expected, result); + } + } + } + + test_3d_convolution(&context); + assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, context.device().stream()) == cudaSuccess); + assert(cudaStreamSynchronize(context.device().stream()) == cudaSuccess); + for (int i = 0; i < 39; ++i) { + for (int j = 0; j < 49; ++j) { + for (int k = 0; k < 69; ++k) { + const float result = out(Eigen::array<int, 3>(i,j,k)); + const float expected = (in1(Eigen::array<int, 3>(i,j,k)) * 3.14f + in1(Eigen::array<int, 3>(i,j+1,k)) * 2.7f + + in1(Eigen::array<int, 3>(i,j,k+1)) * 0.2f + in1(Eigen::array<int, 3>(i,j+1,k+1)) * 7.0f + + in1(Eigen::array<int, 3>(i+1,j,k)) * -1.0f + in1(Eigen::array<int, 3>(i+1,j+1,k)) * -0.3f + + in1(Eigen::array<int, 3>(i+1,j,k+1)) * -0.7f + in1(Eigen::array<int, 3>(i+1,j+1,k+1)) * -0.5f); + VERIFY_IS_APPROX(expected, result); + } + } + } +} void test_cxx11_tensor_device() |