diff options
Diffstat (limited to 'tensorflow/core/kernels/depthtospace_op.cc')
-rw-r--r-- | tensorflow/core/kernels/depthtospace_op.cc | 61 |
1 files changed, 43 insertions, 18 deletions
diff --git a/tensorflow/core/kernels/depthtospace_op.cc b/tensorflow/core/kernels/depthtospace_op.cc index 01d5c479ae..4355bda960 100644 --- a/tensorflow/core/kernels/depthtospace_op.cc +++ b/tensorflow/core/kernels/depthtospace_op.cc @@ -21,6 +21,8 @@ limitations under the License. #include <string> #include <utility> +#include "tensorflow/core/kernels/depthtospace_op.h" + #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/op_kernel.h" @@ -60,8 +62,8 @@ class DepthToSpaceOp : public OpKernel { "instead of: ", dims)); const int batch_size = input.dim_size(0); - const int height = input.dim_size(1); - const int width = input.dim_size(2); + const int input_height = input.dim_size(1); + const int input_width = input.dim_size(2); const int input_depth = input.dim_size(3); const int block_size_sq = block_size_ * block_size_; @@ -73,40 +75,57 @@ class DepthToSpaceOp : public OpKernel { "should be divisible by: ", block_size_sq)); const int output_depth = input_depth / block_size_sq; - const int output_width = width * block_size_; - const int output_height = height * block_size_; + const int output_width = input_width * block_size_; + const int output_height = input_height * block_size_; // Allocate output tensor. - Tensor* outputs_tensor = nullptr; + Tensor* output = nullptr; OP_REQUIRES_OK(context, context->allocate_output( 0, TensorShape({batch_size, output_height, output_width, output_depth}), - &outputs_tensor)); + &output)); + + typename TTypes<T, 4>::ConstTensor Tinput = input.tensor<T, 4>(); + typename TTypes<T, 4>::Tensor Toutput = output->tensor<T, 4>(); + + functor::DepthToSpaceOpFunctor<Device, T> functor; + functor(context->eigen_device<Device>(), Tinput, block_size_, Toutput); + }; + + private: + int block_size_; +}; - auto Toutput = outputs_tensor->tensor<T, 4>(); - auto Tinput = input.tensor<T, 4>(); +// Partial specialization of DepthToSpaceOpFunctor for a CPUDevice. +namespace functor { +template <typename T> +struct DepthToSpaceOpFunctor<CPUDevice, T> { + void operator()(const CPUDevice& d, typename TTypes<T, 4>::ConstTensor input, + int block_size, typename TTypes<T, 4>::Tensor output) { + const int batch_size = output.dimension(0); + const int output_height = output.dimension(1); + const int output_width = output.dimension(2); + const int output_depth = output.dimension(3); for (int b = 0; b < batch_size; ++b) { for (int h = 0; h < output_height; ++h) { - const int in_h = h / block_size_; - const int offset_h = (h % block_size_); + const int in_h = h / block_size; + const int offset_h = (h % block_size); for (int w = 0; w < output_width; ++w) { - const int in_w = w / block_size_; - const int offset_w = (w % block_size_); + const int in_w = w / block_size; + const int offset_w = (w % block_size); const int offset_d = - (offset_h * block_size_ + offset_w) * output_depth; + (offset_h * block_size + offset_w) * output_depth; for (int d = 0; d < output_depth; ++d) { const int in_d = d + offset_d; - Toutput(b, h, w, d) = Tinput(b, in_h, in_w, in_d); + output(b, h, w, d) = input(b, in_h, in_w, in_d); } } } } - }; - - private: - int block_size_; + } }; +} // namespace functor #define REGISTER(type) \ REGISTER_KERNEL_BUILDER( \ @@ -116,4 +135,10 @@ class DepthToSpaceOp : public OpKernel { TF_CALL_ALL_TYPES(REGISTER); #undef REGISTER +#if GOOGLE_CUDA +REGISTER_KERNEL_BUILDER( + Name("DepthToSpace").Device(DEVICE_GPU).TypeConstraint<float>("T"), + DepthToSpaceOp<GPUDevice, float>); +#endif // GOOGLE_CUDA + } // end namespace tensorflow |