diff options
Diffstat (limited to 'tensorflow/core/kernels/tile_functor.h')
-rw-r--r-- | tensorflow/core/kernels/tile_functor.h | 115 |
1 files changed, 115 insertions, 0 deletions
diff --git a/tensorflow/core/kernels/tile_functor.h b/tensorflow/core/kernels/tile_functor.h new file mode 100644 index 0000000000..d80898b24f --- /dev/null +++ b/tensorflow/core/kernels/tile_functor.h @@ -0,0 +1,115 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_KERNELS_TILE_FUNCTOR_H_ +#define TENSORFLOW_KERNELS_TILE_FUNCTOR_H_ + +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/platform/types.h" + +namespace tensorflow { + +namespace internal { + +// Helper to compute 'strides' given a tensor 'shape'. I.e., +// strides[i] = prod(shape.dim_size[(i+1):]) +template <typename Index> +gtl::InlinedVector<Index, 8> ComputeStride(const TensorShape& shape) { + const int ndims = shape.dims(); + gtl::InlinedVector<Index, 8> strides(ndims); + Index stride = 1; + for (int i = ndims - 1; i >= 0; --i) { + strides[i] = stride; + stride *= static_cast<Index>(shape.dim_size(i)); + } + return strides; +} + + +// Device-specific naive implementation for tile. +template <typename Device, typename T> +void TileSimple(const Device& d, Tensor* out, const Tensor& in); + +template <typename Device, typename T, int NDIM> +void TileUsingEigen(const Device& d, Tensor* out, const Tensor& in, + const gtl::ArraySlice<int32>& broadcast_array) { + auto x = in.tensor<T, NDIM>(); + auto y = out->tensor<T, NDIM>(); + + Eigen::array<int32, NDIM> b; + for (int i = 0; i < NDIM; ++i) b[i] = broadcast_array[i]; + if (Eigen::internal::is_same<Device, Eigen::GpuDevice>::value) { + // Use 32bit indexing to speed up the computations + To32Bit(y).device(d) = To32Bit(x).broadcast(b); + } else { + y.device(d) = x.broadcast(b); + } +} + +template <typename Device, typename T> +void TileUsingEigen(const Device& d, Tensor* out, const Tensor& in, + const gtl::ArraySlice<int32>&) { + auto x = in.tensor<T, 0>(); + auto y = out->tensor<T, 0>(); + // In the scalar case we simply copy the input. + y.device(d) = x; +} + +} // end namespace internal + +namespace functor { + +template <typename Device, typename T> +struct Tile { + void operator()(const Device& d, Tensor* out, const Tensor& in, + const gtl::ArraySlice<int32> broadcast_array) const { + switch (in.dims()) { + case 0: + internal::TileUsingEigen<Device, T>(d, out, in, broadcast_array); + break; + case 1: + internal::TileUsingEigen<Device, T, 1>(d, out, in, broadcast_array); + break; + case 2: + internal::TileUsingEigen<Device, T, 2>(d, out, in, broadcast_array); + break; + case 3: + internal::TileUsingEigen<Device, T, 3>(d, out, in, broadcast_array); + break; + case 4: + internal::TileUsingEigen<Device, T, 4>(d, out, in, broadcast_array); + break; + case 5: + internal::TileUsingEigen<Device, T, 5>(d, out, in, broadcast_array); + break; + case 6: + internal::TileUsingEigen<Device, T, 6>(d, out, in, broadcast_array); + break; + case 7: + internal::TileUsingEigen<Device, T, 7>(d, out, in, broadcast_array); + break; + default: + internal::TileSimple<Device, T>(d, out, in); + break; + } + } +}; + +} // end namespace functor +} // end namespace tensorflow + +#endif // TENSORFLOW_KERNELS_TILE_FUNCTOR_H_ |