aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/core/kernels/tile_functor.h
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/core/kernels/tile_functor.h')
-rw-r--r--tensorflow/core/kernels/tile_functor.h115
1 files changed, 115 insertions, 0 deletions
diff --git a/tensorflow/core/kernels/tile_functor.h b/tensorflow/core/kernels/tile_functor.h
new file mode 100644
index 0000000000..d80898b24f
--- /dev/null
+++ b/tensorflow/core/kernels/tile_functor.h
@@ -0,0 +1,115 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_KERNELS_TILE_FUNCTOR_H_
+#define TENSORFLOW_KERNELS_TILE_FUNCTOR_H_
+
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+
+namespace internal {
+
+// Helper to compute 'strides' given a tensor 'shape'. I.e.,
+// strides[i] = prod(shape.dim_size[(i+1):])
+template <typename Index>
+gtl::InlinedVector<Index, 8> ComputeStride(const TensorShape& shape) {
+ const int ndims = shape.dims();
+ gtl::InlinedVector<Index, 8> strides(ndims);
+ Index stride = 1;
+ for (int i = ndims - 1; i >= 0; --i) {
+ strides[i] = stride;
+ stride *= static_cast<Index>(shape.dim_size(i));
+ }
+ return strides;
+}
+
+
+// Device-specific naive implementation for tile.
+template <typename Device, typename T>
+void TileSimple(const Device& d, Tensor* out, const Tensor& in);
+
+template <typename Device, typename T, int NDIM>
+void TileUsingEigen(const Device& d, Tensor* out, const Tensor& in,
+ const gtl::ArraySlice<int32>& broadcast_array) {
+ auto x = in.tensor<T, NDIM>();
+ auto y = out->tensor<T, NDIM>();
+
+ Eigen::array<int32, NDIM> b;
+ for (int i = 0; i < NDIM; ++i) b[i] = broadcast_array[i];
+ if (Eigen::internal::is_same<Device, Eigen::GpuDevice>::value) {
+ // Use 32bit indexing to speed up the computations
+ To32Bit(y).device(d) = To32Bit(x).broadcast(b);
+ } else {
+ y.device(d) = x.broadcast(b);
+ }
+}
+
+template <typename Device, typename T>
+void TileUsingEigen(const Device& d, Tensor* out, const Tensor& in,
+ const gtl::ArraySlice<int32>&) {
+ auto x = in.tensor<T, 0>();
+ auto y = out->tensor<T, 0>();
+ // In the scalar case we simply copy the input.
+ y.device(d) = x;
+}
+
+} // end namespace internal
+
+namespace functor {
+
+template <typename Device, typename T>
+struct Tile {
+ void operator()(const Device& d, Tensor* out, const Tensor& in,
+ const gtl::ArraySlice<int32> broadcast_array) const {
+ switch (in.dims()) {
+ case 0:
+ internal::TileUsingEigen<Device, T>(d, out, in, broadcast_array);
+ break;
+ case 1:
+ internal::TileUsingEigen<Device, T, 1>(d, out, in, broadcast_array);
+ break;
+ case 2:
+ internal::TileUsingEigen<Device, T, 2>(d, out, in, broadcast_array);
+ break;
+ case 3:
+ internal::TileUsingEigen<Device, T, 3>(d, out, in, broadcast_array);
+ break;
+ case 4:
+ internal::TileUsingEigen<Device, T, 4>(d, out, in, broadcast_array);
+ break;
+ case 5:
+ internal::TileUsingEigen<Device, T, 5>(d, out, in, broadcast_array);
+ break;
+ case 6:
+ internal::TileUsingEigen<Device, T, 6>(d, out, in, broadcast_array);
+ break;
+ case 7:
+ internal::TileUsingEigen<Device, T, 7>(d, out, in, broadcast_array);
+ break;
+ default:
+ internal::TileSimple<Device, T>(d, out, in);
+ break;
+ }
+ }
+};
+
+} // end namespace functor
+} // end namespace tensorflow
+
+#endif // TENSORFLOW_KERNELS_TILE_FUNCTOR_H_