From f41959ccb2d9d4c722fe8fc3351401d53bcf4900 Mon Sep 17 00:00:00 2001
From: Manjunath Kudlur
Date: Fri, 6 Nov 2015 16:27:58 -0800
Subject: TensorFlow: Initial commit of TensorFlow library.

TensorFlow is an open source software library for numerical computation
using data flow graphs.

Base CL: 107276108
---
 tensorflow/core/kernels/slice_op.cc | 242 ++++++++++++++++++++++++++++++++++++
 1 file changed, 242 insertions(+)
 create mode 100644 tensorflow/core/kernels/slice_op.cc

diff --git a/tensorflow/core/kernels/slice_op.cc b/tensorflow/core/kernels/slice_op.cc
new file mode 100644
index 0000000000..3477266d5d
--- /dev/null
+++ b/tensorflow/core/kernels/slice_op.cc
@@ -0,0 +1,242 @@
+// See docs in ../ops/array_ops.cc.
+
+#define EIGEN_USE_THREADS
+
+#if GOOGLE_CUDA
+#define EIGEN_USE_GPU
+#endif  // GOOGLE_CUDA
+
+#include "tensorflow/core/kernels/slice_op.h"
+
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/kernels/ops_util.h"
+#include "tensorflow/core/public/status.h"
+#include "tensorflow/core/lib/gtl/array_slice.h"
+#include "tensorflow/core/public/tensor.h"
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+
+namespace tensorflow {
+
+namespace {
+
+gtl::InlinedVector<int64, 4> IntTensorToInt64Vec(const Tensor& tensor) {
+  gtl::InlinedVector<int64, 4> out;
+  if (tensor.dtype() == DT_INT32) {
+    for (int64 i = 0; i < tensor.NumElements(); ++i) {
+      out.push_back(tensor.flat<int32>()(i));
+    }
+  } else if (tensor.dtype() == DT_INT64) {
+    for (int64 i = 0; i < tensor.NumElements(); ++i) {
+      out.push_back(tensor.flat<int64>()(i));
+    }
+  } else {
+    LOG(FATAL) << "begin must be either int32 or int64";
+  }
+  return out;
+}
+
+}  // namespace
+
+typedef Eigen::ThreadPoolDevice CPUDevice;
+typedef Eigen::GpuDevice GPUDevice;
+
+// Shared code that is not dependent on the type of T. We do this to reduce
+// code size by not duplicating all this for all T (float, double, int32, etc.)
+static void SharedValidation(OpKernelContext* context,
+                             TensorShape* output_shape, bool* is_identity,
+                             bool* slice_dim0,
+                             gtl::InlinedVector<int64, 4>* begin,
+                             gtl::InlinedVector<int64, 4>* size) {
+  const Tensor& input = context->input(0);
+  const Tensor& begin_tensor = context->input(1);
+  const Tensor& size_tensor = context->input(2);
+
+  OP_REQUIRES(
+      context, TensorShapeUtils::IsLegacyVector(begin_tensor.shape()) &&
+                   TensorShapeUtils::IsLegacyVector(size_tensor.shape()) &&
+                   begin_tensor.NumElements() == input.dims() &&
+                   size_tensor.NumElements() == input.dims(),
+      errors::InvalidArgument(
+          "Expected begin and size arguments to be 1-D tensors of size ",
+          input.dims(), ", but got ", begin_tensor.NumElements(), " and ",
+          size_tensor.NumElements(), " instead."));
+
+  const int input_dims = input.dims();
+  *begin = IntTensorToInt64Vec(begin_tensor);
+  *size = IntTensorToInt64Vec(size_tensor);
+  for (int i = 0; i < input_dims; ++i) {
+    if ((*size)[i] == -1) {
+      // A size[i] of -1 means "all elements from begin[i] to dim_size(i)".
+      (*size)[i] = input.dim_size(i) - (*begin)[i];
+    }
+  }
+
+  *is_identity = true;
+  *slice_dim0 = true;
+  for (int i = 0; i < input_dims; ++i) {
+    int64 b = (*begin)[i];
+    int64 s = (*size)[i];
+    if (input.dim_size(i) == 0) {
+      OP_REQUIRES(
+          context, b == 0 && s == 0,
+          errors::InvalidArgument("Expected begin[", i, "] == 0 (got ", b,
+                                  ") and size[", i, "] == 0 ", "(got ", s,
+                                  ") when ", "input.dim_size(", i, ") == 0"));
+    } else {
+      OP_REQUIRES(context, 0 <= b && b <= input.dim_size(i),
+                  errors::InvalidArgument("Expected begin[", i, "] in [0, ",
+                                          input.dim_size(i), "], but got ", b));
+      OP_REQUIRES(
+          context, 0 <= s && b + s <= input.dim_size(i),
+          errors::InvalidArgument("Expected size[", i, "] in [0, ",
+                                  input.dim_size(i) - b, "], but ", "got ", s));
+    }
+    output_shape->AddDim(s);
+    const bool take_all = (b == 0) && (s == input.dim_size(i));
+    (*is_identity) &= take_all;
+    (*slice_dim0) &= (i == 0) || take_all;
+  }
+}
+
+template <typename Device, typename T>
+class SliceOp : public OpKernel {
+ public:
+  explicit SliceOp(OpKernelConstruction* context) : OpKernel(context) {}
+
+  void Compute(OpKernelContext* context) override {
+    TensorShape output_shape;
+    bool is_identity = true;
+    bool slice_dim0 = true;
+    gtl::InlinedVector<int64, 4> begin;
+    gtl::InlinedVector<int64, 4> size;
+    SharedValidation(context, &output_shape, &is_identity, &slice_dim0, &begin,
+                     &size);
+    if (!context->status().ok()) return;
+    const Tensor& input = context->input(0);
+    if (is_identity) {
+      VLOG(1) << "Slice identity";
+      context->set_output(0, input);
+      return;
+    }
+
+    if (slice_dim0 && IsInnerDimsSizeAligned<T>(input.shape())) {
+      VLOG(1) << "Slice dim 0: " << input.shape().DebugString();
+      CHECK_GE(input.dims(), 1);  // Otherwise, is_identity should be true.
+      context->set_output(0, input.Slice(begin[0], begin[0] + size[0]));
+      return;
+    }
+
+    Tensor* result = nullptr;
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(0, output_shape, &result));
+    const int input_dims = input.dims();
+
+    if (output_shape.num_elements() > 0) {
+      if (std::is_same<Device, CPUDevice>::value && input_dims == 2 &&
+          DataTypeCanUseMemcpy(DataTypeToEnum<T>::v())) {
+        auto input = context->input(0).tensor<T, 2>();
+        auto output = result->tensor<T, 2>();
+        // TODO(agarwal): Consider multi-threading this loop for cases where
+        // size[0] is very large.
+        for (int i = 0; i < size[0]; ++i) {
+          const int row = begin[0] + i;
+          if (i + 1 < size[0]) {
+            port::prefetch<port::PREFETCH_HINT_T0>(&output(i + 1, 0));
+            port::prefetch<port::PREFETCH_HINT_T0>(&input(row + 1, begin[1]));
+          }
+          memcpy(&output(i, 0), &input(row, begin[1]), size[1] * sizeof(T));
+        }
+        return;
+      }
+#define HANDLE_DIM(NDIM)                            \
+  if (input_dims == NDIM) {                         \
+    HandleCase<NDIM>(context, begin, size, result); \
+    return;                                         \
+  }
+
+      HANDLE_DIM(1);
+      HANDLE_DIM(2);
+      HANDLE_DIM(3);
+      HANDLE_DIM(4);
+      HANDLE_DIM(5);
+
+#undef HANDLE_DIM
+
+      OP_REQUIRES(context, false, errors::Unimplemented(
+                                      "SliceOp : Unhandled input dimensions"));
+    }
+  }
+
+ private:
+  template <int NDIM>
+  void HandleCase(OpKernelContext* context,
+                  const gtl::ArraySlice<int64>& begin,
+                  const gtl::ArraySlice<int64>& size, Tensor* result) {
+    Eigen::DSizes<Eigen::DenseIndex, NDIM> indices;
+    Eigen::DSizes<Eigen::DenseIndex, NDIM> sizes;
+    for (int i = 0; i < NDIM; ++i) {
+      indices[i] = begin[i];
+      sizes[i] = size[i];
+    }
+
+    functor::Slice<Device, T, NDIM>()(
+        context->eigen_device<Device>(), result->tensor<T, NDIM>(),
+        context->input(0).tensor<T, NDIM>(), indices, sizes);
+  }
+};
+
+#define REGISTER_SLICE(type)                             \
+  REGISTER_KERNEL_BUILDER(Name("Slice")                  \
+                              .Device(DEVICE_CPU)        \
+                              .TypeConstraint<type>("T") \
+                              .HostMemory("begin")       \
+                              .HostMemory("size"),       \
+                          SliceOp<CPUDevice, type>)
+
+TF_CALL_ALL_TYPES(REGISTER_SLICE);
+REGISTER_SLICE(bfloat16);
+
+#undef REGISTER_SLICE
+
+#if GOOGLE_CUDA
+// Forward declarations of the functor specializations for GPU.
+namespace functor {
+#define DECLARE_GPU_SPEC(T, NDIM)                                  \
+  template <>                                                      \
+  void Slice<GPUDevice, T, NDIM>::operator()(                      \
+      const GPUDevice& d, typename TTypes<T, NDIM>::Tensor output, \
+      typename TTypes<T, NDIM>::ConstTensor input,                 \
+      const Eigen::DSizes<Eigen::DenseIndex, NDIM>& indices,       \
+      const Eigen::DSizes<Eigen::DenseIndex, NDIM>& sizes);        \
+  extern template struct Slice<GPUDevice, T, NDIM>;
+
+#define DECLARE_FOR_N(T)  \
+  DECLARE_GPU_SPEC(T, 1); \
+  DECLARE_GPU_SPEC(T, 2); \
+  DECLARE_GPU_SPEC(T, 3); \
+  DECLARE_GPU_SPEC(T, 4); \
+  DECLARE_GPU_SPEC(T, 5);
+
+TF_CALL_GPU_NUMBER_TYPES(DECLARE_FOR_N);
+DECLARE_FOR_N(int32);
+
+#undef DECLARE_FOR_N
+#undef DECLARE_GPU_SPEC
+}  // namespace functor
+
+#define REGISTER_GPU(type)                                     \
+  REGISTER_KERNEL_BUILDER(Name("Slice")                        \
+                              .Device(DEVICE_GPU)              \
+                              .TypeConstraint<type>("T")       \
+                              .HostMemory("begin")             \
+                              .HostMemory("size")              \
+                              .TypeConstraint<int32>("Index"), \
+                          SliceOp<GPUDevice, type>)
+
+TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU);
+REGISTER_GPU(int32);
+
+#undef REGISTER_GPU
+
+#endif  // GOOGLE_CUDA
+
+}  // namespace tensorflow
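
Editorial note (not part of the patch): the begin/size convention that SharedValidation enforces can be illustrated with a minimal standalone sketch. The ResolveSliceShape helper below is hypothetical and exists only for illustration; it mirrors the rule that size[i] == -1 means "all elements from begin[i] to the end of dimension i" and that the output shape is exactly the resolved sizes.

    // Hypothetical, self-contained illustration of the begin/size rules used by
    // SharedValidation in slice_op.cc; this is not TensorFlow code.
    #include <cstdint>
    #include <iostream>
    #include <vector>

    std::vector<int64_t> ResolveSliceShape(const std::vector<int64_t>& dims,
                                           const std::vector<int64_t>& begin,
                                           std::vector<int64_t> size) {
      for (size_t i = 0; i < dims.size(); ++i) {
        // size[i] == -1 selects everything from begin[i] to the end of dim i.
        if (size[i] == -1) size[i] = dims[i] - begin[i];
      }
      return size;  // The output shape is exactly the resolved sizes.
    }

    int main() {
      // Slicing a [3, 4] input with begin = {1, 0} and size = {2, -1}
      // produces an output of shape [2, 4].
      const std::vector<int64_t> out = ResolveSliceShape({3, 4}, {1, 0}, {2, -1});
      std::cout << out[0] << " x " << out[1] << "\n";  // prints "2 x 4"
    }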