diff options
author | 2016-11-09 07:47:42 -0800 | |
---|---|---|
committer | 2016-11-09 08:04:24 -0800 | |
commit | 988fec702bb0c23613cfd3ba76d04bf41c1a7c59 (patch) | |
tree | ddce50e28a6d0630f8b1bfb927131624d7bb48cd /tensorflow | |
parent | 6eb522b4d6fac69274cbc245a2a0f5e4738ced5a (diff) |
Optimize slice for 1D tensors.
Avoid copy for aligned slices of 1D tensors. The previous alignment check
returned false for 1D aligned slices, resulting in an unnecessary copy.
Change: 138637436
Diffstat (limited to 'tensorflow')
-rw-r--r-- | tensorflow/core/kernels/BUILD | 2 | ||||
-rw-r--r-- | tensorflow/core/kernels/ops_util.h | 19 | ||||
-rw-r--r-- | tensorflow/core/kernels/ops_util_test.cc | 54 | ||||
-rw-r--r-- | tensorflow/core/kernels/slice_op.cc | 2 | ||||
-rw-r--r-- | tensorflow/core/kernels/strided_slice_op.cc | 4 |
5 files changed, 79 insertions, 2 deletions
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 5acbca8f22..2f0a514abc 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -230,8 +230,10 @@ tf_cc_test( srcs = ["ops_util_test.cc"], deps = [ ":ops_util", + "//tensorflow/core:framework", "//tensorflow/core:test", "//tensorflow/core:test_main", + "//third_party/eigen3", ], ) diff --git a/tensorflow/core/kernels/ops_util.h b/tensorflow/core/kernels/ops_util.h index 19cca7341f..2d81e682ca 100644 --- a/tensorflow/core/kernels/ops_util.h +++ b/tensorflow/core/kernels/ops_util.h @@ -54,6 +54,25 @@ bool IsInnerDimsSizeAligned(const TensorShape& s) { return bytes_per_dim0 % EIGEN_MAX_ALIGN_BYTES == 0; } +// Given a shape 's' of a tensor of type T and the `start` and `end` index of a +// dim 0 slice, returns true iff slice is aligned with respect to original +// tensor. Here aligned implies the address is a multiple of +// EIGEN_MAX_ALIGN_BYTES. +template <typename T> +bool IsDim0SliceAligned(const TensorShape& s, int64 start, int64 end_or_size) { + if (s.dims() == 1) { + bool start_aligned = (start * sizeof(T)) % EIGEN_MAX_ALIGN_BYTES == 0; + // End is aligned if either the explicit end index is passed and is a + // multiple of EIGEN_MAX_ALIGN_BYTES, or the start index is aligned and + // the size is aligned. So for convenience we can either pass start and + // end, or start and size. + bool end_aligned = (end_or_size * sizeof(T)) % EIGEN_MAX_ALIGN_BYTES == 0; + return start_aligned && end_aligned; + } else { + return IsInnerDimsSizeAligned<T>(s); + } +} + // Returns <suffix> sanitized to have only [a-zA-Z0-9-_]. string SanitizeThreadSuffix(string suffix); diff --git a/tensorflow/core/kernels/ops_util_test.cc b/tensorflow/core/kernels/ops_util_test.cc index ffbcd7f8ed..04a42a9921 100644 --- a/tensorflow/core/kernels/ops_util_test.cc +++ b/tensorflow/core/kernels/ops_util_test.cc @@ -14,6 +14,8 @@ limitations under the License. 
==============================================================================*/ #include "tensorflow/core/kernels/ops_util.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/platform/test.h" namespace tensorflow { @@ -283,5 +285,57 @@ TEST_F(OpsUtilTest, SanitizeThreadSuffix) { EXPECT_EQ("_aBc123_-___", SanitizeThreadSuffix("/aBc123_- /")); } +TEST_F(OpsUtilTest, Aligned1DSlice) { + Tensor t(DT_FLOAT, TensorShape({EIGEN_MAX_ALIGN_BYTES * 2})); + int64 start = 0; + int64 end = EIGEN_MAX_ALIGN_BYTES; + bool output = IsDim0SliceAligned<float>(t.shape(), start, end); + EXPECT_EQ(output, true); + // Checks sliced 1D tensor is aligned for sanity. + Tensor sliced; + CHECK(sliced.CopyFrom(t.Slice(start, end), TensorShape({end - start}))); + EXPECT_EQ(sliced.IsAligned(), true); +} + +TEST_F(OpsUtilTest, Misaligned1DSlice) { + Tensor t(DT_FLOAT, TensorShape({EIGEN_MAX_ALIGN_BYTES * 2})); + int64 start = 1; + int64 end = EIGEN_MAX_ALIGN_BYTES + 1; + bool output = IsDim0SliceAligned<float>(t.shape(), start, end); + EXPECT_EQ(output, false); + // Checks sliced 1D tensor is misaligned for sanity. + Tensor sliced; + CHECK(sliced.CopyFrom(t.Slice(start, end), TensorShape({end - start}))); + EXPECT_EQ(sliced.IsAligned(), false); +} + +TEST_F(OpsUtilTest, Aligned2DSliceOfDim0) { + // For multidimensional tensors, alignment is dictated by inner_dim_size. + int64 inner_dim_size = EIGEN_MAX_ALIGN_BYTES; + Tensor t(DT_FLOAT, TensorShape({3, inner_dim_size})); + int64 start = 1; + int64 end = 2; + bool output = IsDim0SliceAligned<float>(t.shape(), start, end); + EXPECT_EQ(output, true); + // Checks sliced 2D is aligned, for sanity. + Tensor sliced; + CHECK(sliced.CopyFrom(t.Slice(start, end), TensorShape({1, inner_dim_size}))); + EXPECT_EQ(sliced.IsAligned(), true); +} + +TEST_F(OpsUtilTest, Misaligned2DSliceOfDim0) { + // For multidimensional tensors, alignment is dictated by inner_dim_size. 
+ int64 inner_dim_size = EIGEN_MAX_ALIGN_BYTES + 1; + Tensor t(DT_FLOAT, TensorShape({3, inner_dim_size})); + int64 start = 1; + int64 end = 2; + bool output = IsDim0SliceAligned<float>(t.shape(), start, end); + EXPECT_EQ(output, false); + // Checks sliced 2D is misaligned, for sanity. + Tensor sliced; + CHECK(sliced.CopyFrom(t.Slice(start, end), TensorShape({1, inner_dim_size}))); + EXPECT_EQ(sliced.IsAligned(), false); +} + } // namespace } // namespace tensorflow diff --git a/tensorflow/core/kernels/slice_op.cc b/tensorflow/core/kernels/slice_op.cc index f8e5d234ea..c6ac1b429a 100644 --- a/tensorflow/core/kernels/slice_op.cc +++ b/tensorflow/core/kernels/slice_op.cc @@ -136,7 +136,7 @@ class SliceOp : public OpKernel { return; } - if (slice_dim0 && IsInnerDimsSizeAligned<T>(input.shape())) { + if (slice_dim0 && IsDim0SliceAligned<T>(input.shape(), begin[0], size[0])) { VLOG(1) << "Slice dim 0: " << input.shape().DebugString(); CHECK_GE(input.dims(), 1); // Otherwise, is_identity should be true. context->set_output(0, input.Slice(begin[0], begin[0] + size[0])); diff --git a/tensorflow/core/kernels/strided_slice_op.cc b/tensorflow/core/kernels/strided_slice_op.cc index 6cbcbf9fd9..a3463feac8 100644 --- a/tensorflow/core/kernels/strided_slice_op.cc +++ b/tensorflow/core/kernels/strided_slice_op.cc @@ -75,6 +75,7 @@ class StridedSliceOp : public OpKernel { // Optimization #1, slice is a no-op plus reshape if (is_identity) { + VLOG(1) << "Strided slice identity "; Tensor tmp; CHECK(tmp.CopyFrom(input, final_shape)); context->set_output(0, tmp); @@ -82,8 +83,9 @@ class StridedSliceOp : public OpKernel { } // Optimization #2, slice is memory contiguous (only occurs in dim 0) - if (slice_dim0 && IsInnerDimsSizeAligned<T>(input.shape())) { + if (slice_dim0 && IsDim0SliceAligned<T>(input.shape(), begin[0], end[0])) { CHECK_GE(input.dims(), 1); // Otherwise, is_identity should be true. 
+ VLOG(1) << "Strided slice dim 0: " << input.shape().DebugString(); Tensor tmp; CHECK(tmp.CopyFrom(input.Slice(begin[0], end[0]), final_shape)); context->set_output(0, tmp); |