aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow
diff options
context:
space:
mode:
authorGravatar Jack Rae <jwrae@google.com>2016-11-09 07:47:42 -0800
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2016-11-09 08:04:24 -0800
commit988fec702bb0c23613cfd3ba76d04bf41c1a7c59 (patch)
treeddce50e28a6d0630f8b1bfb927131624d7bb48cd /tensorflow
parent6eb522b4d6fac69274cbc245a2a0f5e4738ced5a (diff)
Optimize slice for 1D tensors.
Avoid copy for aligned slices of 1D tensors. The previous alignment check returned false for 1D aligned slices, resulting in an unnecessary copy. Change: 138637436
Diffstat (limited to 'tensorflow')
-rw-r--r--tensorflow/core/kernels/BUILD2
-rw-r--r--tensorflow/core/kernels/ops_util.h19
-rw-r--r--tensorflow/core/kernels/ops_util_test.cc54
-rw-r--r--tensorflow/core/kernels/slice_op.cc2
-rw-r--r--tensorflow/core/kernels/strided_slice_op.cc4
5 files changed, 79 insertions, 2 deletions
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 5acbca8f22..2f0a514abc 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -230,8 +230,10 @@ tf_cc_test(
srcs = ["ops_util_test.cc"],
deps = [
":ops_util",
+ "//tensorflow/core:framework",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
+ "//third_party/eigen3",
],
)
diff --git a/tensorflow/core/kernels/ops_util.h b/tensorflow/core/kernels/ops_util.h
index 19cca7341f..2d81e682ca 100644
--- a/tensorflow/core/kernels/ops_util.h
+++ b/tensorflow/core/kernels/ops_util.h
@@ -54,6 +54,25 @@ bool IsInnerDimsSizeAligned(const TensorShape& s) {
return bytes_per_dim0 % EIGEN_MAX_ALIGN_BYTES == 0;
}
+// Given the shape `s` of a tensor of type T, the `start` index of a dim 0
+// slice, and either its end index or its size (`end_or_size`), returns true
+// iff the slice is aligned with respect to the original tensor. Here aligned
+// means the byte offset is a multiple of EIGEN_MAX_ALIGN_BYTES.
+template <typename T>
+bool IsDim0SliceAligned(const TensorShape& s, int64 start, int64 end_or_size) {
+ if (s.dims() == 1) {
+ bool start_aligned = (start * sizeof(T)) % EIGEN_MAX_ALIGN_BYTES == 0;
+ // The end is aligned if either an explicit end index is passed whose byte
+ // offset is a multiple of EIGEN_MAX_ALIGN_BYTES, or the start is aligned and
+ // the size in bytes is such a multiple. So for convenience callers can pass
+ // either (start, end index) or (start, size).
+ bool end_aligned = (end_or_size * sizeof(T)) % EIGEN_MAX_ALIGN_BYTES == 0;
+ return start_aligned && end_aligned;
+ } else {
+ return IsInnerDimsSizeAligned<T>(s);
+ }
+}
+
// Returns <suffix> sanitized to have only [a-zA-Z0-9-_].
string SanitizeThreadSuffix(string suffix);
diff --git a/tensorflow/core/kernels/ops_util_test.cc b/tensorflow/core/kernels/ops_util_test.cc
index ffbcd7f8ed..04a42a9921 100644
--- a/tensorflow/core/kernels/ops_util_test.cc
+++ b/tensorflow/core/kernels/ops_util_test.cc
@@ -14,6 +14,8 @@ limitations under the License.
==============================================================================*/
#include "tensorflow/core/kernels/ops_util.h"
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/platform/test.h"
namespace tensorflow {
@@ -283,5 +285,57 @@ TEST_F(OpsUtilTest, SanitizeThreadSuffix) {
EXPECT_EQ("_aBc123_-___", SanitizeThreadSuffix("/aBc123_- /"));
}
+TEST_F(OpsUtilTest, Aligned1DSlice) {
+ Tensor t(DT_FLOAT, TensorShape({EIGEN_MAX_ALIGN_BYTES * 2}));
+ int64 start = 0;
+ int64 end = EIGEN_MAX_ALIGN_BYTES;
+ bool output = IsDim0SliceAligned<float>(t.shape(), start, end);
+ EXPECT_EQ(output, true);
+ // Sanity check: the actual 1D slice [start, end) reports IsAligned().
+ Tensor sliced;
+ CHECK(sliced.CopyFrom(t.Slice(start, end), TensorShape({end - start})));
+ EXPECT_EQ(sliced.IsAligned(), true);
+}
+
+TEST_F(OpsUtilTest, Misaligned1DSlice) {
+ Tensor t(DT_FLOAT, TensorShape({EIGEN_MAX_ALIGN_BYTES * 2}));
+ int64 start = 1;
+ int64 end = EIGEN_MAX_ALIGN_BYTES + 1;
+ bool output = IsDim0SliceAligned<float>(t.shape(), start, end);
+ EXPECT_EQ(output, false);
+ // Sanity check: the actual 1D slice [start, end) reports !IsAligned().
+ Tensor sliced;
+ CHECK(sliced.CopyFrom(t.Slice(start, end), TensorShape({end - start})));
+ EXPECT_EQ(sliced.IsAligned(), false);
+}
+
+TEST_F(OpsUtilTest, Aligned2DSliceOfDim0) {
+ // For multidimensional tensors, alignment is dictated by inner_dim_size.
+ int64 inner_dim_size = EIGEN_MAX_ALIGN_BYTES;
+ Tensor t(DT_FLOAT, TensorShape({3, inner_dim_size}));
+ int64 start = 1;
+ int64 end = 2;
+ bool output = IsDim0SliceAligned<float>(t.shape(), start, end);
+ EXPECT_EQ(output, true);
+ // Sanity check: the actual dim 0 slice of the 2D tensor reports IsAligned().
+ Tensor sliced;
+ CHECK(sliced.CopyFrom(t.Slice(start, end), TensorShape({1, inner_dim_size})));
+ EXPECT_EQ(sliced.IsAligned(), true);
+}
+
+TEST_F(OpsUtilTest, Misaligned2DSliceOfDim0) {
+ // For multidimensional tensors, alignment is dictated by inner_dim_size.
+ int64 inner_dim_size = EIGEN_MAX_ALIGN_BYTES + 1;
+ Tensor t(DT_FLOAT, TensorShape({3, inner_dim_size}));
+ int64 start = 1;
+ int64 end = 2;
+ bool output = IsDim0SliceAligned<float>(t.shape(), start, end);
+ EXPECT_EQ(output, false);
+ // Sanity check: the actual dim 0 slice of the 2D tensor reports !IsAligned().
+ Tensor sliced;
+ CHECK(sliced.CopyFrom(t.Slice(start, end), TensorShape({1, inner_dim_size})));
+ EXPECT_EQ(sliced.IsAligned(), false);
+}
+
} // namespace
} // namespace tensorflow
diff --git a/tensorflow/core/kernels/slice_op.cc b/tensorflow/core/kernels/slice_op.cc
index f8e5d234ea..c6ac1b429a 100644
--- a/tensorflow/core/kernels/slice_op.cc
+++ b/tensorflow/core/kernels/slice_op.cc
@@ -136,7 +136,7 @@ class SliceOp : public OpKernel {
return;
}
- if (slice_dim0 && IsInnerDimsSizeAligned<T>(input.shape())) {
+ if (slice_dim0 && IsDim0SliceAligned<T>(input.shape(), begin[0], size[0])) {
VLOG(1) << "Slice dim 0: " << input.shape().DebugString();
CHECK_GE(input.dims(), 1); // Otherwise, is_identity should be true.
context->set_output(0, input.Slice(begin[0], begin[0] + size[0]));
diff --git a/tensorflow/core/kernels/strided_slice_op.cc b/tensorflow/core/kernels/strided_slice_op.cc
index 6cbcbf9fd9..a3463feac8 100644
--- a/tensorflow/core/kernels/strided_slice_op.cc
+++ b/tensorflow/core/kernels/strided_slice_op.cc
@@ -75,6 +75,7 @@ class StridedSliceOp : public OpKernel {
// Optimization #1, slice is a no-op plus reshape
if (is_identity) {
+ VLOG(1) << "Strided slice identity ";
Tensor tmp;
CHECK(tmp.CopyFrom(input, final_shape));
context->set_output(0, tmp);
@@ -82,8 +83,9 @@ class StridedSliceOp : public OpKernel {
}
// Optimization #2, slice is memory contiguous (only occurs in dim 0)
- if (slice_dim0 && IsInnerDimsSizeAligned<T>(input.shape())) {
+ if (slice_dim0 && IsDim0SliceAligned<T>(input.shape(), begin[0], end[0])) {
CHECK_GE(input.dims(), 1); // Otherwise, is_identity should be true.
+ VLOG(1) << "Strided slice dim 0: " << input.shape().DebugString();
Tensor tmp;
CHECK(tmp.CopyFrom(input.Slice(begin[0], end[0]), final_shape));
context->set_output(0, tmp);