author     A. Unique TensorFlower <nobody@tensorflow.org>  2016-01-29 10:20:54 -0800
committer  Vijay Vasudevan <vrv@google.com>  2016-01-29 20:15:44 -0800
commit     938902bade73fb90ecc178a6c46cdf2b30d3a69a (patch)
tree       788fb21a3e4104ef436d522036ec3872b5a20fbd /tensorflow
parent     5dc0ab7565afc0f707adc628c2865233a9702cfa (diff)
Sped up construction of the BCast helper class for the very common case where both shapes are the same: a fast path initializes the same instance variables with much more straightforward code. Added a benchmark for this to bcast_test.cc. Speeds up the same-shape case by 65% (67 ns to 23 ns for a two-dimensional shape) without any real effect on the different-shape case.

Run on machine with (40 X 2801 MHz CPUs); 2016/01/28-11:12:26
CPU: Intel Ivybridge with HyperThreading (20 cores) dL1:32KB dL2:256KB dL3:25MB

Benchmark          Base (ns)   New (ns)  Improvement
------------------------------------------------------------------
BM_BCastSetup/0          122        122        +0.0%
BM_BCastSetup/1           67         23       +65.7%

Change: 113374076
Diffstat (limited to 'tensorflow')
-rw-r--r--  tensorflow/core/util/bcast.cc       220
-rw-r--r--  tensorflow/core/util/bcast.h          3
-rw-r--r--  tensorflow/core/util/bcast_test.cc   25

3 files changed, 145 insertions(+), 103 deletions(-)
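For orientation before the diff: below is a minimal standalone sketch of the fast path this commit adds. The struct name BCastSketch and the use of std::vector<int64_t> are simplifications for illustration only; the real class uses TensorFlow's int64 Vec and lives in bcast.h/bcast.cc as shown in the diff.

    #include <cstdint>
    #include <vector>

    using Vec = std::vector<int64_t>;

    // Sketch of the identical-shapes fast path: the whole tensor collapses
    // to one flat dimension with a broadcast factor of 1 on both sides, so
    // no per-dimension comparison, reversal, or run-merging is needed.
    struct BCastSketch {
      Vec x_reshape_, y_reshape_, x_bcast_, y_bcast_, result_, output_;

      BCastSketch(const Vec& sx, const Vec& sy) {
        if (sx == sy) {
          int64_t elements = 1;
          for (int64_t dim : sx) elements *= dim;  // total element count
          output_ = sx;             // output shape == common input shape
          x_reshape_ = {elements};  // both inputs viewed as one flat dim
          y_reshape_ = {elements};
          x_bcast_ = {1};           // no tiling on either side
          y_bcast_ = {1};
          result_ = {elements};
          // grad reduce indices stay empty: no broadcast, nothing to reduce.
        }
        // else: fall through to the general per-dimension analysis (see diff).
      }
    };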
diff --git a/tensorflow/core/util/bcast.cc b/tensorflow/core/util/bcast.cc
index 46cf2dfafb..c045ee902b 100644
--- a/tensorflow/core/util/bcast.cc
+++ b/tensorflow/core/util/bcast.cc
@@ -22,114 +22,132 @@ namespace tensorflow {
void BCast::Reverse(Vec* shape) { std::reverse(shape->begin(), shape->end()); }
BCast::BCast(const Vec& sx, const Vec& sy) {
- // Reverse the shape of x and y for convenience.
- // After the reverse, 0-th is the inner-most dimension.
- Vec x = sx;
- Reverse(&x);
- Vec y = sy;
- Reverse(&y);
-
- // 1-extend and align x and y so that they are the same size.
- if (x.size() > y.size()) {
- y.resize(x.size(), 1);
+ if (sx == sy) {
+ // Fast path for common case of identical shapes for sx and sy
+ int64 elements = 1;
+ const int n = sx.size();
+ output_.resize(n);
+ for (int i = 0; i < n; i++) {
+ int64 dim = sx[i];
+ elements *= dim;
+ output_[i] = dim;
+ }
+ x_reshape_.push_back(elements);
+ y_reshape_.push_back(elements);
+ x_bcast_.push_back(1);
+ y_bcast_.push_back(1);
+ result_.push_back(elements);
+ // grad_x_reduce_ and grad_y_reduce_ are left as empty
} else {
- x.resize(y.size(), 1);
- }
+ // Reverse the shape of x and y for convenience.
+ // After the reverse, 0-th is the inner-most dimension.
+ Vec x = sx;
+ Reverse(&x);
+ Vec y = sy;
+ Reverse(&y);
- // Going through each dimension starting from the inner-most
- // dimension, compares dimension of x and y. They are compatible if
- // they are equal or either is 1.
- enum State {
- UNKNOWN,
- SAME,
- X_ONE,
- Y_ONE,
- };
- State prev = UNKNOWN;
- const int64 n = x.size();
- for (int i = 0; i < n; ++i) {
- // Output shape.
- State curr = UNKNOWN;
- const int64 x_i = x[i]; // i-th dimension of x.
- CHECK_GE(x_i, 0);
- const int64 y_i = y[i]; // i-th dimension of y.
- CHECK_GE(y_i, 0);
- int64 o_i; // i-th dimension of the output.
- int64 bx_i; // i-th broadcast for x.
- int64 by_i; // i-th broadcast for y.
- // Invariant:
- // o_i = x_i * bx_i = y_i * by_i
- if (x_i == y_i) {
- // No broadcast.
- o_i = x_i;
- bx_i = 1;
- by_i = 1;
- curr = SAME;
- } else if (x_i == 1) {
- // x broadcast to y on this dimension.
- o_i = y_i;
- bx_i = y_i;
- by_i = 1;
- grad_x_reduce_idx_.push_back(n - 1 - i);
- curr = X_ONE;
- } else if (y_i == 1) {
- // y broadcast to x on this dimension.
- o_i = x_i;
- bx_i = 1;
- by_i = x_i;
- grad_y_reduce_idx_.push_back(n - 1 - i);
- curr = Y_ONE;
+ // 1-extend and align x and y so that they are the same size.
+ if (x.size() > y.size()) {
+ y.resize(x.size(), 1);
} else {
- valid_ = false;
- return;
+ x.resize(y.size(), 1);
}
- output_.push_back(o_i);
- // Reshape/broadcast.
- // Invariant:
- // result[i] == x_reshape[i] * x_bcast[i] == y_reshape_[i] * y_bcast_[i]
- if (curr == SAME && x_i == 1) {
- // Both side are 1s.
- grad_x_reduce_idx_.push_back(n - 1 - i);
- grad_y_reduce_idx_.push_back(n - 1 - i);
- continue;
- } else if (prev == curr) {
- // It is a run of the same cases (no broadcast, x broadcast to
- // y, y broadcast to x). We can reshape the input so that fewer
- // dimensions are involved in the intermediate computation.
- result_.back() *= o_i;
- x_reshape_.back() *= x_i;
- x_bcast_.back() *= bx_i;
- y_reshape_.back() *= y_i;
- y_bcast_.back() *= by_i;
- } else {
- result_.push_back(o_i);
- x_reshape_.push_back(x_i);
- x_bcast_.push_back(bx_i);
- y_reshape_.push_back(y_i);
- y_bcast_.push_back(by_i);
+
+ // Going through each dimension starting from the inner-most
+ // dimension, compares dimension of x and y. They are compatible if
+ // they are equal or either is 1.
+ enum State {
+ UNKNOWN,
+ SAME,
+ X_ONE,
+ Y_ONE,
+ };
+ State prev = UNKNOWN;
+ const int64 n = x.size();
+ for (int i = 0; i < n; ++i) {
+ // Output shape.
+ State curr = UNKNOWN;
+ const int64 x_i = x[i]; // i-th dimension of x.
+ CHECK_GE(x_i, 0);
+ const int64 y_i = y[i]; // i-th dimension of y.
+ CHECK_GE(y_i, 0);
+ int64 o_i; // i-th dimension of the output.
+ int64 bx_i; // i-th broadcast for x.
+ int64 by_i; // i-th broadcast for y.
+ // Invariant:
+ // o_i = x_i * bx_i = y_i * by_i
+ if (x_i == y_i) {
+ // No broadcast.
+ o_i = x_i;
+ bx_i = 1;
+ by_i = 1;
+ curr = SAME;
+ } else if (x_i == 1) {
+ // x broadcast to y on this dimension.
+ o_i = y_i;
+ bx_i = y_i;
+ by_i = 1;
+ grad_x_reduce_idx_.push_back(n - 1 - i);
+ curr = X_ONE;
+ } else if (y_i == 1) {
+ // y broadcast to x on this dimension.
+ o_i = x_i;
+ bx_i = 1;
+ by_i = x_i;
+ grad_y_reduce_idx_.push_back(n - 1 - i);
+ curr = Y_ONE;
+ } else {
+ valid_ = false;
+ return;
+ }
+ output_.push_back(o_i);
+ // Reshape/broadcast.
+ // Invariant:
+ // result[i] == x_reshape[i] * x_bcast[i] == y_reshape_[i] * y_bcast_[i]
+ if (curr == SAME && x_i == 1) {
+ // Both side are 1s.
+ grad_x_reduce_idx_.push_back(n - 1 - i);
+ grad_y_reduce_idx_.push_back(n - 1 - i);
+ continue;
+ } else if (prev == curr) {
+ // It is a run of the same cases (no broadcast, x broadcast to
+ // y, y broadcast to x). We can reshape the input so that fewer
+ // dimensions are involved in the intermediate computation.
+ result_.back() *= o_i;
+ x_reshape_.back() *= x_i;
+ x_bcast_.back() *= bx_i;
+ y_reshape_.back() *= y_i;
+ y_bcast_.back() *= by_i;
+ } else {
+ result_.push_back(o_i);
+ x_reshape_.push_back(x_i);
+ x_bcast_.push_back(bx_i);
+ y_reshape_.push_back(y_i);
+ y_bcast_.push_back(by_i);
+ }
+ prev = curr;
}
- prev = curr;
- }
- if (result_.empty()) {
- // Can happen when both x and y are effectively scalar.
- result_.push_back(1);
- x_reshape_.push_back(1);
- x_bcast_.push_back(1);
- y_reshape_.push_back(1);
- y_bcast_.push_back(1);
- }
+ if (result_.empty()) {
+ // Can happen when both x and y are effectively scalar.
+ result_.push_back(1);
+ x_reshape_.push_back(1);
+ x_bcast_.push_back(1);
+ y_reshape_.push_back(1);
+ y_bcast_.push_back(1);
+ }
- // Reverse all vectors since x and y were reversed at very
- // beginning.
- Reverse(&x_reshape_);
- Reverse(&x_bcast_);
- Reverse(&y_reshape_);
- Reverse(&y_bcast_);
- Reverse(&result_);
- Reverse(&output_);
- Reverse(&grad_x_reduce_idx_);
- Reverse(&grad_y_reduce_idx_);
+ // Reverse all vectors since x and y were reversed at very
+ // beginning.
+ Reverse(&x_reshape_);
+ Reverse(&x_bcast_);
+ Reverse(&y_reshape_);
+ Reverse(&y_bcast_);
+ Reverse(&result_);
+ Reverse(&output_);
+ Reverse(&grad_x_reduce_idx_);
+ Reverse(&grad_y_reduce_idx_);
+ }
}
} // end namespace tensorflow
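To see what the general path (the else branch above) produces, here is a hand-traced usage sketch. The accessor names (x_reshape(), result_shape(), and so on) are the public getters that bcast_test.cc uses; the expected values are worked out by hand from the loop above, so treat them as illustrative rather than authoritative.

    #include <iostream>
    #include "tensorflow/core/util/bcast.h"

    int main() {
      // Broadcast [2, 3, 4] against [2, 3, 1].  The two leading SAME
      // dimensions (2 and 3) form one run and merge into 6, so the
      // intermediate computation is 2-D instead of 3-D.
      tensorflow::BCast b({2, 3, 4}, {2, 3, 1});
      // Hand-traced expectations:
      //   b.x_reshape()         == [6, 4]    (2*3 collapsed into 6)
      //   b.x_bcast()           == [1, 1]    (x needs no tiling)
      //   b.y_reshape()         == [6, 1]
      //   b.y_bcast()           == [1, 4]    (y's inner dim is tiled 4x)
      //   b.result_shape()      == [6, 4]
      //   b.output_shape()      == [2, 3, 4]
      //   b.grad_y_reduce_idx() == [2]       (grad w.r.t. y sums over dim 2)
      std::cout << (b.IsValid() ? "valid" : "invalid") << std::endl;
      return 0;
    }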
diff --git a/tensorflow/core/util/bcast.h b/tensorflow/core/util/bcast.h
index 1d01ce809d..1eeb97778f 100644
--- a/tensorflow/core/util/bcast.h
+++ b/tensorflow/core/util/bcast.h
@@ -67,7 +67,7 @@ namespace tensorflow {
// TODO(zhifengc): Adds support for n-ary (n >= 2).
class BCast {
public:
- // A vector of int32 representing the shape of tensor. The 0-th
+ // A vector of int64 representing the shape of tensor. The 0-th
// element is the outer-most dimension and the last element is the
// inner-most dimension. Note that we do not use TensorShape since
// it's more convenient to manipulate Vec directly for this module.
@@ -104,7 +104,6 @@ class BCast {
Vec grad_y_reduce_idx_;
static void Reverse(Vec* shape);
- static bool HasZero(const Vec& shape);
TF_DISALLOW_COPY_AND_ASSIGN(BCast);
};
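The comment fix above brings the documentation in line with Vec's actual element type. For reference, the typedef itself (not shown in this hunk; quoted as an assumption about this revision of bcast.h) is a small inlined vector of int64:

    // Assumed declaration in bcast.h at this revision: dimensions are
    // int64, stored inline (no heap allocation) for ranks up to 4.
    typedef gtl::InlinedVector<int64, 4> Vec;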
diff --git a/tensorflow/core/util/bcast_test.cc b/tensorflow/core/util/bcast_test.cc
index f2c0950975..a9602e6db8 100644
--- a/tensorflow/core/util/bcast_test.cc
+++ b/tensorflow/core/util/bcast_test.cc
@@ -18,6 +18,7 @@ limitations under the License.
#include "tensorflow/core/lib/strings/str_util.h"
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/platform/test_benchmark.h"
namespace tensorflow {
namespace {
@@ -57,6 +58,15 @@ TEST(BCastTest, Basic_SameShape) {
"[][]");
}
+TEST(BCastTest, Basic_SameShapeWithZeroDim) {
+ // Effectively no broadcast needed.
+ EXPECT_EQ(BCast({11, 7, 0, 3, 2}, {11, 7, 0, 3, 2}),
+ "[0][1][0][1]"
+ "[0]"
+ "[11,7,0,3,2]"
+ "[][]");
+}
+
TEST(BCastTest, Basic_Scalar_Scalar) {
// Effectively it's a scalar and a scalar.
// [1, 1] [1]
@@ -237,5 +247,20 @@ TEST(BCastTest, TestZeroDimensionShape) {
"[0,1,3][]");
}
+static void BM_BCastSetup(int iters, int same_shape) {
+ if (same_shape) {
+ testing::SetLabel("same_shapes");
+ while (--iters > 0) {
+ class BCast b({1000, 100}, {1000, 100});
+ }
+ } else {
+ testing::SetLabel("different_shapes");
+ while (--iters > 0) {
+ class BCast b({3, 1, 5}, {2, 0, 3, 0, 5});
+ }
+ }
+}
+BENCHMARK(BM_BCastSetup)->Arg(0)->Arg(1);
+
} // namespace
} // namespace tensorflow