author    A. Unique TensorFlower <nobody@tensorflow.org>  2016-01-29 10:20:54 -0800
committer Vijay Vasudevan <vrv@google.com>  2016-01-29 20:15:44 -0800
commit    938902bade73fb90ecc178a6c46cdf2b30d3a69a (patch)
tree      788fb21a3e4104ef436d522036ec3872b5a20fbd /tensorflow/core/util/bcast.cc
parent    5dc0ab7565afc0f707adc628c2865233a9702cfa (diff)
Sped up construction of the BCast helper class for the very common case
where both shapes are the same, by initializing the instance variables
directly with much simpler code. Added a benchmark for this to
bcast_test.cc. Speeds up the same-shape case by 65% (67 ns to 23 ns for a
two-dimensional shape) without any real effect on the different-shape case.

Run on machine with (40 X 2801 MHz CPUs); 2016/01/28-11:12:26
CPU: Intel Ivybridge with HyperThreading (20 cores) dL1:32KB dL2:256KB dL3:25MB

Benchmark         Base (ns)  New (ns)  Improvement
---------------------------------------------------
BM_BCastSetup/0         122       122        +0.0%
BM_BCastSetup/1          67        23       +65.7%

Change: 113374076
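The benchmark mentioned above lives in bcast_test.cc and is not part of this
diff (which is limited to bcast.cc). A plausible sketch of its shape, assuming
the old int-iters BENCHMARK harness from
tensorflow/core/platform/test_benchmark.h; the shapes here are illustrative,
not necessarily the ones in the test:

#include "tensorflow/core/platform/test_benchmark.h"
#include "tensorflow/core/util/bcast.h"

namespace tensorflow {

static void BM_BCastSetup(int iters, int same_shape) {
  if (same_shape) {
    // BM_BCastSetup/1: exercises the new fast path (sx == sy).
    while (iters-- > 0) {
      BCast b({1000, 100}, {1000, 100});
    }
  } else {
    // BM_BCastSetup/0: exercises the general per-dimension path.
    while (iters-- > 0) {
      BCast b({3, 1, 5}, {2, 1, 3, 1, 5});
    }
  }
}
BENCHMARK(BM_BCastSetup)->Arg(0)->Arg(1);

}  // namespace tensorflow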
Diffstat (limited to 'tensorflow/core/util/bcast.cc')
-rw-r--r--  tensorflow/core/util/bcast.cc  220
1 file changed, 119 insertions(+), 101 deletions(-)
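The per-dimension loop that this change moves into the else branch coalesces
runs of dimensions that share the same broadcast state (the prev == curr case
below), so the reshape vectors can have lower rank than the inputs. A minimal
check of that behavior with hypothetical shapes {2, 3, 4} and {2, 3, 1}, where
the two leading SAME dimensions collapse into one of size 6; accessor names
are from bcast.h, and the std::vector copy just sidesteps comparing
BCast::Vec values directly:

#include "tensorflow/core/util/bcast.h"
#include <cassert>
#include <vector>

using tensorflow::BCast;
using tensorflow::int64;

// Copy a BCast::Vec into a std::vector for easy comparison.
static std::vector<int64> V(const BCast::Vec& v) {
  return std::vector<int64>(v.begin(), v.end());
}

int main() {
  BCast b({2, 3, 4}, {2, 3, 1});
  // The leading {2, 3} collapses into a single dimension of size 6;
  // y's trailing 1 broadcasts against x's 4.
  assert(V(b.x_reshape()) == (std::vector<int64>{6, 4}));
  assert(V(b.y_reshape()) == (std::vector<int64>{6, 1}));
  assert(V(b.y_bcast()) == (std::vector<int64>{1, 4}));
  assert(V(b.output_shape()) == (std::vector<int64>{2, 3, 4}));
  return 0;
}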
diff --git a/tensorflow/core/util/bcast.cc b/tensorflow/core/util/bcast.cc
index 46cf2dfafb..c045ee902b 100644
--- a/tensorflow/core/util/bcast.cc
+++ b/tensorflow/core/util/bcast.cc
@@ -22,114 +22,132 @@ namespace tensorflow {
void BCast::Reverse(Vec* shape) { std::reverse(shape->begin(), shape->end()); }
BCast::BCast(const Vec& sx, const Vec& sy) {
- // Reverse the shape of x and y for convenience.
- // After the reverse, 0-th is the inner-most dimension.
- Vec x = sx;
- Reverse(&x);
- Vec y = sy;
- Reverse(&y);
-
- // 1-extend and align x and y so that they are the same size.
- if (x.size() > y.size()) {
- y.resize(x.size(), 1);
+ if (sx == sy) {
+ // Fast path for the common case where sx and sy have identical shapes.
+ int64 elements = 1;
+ const int n = sx.size();
+ output_.resize(n);
+ for (int i = 0; i < n; i++) {
+ int64 dim = sx[i];
+ elements *= dim;
+ output_[i] = dim;
+ }
+ x_reshape_.push_back(elements);
+ y_reshape_.push_back(elements);
+ x_bcast_.push_back(1);
+ y_bcast_.push_back(1);
+ result_.push_back(elements);
+ // grad_x_reduce_idx_ and grad_y_reduce_idx_ are left empty
} else {
- x.resize(y.size(), 1);
- }
+ // Reverse the shape of x and y for convenience.
+ // After the reverse, 0-th is the inner-most dimension.
+ Vec x = sx;
+ Reverse(&x);
+ Vec y = sy;
+ Reverse(&y);
- // Go through each dimension starting from the inner-most one and
- // compare the dimensions of x and y. They are compatible if they
- // are equal or either is 1.
- enum State {
- UNKNOWN,
- SAME,
- X_ONE,
- Y_ONE,
- };
- State prev = UNKNOWN;
- const int64 n = x.size();
- for (int i = 0; i < n; ++i) {
- // Output shape.
- State curr = UNKNOWN;
- const int64 x_i = x[i]; // i-th dimension of x.
- CHECK_GE(x_i, 0);
- const int64 y_i = y[i]; // i-th dimension of y.
- CHECK_GE(y_i, 0);
- int64 o_i; // i-th dimension of the output.
- int64 bx_i; // i-th broadcast for x.
- int64 by_i; // i-th broadcast for y.
- // Invariant:
- // o_i = x_i * bx_i = y_i * by_i
- if (x_i == y_i) {
- // No broadcast.
- o_i = x_i;
- bx_i = 1;
- by_i = 1;
- curr = SAME;
- } else if (x_i == 1) {
- // x broadcast to y on this dimension.
- o_i = y_i;
- bx_i = y_i;
- by_i = 1;
- grad_x_reduce_idx_.push_back(n - 1 - i);
- curr = X_ONE;
- } else if (y_i == 1) {
- // y broadcast to x on this dimension.
- o_i = x_i;
- bx_i = 1;
- by_i = x_i;
- grad_y_reduce_idx_.push_back(n - 1 - i);
- curr = Y_ONE;
+ // 1-extend and align x and y so that they are the same size.
+ if (x.size() > y.size()) {
+ y.resize(x.size(), 1);
} else {
- valid_ = false;
- return;
+ x.resize(y.size(), 1);
}
- output_.push_back(o_i);
- // Reshape/broadcast.
- // Invariant:
- // result_[i] == x_reshape_[i] * x_bcast_[i] == y_reshape_[i] * y_bcast_[i]
- if (curr == SAME && x_i == 1) {
- // Both sides are 1s.
- grad_x_reduce_idx_.push_back(n - 1 - i);
- grad_y_reduce_idx_.push_back(n - 1 - i);
- continue;
- } else if (prev == curr) {
- // It is a run of the same cases (no broadcast, x broadcast to
- // y, y broadcast to x). We can reshape the input so that fewer
- // dimensions are involved in the intermediate computation.
- result_.back() *= o_i;
- x_reshape_.back() *= x_i;
- x_bcast_.back() *= bx_i;
- y_reshape_.back() *= y_i;
- y_bcast_.back() *= by_i;
- } else {
- result_.push_back(o_i);
- x_reshape_.push_back(x_i);
- x_bcast_.push_back(bx_i);
- y_reshape_.push_back(y_i);
- y_bcast_.push_back(by_i);
+
+ // Go through each dimension starting from the inner-most one and
+ // compare the dimensions of x and y. They are compatible if they
+ // are equal or either is 1.
+ enum State {
+ UNKNOWN,
+ SAME,
+ X_ONE,
+ Y_ONE,
+ };
+ State prev = UNKNOWN;
+ const int64 n = x.size();
+ for (int i = 0; i < n; ++i) {
+ // Output shape.
+ State curr = UNKNOWN;
+ const int64 x_i = x[i]; // i-th dimension of x.
+ CHECK_GE(x_i, 0);
+ const int64 y_i = y[i]; // i-th dimension of y.
+ CHECK_GE(y_i, 0);
+ int64 o_i; // i-th dimension of the output.
+ int64 bx_i; // i-th broadcast for x.
+ int64 by_i; // i-th broadcast for y.
+ // Invariant:
+ // o_i = x_i * bx_i = y_i * by_i
+ if (x_i == y_i) {
+ // No broadcast.
+ o_i = x_i;
+ bx_i = 1;
+ by_i = 1;
+ curr = SAME;
+ } else if (x_i == 1) {
+ // x broadcast to y on this dimension.
+ o_i = y_i;
+ bx_i = y_i;
+ by_i = 1;
+ grad_x_reduce_idx_.push_back(n - 1 - i);
+ curr = X_ONE;
+ } else if (y_i == 1) {
+ // y broadcast to x on this dimension.
+ o_i = x_i;
+ bx_i = 1;
+ by_i = x_i;
+ grad_y_reduce_idx_.push_back(n - 1 - i);
+ curr = Y_ONE;
+ } else {
+ valid_ = false;
+ return;
+ }
+ output_.push_back(o_i);
+ // Reshape/broadcast.
+ // Invariant:
+ // result_[i] == x_reshape_[i] * x_bcast_[i] == y_reshape_[i] * y_bcast_[i]
+ if (curr == SAME && x_i == 1) {
+ // Both sides are 1s.
+ grad_x_reduce_idx_.push_back(n - 1 - i);
+ grad_y_reduce_idx_.push_back(n - 1 - i);
+ continue;
+ } else if (prev == curr) {
+ // It is a run of the same cases (no broadcast, x broadcast to
+ // y, y broadcast to x). We can reshape the input so that fewer
+ // dimensions are involved in the intermediate computation.
+ result_.back() *= o_i;
+ x_reshape_.back() *= x_i;
+ x_bcast_.back() *= bx_i;
+ y_reshape_.back() *= y_i;
+ y_bcast_.back() *= by_i;
+ } else {
+ result_.push_back(o_i);
+ x_reshape_.push_back(x_i);
+ x_bcast_.push_back(bx_i);
+ y_reshape_.push_back(y_i);
+ y_bcast_.push_back(by_i);
+ }
+ prev = curr;
}
- prev = curr;
- }
- if (result_.empty()) {
- // Can happen when both x and y are effectively scalar.
- result_.push_back(1);
- x_reshape_.push_back(1);
- x_bcast_.push_back(1);
- y_reshape_.push_back(1);
- y_bcast_.push_back(1);
- }
+ if (result_.empty()) {
+ // Can happen when both x and y are effectively scalar.
+ result_.push_back(1);
+ x_reshape_.push_back(1);
+ x_bcast_.push_back(1);
+ y_reshape_.push_back(1);
+ y_bcast_.push_back(1);
+ }
- // Reverse all vectors since x and y were reversed at the very
- // beginning.
- Reverse(&x_reshape_);
- Reverse(&x_bcast_);
- Reverse(&y_reshape_);
- Reverse(&y_bcast_);
- Reverse(&result_);
- Reverse(&output_);
- Reverse(&grad_x_reduce_idx_);
- Reverse(&grad_y_reduce_idx_);
+ // Reverse all vectors since x and y were reversed at the very
+ // beginning.
+ Reverse(&x_reshape_);
+ Reverse(&x_bcast_);
+ Reverse(&y_reshape_);
+ Reverse(&y_bcast_);
+ Reverse(&result_);
+ Reverse(&output_);
+ Reverse(&grad_x_reduce_idx_);
+ Reverse(&grad_y_reduce_idx_);
+ }
}
} // end namespace tensorflow
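For reference, how the two code paths surface through the class's public
accessors (accessor names are from bcast.h; the shapes are illustrative):

#include "tensorflow/core/util/bcast.h"

void Example() {
  using tensorflow::BCast;

  // Fast path: identical shapes collapse to one flattened dimension.
  BCast same({2, 3}, {2, 3});
  // x_reshape() == y_reshape() == {6}; x_bcast() == y_bcast() == {1};
  // result_shape() == {6}; output_shape() == {2, 3};
  // grad_x_reduce_idx() and grad_y_reduce_idx() are empty.

  // General path: x's trailing dimension (1) broadcasts against y's (3).
  BCast bcast({2, 1}, {2, 3});
  // output_shape() == {2, 3}; x_reshape() == {2, 1}; x_bcast() == {1, 3};
  // y_reshape() == {2, 3}; y_bcast() == {1, 1};
  // grad_x_reduce_idx() == {1}: the gradient w.r.t. x is reduced over
  // output dimension 1.

  // Incompatible: 3 != 4 and neither is 1, so IsValid() returns false.
  BCast bad({2, 3}, {2, 4});
}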