diff options
author | 2016-05-03 17:00:04 -0800 | |
---|---|---|
committer | 2016-05-03 18:01:30 -0700 | |
commit | 90caf0e179e82e3f014203e22faa055b0bb0f014 (patch) | |
tree | 6efee433b6b15c6d3d88174b060a864a335f1c09 /tensorflow/core/util/bcast.cc | |
parent | 0393436023d8fe7a2f98284420c58de6e461212a (diff) |
Allow BCast to disable the optimization of grouping consecutive dimensions.
Also move a few helper functions into the BCast class.
Before:
Benchmark Time(ns) CPU(ns) Iterations
------------------------------------------------
BM_BCastSetup/0 114 114 6062466 different_shapes
BM_BCastSetup/1 20 20 34863786 same_shapes
After:
Benchmark Time(ns) CPU(ns) Iterations
------------------------------------------------
BM_BCastSetup/0 126 126 5562646 different_shapes
BM_BCastSetup/1 32 32 21627499 same_shapes
The performance hit likely stems from 1-2 branch mispredictions (~7ns per misprediction).
Change: 121437477
Diffstat (limited to 'tensorflow/core/util/bcast.cc')
-rw-r--r-- | tensorflow/core/util/bcast.cc | 39 |
1 file changed, 30 insertions, 9 deletions
diff --git a/tensorflow/core/util/bcast.cc b/tensorflow/core/util/bcast.cc index c045ee902b..d49512819c 100644 --- a/tensorflow/core/util/bcast.cc +++ b/tensorflow/core/util/bcast.cc @@ -21,29 +21,29 @@ namespace tensorflow { /* static */ void BCast::Reverse(Vec* shape) { std::reverse(shape->begin(), shape->end()); } -BCast::BCast(const Vec& sx, const Vec& sy) { - if (sx == sy) { +BCast::BCast(const Vec& sx, const Vec& sy, const bool fewer_dims_optimization) { + if (sx == sy && TF_PREDICT_TRUE(fewer_dims_optimization)) { // Fast path for common case of identical shapes for sx and sy int64 elements = 1; const int n = sx.size(); output_.resize(n); for (int i = 0; i < n; i++) { - int64 dim = sx[i]; + const int64 dim = sx[i]; elements *= dim; output_[i] = dim; } + result_.push_back(elements); x_reshape_.push_back(elements); y_reshape_.push_back(elements); x_bcast_.push_back(1); y_bcast_.push_back(1); - result_.push_back(elements); // grad_x_reduce_ and grad_y_reduce_ are left as empty } else { // Reverse the shape of x and y for convenience. // After the reverse, 0-th is the inner-most dimension. Vec x = sx; - Reverse(&x); Vec y = sy; + Reverse(&x); Reverse(&y); // 1-extend and align x and y so that they are the same size. @@ -108,11 +108,18 @@ BCast::BCast(const Vec& sx, const Vec& sy) { // Both side are 1s. grad_x_reduce_idx_.push_back(n - 1 - i); grad_y_reduce_idx_.push_back(n - 1 - i); + if (!TF_PREDICT_TRUE(fewer_dims_optimization)) { + result_.push_back(o_i); + x_reshape_.push_back(x_i); + x_bcast_.push_back(bx_i); + y_reshape_.push_back(y_i); + y_bcast_.push_back(by_i); + } continue; - } else if (prev == curr) { - // It is a run of the same cases (no broadcast, x broadcast to - // y, y broadcast to x). We can reshape the input so that fewer - // dimensions are involved in the intermediate computation. + } else if (TF_PREDICT_TRUE(fewer_dims_optimization) && prev == curr) { + // It is a run of the same cases(no broadcast, x broadcast to y, y + // broadcast to x). 
We can reshape the input so that fewer dimensions + // are involved in the intermediate computation. result_.back() *= o_i; x_reshape_.back() *= x_i; x_bcast_.back() *= bx_i; @@ -150,4 +157,18 @@ BCast::BCast(const Vec& sx, const Vec& sy) { } } +BCast::Vec BCast::FromShape(const TensorShape& shape) { + const int N = shape.dims(); + BCast::Vec ret(N); + for (int i = 0; i < N; ++i) { + ret[i] = shape.dim_size(i); + } + return ret; +} + +TensorShape BCast::ToShape(const BCast::Vec& vec) { + TensorShape shape(vec); + return shape; +} + } // end namespace tensorflow |