diff options
author | 2016-05-03 17:00:04 -0800 | |
---|---|---|
committer | 2016-05-03 18:01:30 -0700 | |
commit | 90caf0e179e82e3f014203e22faa055b0bb0f014 (patch) | |
tree | 6efee433b6b15c6d3d88174b060a864a335f1c09 /tensorflow/core/util/bcast.cc | |
parent | 0393436023d8fe7a2f98284420c58de6e461212a (diff) |
Allow BCast to disable the optimization of grouping consecutive dimensions.
Also move a few helper functions into the BCast class.
Before:
Benchmark Time(ns) CPU(ns) Iterations
------------------------------------------------
BM_BCastSetup/0 114 114 6062466 different_shapes
BM_BCastSetup/1 20 20 34863786 same_shapes
After:
Benchmark Time(ns) CPU(ns) Iterations
------------------------------------------------
BM_BCastSetup/0 126 126 5562646 different_shapes
BM_BCastSetup/1 32 32 21627499 same_shapes
The performance hit likely stems from 1-2 branch mispredictions (~7ns per misprediction).
Change: 121437477
Diffstat (limited to 'tensorflow/core/util/bcast.cc')
-rw-r--r-- | tensorflow/core/util/bcast.cc | 39 |
1 file changed, 30 insertions, 9 deletions
diff --git a/tensorflow/core/util/bcast.cc b/tensorflow/core/util/bcast.cc index c045ee902b..d49512819c 100644 --- a/tensorflow/core/util/bcast.cc +++ b/tensorflow/core/util/bcast.cc @@ -21,29 +21,29 @@ namespace tensorflow { /* static */ void BCast::Reverse(Vec* shape) { std::reverse(shape->begin(), shape->end()); } -BCast::BCast(const Vec& sx, const Vec& sy) { - if (sx == sy) { +BCast::BCast(const Vec& sx, const Vec& sy, const bool fewer_dims_optimization) { + if (sx == sy && TF_PREDICT_TRUE(fewer_dims_optimization)) { // Fast path for common case of identical shapes for sx and sy int64 elements = 1; const int n = sx.size(); output_.resize(n); for (int i = 0; i < n; i++) { - int64 dim = sx[i]; + const int64 dim = sx[i]; elements *= dim; output_[i] = dim; } + result_.push_back(elements); x_reshape_.push_back(elements); y_reshape_.push_back(elements); x_bcast_.push_back(1); y_bcast_.push_back(1); - result_.push_back(elements); // grad_x_reduce_ and grad_y_reduce_ are left as empty } else { // Reverse the shape of x and y for convenience. // After the reverse, 0-th is the inner-most dimension. Vec x = sx; - Reverse(&x); Vec y = sy; + Reverse(&x); Reverse(&y); // 1-extend and align x and y so that they are the same size. @@ -108,11 +108,18 @@ BCast::BCast(const Vec& sx, const Vec& sy) { // Both side are 1s. grad_x_reduce_idx_.push_back(n - 1 - i); grad_y_reduce_idx_.push_back(n - 1 - i); + if (!TF_PREDICT_TRUE(fewer_dims_optimization)) { + result_.push_back(o_i); + x_reshape_.push_back(x_i); + x_bcast_.push_back(bx_i); + y_reshape_.push_back(y_i); + y_bcast_.push_back(by_i); + } continue; - } else if (prev == curr) { - // It is a run of the same cases (no broadcast, x broadcast to - // y, y broadcast to x). We can reshape the input so that fewer - // dimensions are involved in the intermediate computation. + } else if (TF_PREDICT_TRUE(fewer_dims_optimization) && prev == curr) { + // It is a run of the same cases(no broadcast, x broadcast to y, y + // broadcast to x). 
We can reshape the input so that fewer dimensions + // are involved in the intermediate computation. result_.back() *= o_i; x_reshape_.back() *= x_i; x_bcast_.back() *= bx_i; @@ -150,4 +157,18 @@ BCast::BCast(const Vec& sx, const Vec& sy) { } } +BCast::Vec BCast::FromShape(const TensorShape& shape) { + const int N = shape.dims(); + BCast::Vec ret(N); + for (int i = 0; i < N; ++i) { + ret[i] = shape.dim_size(i); + } + return ret; +} + +TensorShape BCast::ToShape(const BCast::Vec& vec) { + TensorShape shape(vec); + return shape; +} + } // end namespace tensorflow |