author     Zongheng Yang <zongheng.y@gmail.com>  2016-05-03 17:00:04 -0800
committer  TensorFlower Gardener <gardener@tensorflow.org>  2016-05-03 18:01:30 -0700
commit     90caf0e179e82e3f014203e22faa055b0bb0f014 (patch)
tree       6efee433b6b15c6d3d88174b060a864a335f1c09 /tensorflow/core/util/bcast.cc
parent     0393436023d8fe7a2f98284420c58de6e461212a (diff)
Allow BCast to disable the optimization of grouping consecutive dimensions.
Also move a few helper functions into the BCast class.

Before:
Benchmark          Time(ns)  CPU(ns)  Iterations
------------------------------------------------
BM_BCastSetup/0         114      114     6062466  different_shapes
BM_BCastSetup/1          20       20    34863786  same_shapes

After:
Benchmark          Time(ns)  CPU(ns)  Iterations
------------------------------------------------
BM_BCastSetup/0         126      126     5562646  different_shapes
BM_BCastSetup/1          32       32    21627499  same_shapes

The performance hit likely stems from 1-2 branch mispredictions (~7ns per misprediction).

Change: 121437477
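The ~12ns regression on both benchmarks is consistent with 1-2 extra mispredicted branches at ~7ns apiece. As a rough sketch of what the new flag controls (a hypothetical call site, not part of the commit: the example shapes are made up, and it assumes the x_reshape()/y_reshape() accessors and the fewer_dims_optimization = true default declared in bcast.h):

#include "tensorflow/core/util/bcast.h"

void GroupingExample() {
  tensorflow::BCast::Vec sx = {2, 3, 4, 5};
  tensorflow::BCast::Vec sy = {2, 3, 1, 5};

  // Default: consecutive dimensions that broadcast the same way are merged,
  // so the leading {2, 3} run collapses into a single dimension of 6.
  tensorflow::BCast grouped(sx, sy);
  // grouped.x_reshape() should be {6, 4, 5}; grouped.y_reshape() {6, 1, 5}.

  // With the optimization off, every input dimension is preserved, for
  // callers that need the per-dimension broadcast information.
  tensorflow::BCast ungrouped(sx, sy, /*fewer_dims_optimization=*/false);
  // ungrouped.x_reshape() should be {2, 3, 4, 5}; y_reshape() {2, 3, 1, 5}.
}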
Diffstat (limited to 'tensorflow/core/util/bcast.cc')
-rw-r--r--  tensorflow/core/util/bcast.cc  |  39
1 file changed, 30 insertions(+), 9 deletions(-)
diff --git a/tensorflow/core/util/bcast.cc b/tensorflow/core/util/bcast.cc
index c045ee902b..d49512819c 100644
--- a/tensorflow/core/util/bcast.cc
+++ b/tensorflow/core/util/bcast.cc
@@ -21,29 +21,29 @@ namespace tensorflow {
/* static */
void BCast::Reverse(Vec* shape) { std::reverse(shape->begin(), shape->end()); }
-BCast::BCast(const Vec& sx, const Vec& sy) {
- if (sx == sy) {
+BCast::BCast(const Vec& sx, const Vec& sy, const bool fewer_dims_optimization) {
+ if (sx == sy && TF_PREDICT_TRUE(fewer_dims_optimization)) {
// Fast path for common case of identical shapes for sx and sy
int64 elements = 1;
const int n = sx.size();
output_.resize(n);
for (int i = 0; i < n; i++) {
- int64 dim = sx[i];
+ const int64 dim = sx[i];
elements *= dim;
output_[i] = dim;
}
+ result_.push_back(elements);
x_reshape_.push_back(elements);
y_reshape_.push_back(elements);
x_bcast_.push_back(1);
y_bcast_.push_back(1);
- result_.push_back(elements);
// grad_x_reduce_ and grad_y_reduce_ are left as empty
} else {
// Reverse the shape of x and y for convenience.
// After the reverse, 0-th is the inner-most dimension.
Vec x = sx;
- Reverse(&x);
Vec y = sy;
+ Reverse(&x);
Reverse(&y);
// 1-extend and align x and y so that they are the same size.
@@ -108,11 +108,18 @@ BCast::BCast(const Vec& sx, const Vec& sy) {
// Both sides are 1s.
grad_x_reduce_idx_.push_back(n - 1 - i);
grad_y_reduce_idx_.push_back(n - 1 - i);
+ if (!TF_PREDICT_TRUE(fewer_dims_optimization)) {
+ result_.push_back(o_i);
+ x_reshape_.push_back(x_i);
+ x_bcast_.push_back(bx_i);
+ y_reshape_.push_back(y_i);
+ y_bcast_.push_back(by_i);
+ }
continue;
- } else if (prev == curr) {
- // It is a run of the same cases (no broadcast, x broadcast to
- // y, y broadcast to x). We can reshape the input so that fewer
- // dimensions are involved in the intermediate computation.
+ } else if (TF_PREDICT_TRUE(fewer_dims_optimization) && prev == curr) {
+ // It is a run of the same cases (no broadcast, x broadcast to y, y
+ // broadcast to x). We can reshape the input so that fewer dimensions
+ // are involved in the intermediate computation.
result_.back() *= o_i;
x_reshape_.back() *= x_i;
x_bcast_.back() *= bx_i;
@@ -150,4 +157,18 @@ BCast::BCast(const Vec& sx, const Vec& sy) {
}
}
+BCast::Vec BCast::FromShape(const TensorShape& shape) {
+ const int N = shape.dims();
+ BCast::Vec ret(N);
+ for (int i = 0; i < N; ++i) {
+ ret[i] = shape.dim_size(i);
+ }
+ return ret;
+}
+
+TensorShape BCast::ToShape(const BCast::Vec& vec) {
+ TensorShape shape(vec);
+ return shape;
+}
+
} // end namespace tensorflow
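A quick usage sketch for the FromShape/ToShape helpers added at the bottom of the diff (a hypothetical wrapper, not from the commit; it assumes the IsValid() and output_shape() accessors declared in bcast.h):

#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/util/bcast.h"

// Round trip: TensorShape -> BCast::Vec -> broadcast -> TensorShape.
tensorflow::TensorShape BroadcastedShape(const tensorflow::TensorShape& x,
                                         const tensorflow::TensorShape& y) {
  tensorflow::BCast b(tensorflow::BCast::FromShape(x),
                      tensorflow::BCast::FromShape(y));
  CHECK(b.IsValid());  // real callers should handle invalid broadcasts instead
  return tensorflow::BCast::ToShape(b.output_shape());
}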