aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
diff options
context:
space:
mode:
authorGravatar Adrian Kuegel <akuegel@google.com>2018-06-27 10:02:59 -0700
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2018-06-27 10:06:16 -0700
commit8e9784264d9df8ac59821008283aa9c76a3bf64b (patch)
tree5c6322e42f4a3169a59593b367e9fc16eb0ce155 /tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
parent80bc59b99bca7f9bc167975bab1c295bc4793c9a (diff)
Fix check whether there is more than one tile.
The previous check compared the number of elements in a tile against the number of elements in the input shape. This doesn't work when one dimension of the tile is bigger than the corresponding input dimension while the other dimension is smaller. PiperOrigin-RevId: 202326635
Diffstat (limited to 'tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc')
-rw-r--r--tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc26
1 files changed, 15 insertions, 11 deletions
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
index fbd647f251..bdb9e77da4 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
@@ -1531,7 +1531,7 @@ Status IrEmitterUnnested::EmitRowReduction(
// for (element_id_in_tile : range(x_tile_size)) {
// int x = x_in_tiles * x_tile_size + element_id_in_tile;
// if (x < width)
- // partial_result = reducer(partial_result, input[z][y][z]);
+ // partial_result = reducer(partial_result, input[z][y][x]);
// }
// AtomicReducer(&output[y], partial_result);
// }
@@ -1585,10 +1585,11 @@ Status IrEmitterUnnested::EmitRowReduction(
// for (int element_id_in_z_tile = 0; element_id_in_z_tile < z_tile_size;
// ++element_id_in_z_tile) {
// z = z_in_tiles * z_tile_size + element_id_in_z_tile;
+ // int tx = x;
// for (int element_id_in_x_tile = 0;
// element_id_in_x_tile < x_tile_size;
- // ++element_id_in_x_tile, x += warpSize) {
- // partial_result = Reducer(partial_result, input[z][y][x]);
+ // ++element_id_in_x_tile, tx += warpSize) {
+ // partial_result = Reducer(partial_result, input[z][y][tx]);
// }
// }
// } else {
@@ -1596,10 +1597,11 @@ Status IrEmitterUnnested::EmitRowReduction(
// for (int element_id_in_z_tile = 0; element_id_in_z_tile < z_tile_size;
// ++element_id_in_z_tile) {
// z = z_in_tiles * z_tile_size + element_id_in_z_tile;
+ // int tx = x;
// for (int element_id_in_x_tile = 0; element_id_in_x_tile <
- // x_tile_size; ++element_id_in_tile, x += warpSize) {
- // if (x < width)
- // partial_result = Reducer(partial_result, input[z][y][x]);
+ // x_tile_size; ++element_id_in_tile, tx += warpSize) {
+ // if (tx < width)
+ // partial_result = Reducer(partial_result, input[z][y][tx]);
// }
// }
// }
@@ -1838,15 +1840,17 @@ Status IrEmitterUnnested::EmitRowReduction(
reduce_output_shapes[i]),
&ir_builder_),
&ir_builder_, "output_element_address");
- if (x_tile_size * z_tile_size < depth * width) {
- TF_RETURN_IF_ERROR(EmitAtomicOperationForNestedComputation(
- *reducers[i], output_address,
- partial_reduction_result_addresses[i]));
- } else {
+ // We don't need to emit atomic operations if there is only one tile of
+ // results. 'depth' is the z dimension, 'width' is the x dimension.
+ if (z_tile_size >= depth && x_tile_size >= width) {
TF_RETURN_IF_ERROR(EmitCallToNestedComputation(
*reducers[i],
{output_address, partial_reduction_result_addresses[i]},
output_address));
+ } else {
+ TF_RETURN_IF_ERROR(EmitAtomicOperationForNestedComputation(
+ *reducers[i], output_address,
+ partial_reduction_result_addresses[i]));
}
}
return Status::OK();