diff options
author | Adrian Kuegel <akuegel@google.com> | 2018-06-27 10:02:59 -0700 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2018-06-27 10:06:16 -0700 |
commit | 8e9784264d9df8ac59821008283aa9c76a3bf64b (patch) | |
tree | 5c6322e42f4a3169a59593b367e9fc16eb0ce155 /tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc | |
parent | 80bc59b99bca7f9bc167975bab1c295bc4793c9a (diff) |
Fix check whether there is more than one tile.
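The previous check compared the number of elements in a tile against
the number of elements in the input shape. That is wrong when the tile is bigger
than the input in one dimension but smaller in the other: the element counts can
then suggest a single tile even though several tiles exist along the smaller
dimension, so the non-atomic update was emitted where an atomic one is required.
The new check compares each tile dimension against the matching input dimension.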
PiperOrigin-RevId: 202326635
Diffstat (limited to 'tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc')
-rw-r--r-- | tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc | 26 |
1 files changed, 15 insertions, 11 deletions
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc index fbd647f251..bdb9e77da4 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc @@ -1531,7 +1531,7 @@ Status IrEmitterUnnested::EmitRowReduction( // for (element_id_in_tile : range(x_tile_size)) { // int x = x_in_tiles * x_tile_size + element_id_in_tile; // if (x < width) - // partial_result = reducer(partial_result, input[z][y][z]); + // partial_result = reducer(partial_result, input[z][y][x]); // } // AtomicReducer(&output[y], partial_result); // } @@ -1585,10 +1585,11 @@ Status IrEmitterUnnested::EmitRowReduction( // for (int element_id_in_z_tile = 0; element_id_in_z_tile < z_tile_size; // ++element_id_in_z_tile) { // z = z_in_tiles * z_tile_size + element_id_in_z_tile; + // int tx = x; // for (int element_id_in_x_tile = 0; // element_id_in_x_tile < x_tile_size; - // ++element_id_in_x_tile, x += warpSize) { - // partial_result = Reducer(partial_result, input[z][y][x]); + // ++element_id_in_x_tile, tx += warpSize) { + // partial_result = Reducer(partial_result, input[z][y][tx]); // } // } // } else { @@ -1596,10 +1597,11 @@ Status IrEmitterUnnested::EmitRowReduction( // for (int element_id_in_z_tile = 0; element_id_in_z_tile < z_tile_size; // ++element_id_in_z_tile) { // z = z_in_tiles * z_tile_size + element_id_in_z_tile; + // int tx = x; // for (int element_id_in_x_tile = 0; element_id_in_x_tile < - // x_tile_size; ++element_id_in_tile, x += warpSize) { - // if (x < width) - // partial_result = Reducer(partial_result, input[z][y][x]); + // x_tile_size; ++element_id_in_tile, tx += warpSize) { + // if (tx < width) + // partial_result = Reducer(partial_result, input[z][y][tx]); // } // } // } @@ -1838,15 +1840,17 @@ Status IrEmitterUnnested::EmitRowReduction( reduce_output_shapes[i]), &ir_builder_), &ir_builder_, "output_element_address"); - if 
(x_tile_size * z_tile_size < depth * width) { - TF_RETURN_IF_ERROR(EmitAtomicOperationForNestedComputation( - *reducers[i], output_address, - partial_reduction_result_addresses[i])); - } else { + // We don't need to emit atomic operations if there is only one tile of + // results. 'depth' is the z dimension, 'width' is the x dimension. + if (z_tile_size >= depth && x_tile_size >= width) { TF_RETURN_IF_ERROR(EmitCallToNestedComputation( *reducers[i], {output_address, partial_reduction_result_addresses[i]}, output_address)); + } else { + TF_RETURN_IF_ERROR(EmitAtomicOperationForNestedComputation( + *reducers[i], output_address, + partial_reduction_result_addresses[i])); } } return Status::OK(); |