about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
-rw-r--r-- tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc 4
1 files changed, 3 insertions, 1 deletions
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
index abc739d181..ec7f3c75c4 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
@@ -123,10 +123,12 @@ void UpdateLaunchDimensions(const LaunchDimensions& launch_dims, Thunk* thunk,
llvm::ConstantInt* threads_per_block_ir_value = llvm::ConstantInt::get(
llvm::IntegerType::get(llvm_context, /*NumBits=*/32),
launch_dims.threads_per_block());
+ // Our launch bounds are exact, so we can specify them as reqntidx rather than
+ // maxntidx.
nvvm_annotations_node->addOperand(llvm::MDNode::get(
llvm_context,
{llvm::ConstantAsMetadata::get(ir_kernel),
- llvm::MDString::get(llvm_context, "maxntidx"),
+ llvm::MDString::get(llvm_context, "reqntidx"),
llvm::ConstantAsMetadata::get(threads_per_block_ir_value)}));
}
} // namespace