about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: Sanjoy Das <sanjoy@google.com>, 2018-07-27 13:24:46 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org>, 2018-07-27 13:28:38 -0700
commit: 388d0d860110a19a9d133fe4de85f8f6fa060cde (patch)
tree: ce12ca4a32266f4a4bc5101e65b0123f810f1373
parent: 90fe37ab8d056c56a5127e9b7ae237c04a7907ec (diff)
Use constant buffer allocations for XLA:CPU
This is simpler than the corresponding change to XLA:GPU because on XLA:CPU all instructions are codegened, so we can always embed a pointer to the constant global variable directly in the generated LLVM IR.

PiperOrigin-RevId: 206363887
-rw-r--r-- tensorflow/compiler/xla/service/cpu/BUILD 1
-rw-r--r-- tensorflow/compiler/xla/service/cpu/cpu_compiler.cc 16
-rw-r--r-- tensorflow/compiler/xla/service/cpu/cpu_executable.cc 5
-rw-r--r-- tensorflow/compiler/xla/service/cpu/ir_emitter.cc 44
-rw-r--r-- tensorflow/compiler/xla/service/cpu/ir_emitter.h 6
-rw-r--r-- tensorflow/compiler/xla/service/gpu/BUILD 4
-rw-r--r-- tensorflow/compiler/xla/service/gpu/buffer_allocations.cc 39
-rw-r--r-- tensorflow/compiler/xla/service/gpu/buffer_allocations.h 9
-rw-r--r-- tensorflow/compiler/xla/service/gpu/gpu_executable.cc 11
-rw-r--r-- tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc 4
-rw-r--r-- tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc 10
-rw-r--r-- tensorflow/compiler/xla/service/llvm_ir/BUILD 9
-rw-r--r-- tensorflow/compiler/xla/service/llvm_ir/alias_analysis_test.cc 2
-rw-r--r-- tensorflow/compiler/xla/service/llvm_ir/buffer_assignment_util.cc 59
-rw-r--r-- tensorflow/compiler/xla/service/llvm_ir/buffer_assignment_util.h 34
-rw-r--r-- tensorflow/compiler/xla/tests/local_client_aot_test_helper.cc 3
16 files changed, 179 insertions, 77 deletions
diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD
index 6c997a068d..504b61d134 100644
--- a/tensorflow/compiler/xla/service/cpu/BUILD
+++ b/tensorflow/compiler/xla/service/cpu/BUILD
@@ -252,6 +252,7 @@ cc_library(
"//tensorflow/compiler/xla/service:hlo_module_config",
"//tensorflow/compiler/xla/service:name_uniquer",
"//tensorflow/compiler/xla/service/llvm_ir:alias_analysis",
+ "//tensorflow/compiler/xla/service/llvm_ir:buffer_assignment_util",
"//tensorflow/compiler/xla/service/llvm_ir:dynamic_update_slice_util",
"//tensorflow/compiler/xla/service/llvm_ir:fused_ir_emitter",
"//tensorflow/compiler/xla/service/llvm_ir:ir_array",
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
index 29fa29d33a..b49ea89896 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
@@ -562,7 +562,9 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::RunBackend(
BufferAssigner::Run(
module.get(),
xla::MakeUnique<SequentialHloOrdering>(module.get(), module_sequence),
- BufferSizeBytesFunction(), memory_alignment));
+ BufferSizeBytesFunction(), memory_alignment,
+ /*allow_input_output_aliasing=*/false,
+ /*allocate_buffers_for_constants=*/true));
// BufferAssignment::ToString() includes a header, so no need for us to
// print one ourselves.
XLA_VLOG_LINES(2, assignment->ToString());
@@ -584,6 +586,8 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::RunBackend(
std::move(computation_to_profile_idx),
&target_machine_features);
+ TF_RETURN_IF_ERROR(ir_emitter.EmitConstantGlobals());
+
for (auto embedded_computation :
entry_computation->MakeEmbeddedComputationsList()) {
if (embedded_computation->IsFusionComputation()) {
@@ -747,7 +751,9 @@ CpuCompiler::CompileAheadOfTime(std::vector<std::unique_ptr<HloModule>> modules,
BufferAssigner::Run(
module,
xla::MakeUnique<SequentialHloOrdering>(module, module_sequence),
- BufferSizeBytesFunction(), memory_alignment));
+ BufferSizeBytesFunction(), memory_alignment,
+ /*allow_input_output_aliasing=*/false,
+ /*allocate_buffers_for_constants=*/true));
// BufferAssignment::ToString() includes a header, so no need for us to
// print one ourselves.
XLA_VLOG_LINES(2, assignment->ToString());
@@ -776,6 +782,9 @@ CpuCompiler::CompileAheadOfTime(std::vector<std::unique_ptr<HloModule>> modules,
std::move(instruction_to_profile_idx),
std::move(computation_to_profile_idx),
&target_machine_features);
+
+ TF_RETURN_IF_ERROR(ir_emitter.EmitConstantGlobals());
+
HloComputation* computation = module->entry_computation();
for (auto embedded_computation :
computation->MakeEmbeddedComputationsList()) {
@@ -832,7 +841,8 @@ CpuCompiler::CompileAheadOfTime(std::vector<std::unique_ptr<HloModule>> modules,
BufferSizes buffer_sizes;
for (const BufferAllocation& allocation : assignment->Allocations()) {
// Callers don't need to allocate temporary buffers for parameters.
- if (allocation.is_entry_computation_parameter()) {
+ if (allocation.is_entry_computation_parameter() ||
+ allocation.is_constant()) {
buffer_sizes.push_back(-1);
continue;
}
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc
index 1093559892..81e17a5cd4 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc
@@ -88,6 +88,11 @@ Status CpuExecutable::AllocateBuffers(
continue;
}
+ if (allocation.is_constant()) {
+ VLOG(3) << "allocation #" << i << " is a constant";
+ continue;
+ }
+
if (allocation.is_thread_local()) {
VLOG(3) << "buffer #" << i << " is thread-local";
continue;
diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
index 9d9d3e04a9..a6d8551841 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
@@ -51,6 +51,7 @@ limitations under the License.
#include "tensorflow/compiler/xla/service/hlo_casting_utils.h"
#include "tensorflow/compiler/xla/service/hlo_instructions.h"
#include "tensorflow/compiler/xla/service/hlo_opcode.h"
+#include "tensorflow/compiler/xla/service/llvm_ir/buffer_assignment_util.h"
#include "tensorflow/compiler/xla/service/llvm_ir/dynamic_update_slice_util.h"
#include "tensorflow/compiler/xla/service/llvm_ir/fused_ir_emitter.h"
#include "tensorflow/compiler/xla/service/llvm_ir/llvm_loop.h"
@@ -175,25 +176,36 @@ llvm::Constant* IrEmitter::EmitGlobalForLiteral(const Literal& literal) {
result_global, IrShapeType(literal.shape())->getPointerTo());
}
-Status IrEmitter::HandleConstant(HloInstruction* constant) {
- VLOG(2) << "HandleConstant: " << constant->ToString();
- const Literal& literal = constant->literal();
- llvm::Constant* global_for_const;
+Status IrEmitter::EmitConstantGlobals() {
+ for (const BufferAllocation& allocation : assignment_.Allocations()) {
+ if (!allocation.is_constant()) {
+ continue;
+ }
- auto it = emitted_literals_.find(&literal);
- if (it != emitted_literals_.end()) {
- global_for_const = it->second;
- } else {
- global_for_const = EmitGlobalForLiteral(literal);
- emitted_literals_[&literal] = global_for_const;
+ const Literal& literal = llvm_ir::LiteralForConstantAllocation(allocation);
+ llvm::Constant* global_for_const;
+ auto it = emitted_literals_.find(&literal);
+ if (it != emitted_literals_.end()) {
+ global_for_const = it->second;
+ } else {
+ global_for_const = EmitGlobalForLiteral(literal);
+ InsertOrDie(&emitted_literals_, &literal, global_for_const);
+ }
+
+ InsertOrDie(&constant_buffer_to_global_, allocation.index(),
+ global_for_const);
}
- emitted_value_[constant] = global_for_const;
- VLOG(2) << " emitted value: " << llvm_ir::DumpToString(*global_for_const);
- VLOG(2) << " its type: "
- << llvm_ir::DumpToString(*global_for_const->getType());
+
return Status::OK();
}
+Status IrEmitter::HandleConstant(HloInstruction* constant) {
+ VLOG(2) << "HandleConstant: " << constant->ToString();
+ // IrEmitter::EmitConstantGlobals has already taken care of emitting the body
+ // of the constant.
+ return EmitTargetAddressForOp(constant);
+}
+
Status IrEmitter::HandleCopy(HloInstruction* copy) {
if (ShapeUtil::IsTuple(copy->shape())) {
// kCopy shallow copies a tuple so just memcpy the top-level buffer.
@@ -2712,6 +2724,10 @@ llvm::Value* IrEmitter::EmitTempBufferPointer(
return b_.CreateBitCast(tempbuf_address, element_type->getPointerTo());
}
+ if (allocation.is_constant()) {
+ return FindOrDie(constant_buffer_to_global_, allocation.index());
+ }
+
llvm::Value* tempbuf_address_ptr = llvm_ir::EmitBufferIndexingGEP(
GetTempBuffersArgument(), slice.index(), &b_);
llvm::LoadInst* tempbuf_address_base = b_.CreateLoad(tempbuf_address_ptr);
diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.h b/tensorflow/compiler/xla/service/cpu/ir_emitter.h
index cf7fa05b20..03bbb2afb5 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.h
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.h
@@ -105,6 +105,9 @@ class IrEmitter : public DfsHloVisitorWithDefault {
PrimitiveType return_type, HloComputation* computation,
const std::vector<llvm::Value*>& arguments, tensorflow::StringPiece name);
+ // Emit an LLVM global variable for every constant buffer allocation.
+ Status EmitConstantGlobals();
+
protected:
//
// The following methods implement the DfsHloVisitor interface.
@@ -560,6 +563,9 @@ class IrEmitter : public DfsHloVisitorWithDefault {
LiteralPtrHashFunctor, LiteralPtrEqualityFunctor>
emitted_literals_;
+ tensorflow::gtl::FlatMap<BufferAllocation::Index, llvm::Constant*>
+ constant_buffer_to_global_;
+
TF_DISALLOW_COPY_AND_ASSIGN(IrEmitter);
};
diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD
index 885365105f..a73a341fdb 100644
--- a/tensorflow/compiler/xla/service/gpu/BUILD
+++ b/tensorflow/compiler/xla/service/gpu/BUILD
@@ -120,6 +120,7 @@ cc_library(
"//tensorflow/compiler/xla/service:buffer_assignment",
"//tensorflow/compiler/xla/service:hlo",
"//tensorflow/compiler/xla/service/llvm_ir:alias_analysis",
+ "//tensorflow/compiler/xla/service/llvm_ir:buffer_assignment_util",
"//tensorflow/compiler/xla/service/llvm_ir:ir_array",
"//tensorflow/compiler/xla/service/llvm_ir:llvm_util",
"//tensorflow/compiler/xla/service/llvm_ir:tuple_ops",
@@ -165,6 +166,7 @@ cc_library(
"//tensorflow/compiler/xla/service:elemental_ir_emitter",
"//tensorflow/compiler/xla/service:hlo",
"//tensorflow/compiler/xla/service:name_uniquer",
+ "//tensorflow/compiler/xla/service/llvm_ir:buffer_assignment_util",
"//tensorflow/compiler/xla/service/llvm_ir:dynamic_update_slice_util",
"//tensorflow/compiler/xla/service/llvm_ir:fused_ir_emitter",
"//tensorflow/compiler/xla/service/llvm_ir:ir_array",
@@ -323,9 +325,9 @@ cc_library(
"//tensorflow/compiler/xla/service:hlo_execution_profile",
"//tensorflow/compiler/xla/service:logical_buffer",
"//tensorflow/compiler/xla/service:shaped_buffer",
- "//tensorflow/compiler/xla/service:stream_pool",
"//tensorflow/compiler/xla/service:transfer_manager",
"//tensorflow/compiler/xla/service:tuple_points_to_analysis",
+ "//tensorflow/compiler/xla/service/llvm_ir:buffer_assignment_util",
"//tensorflow/core:lib",
"//tensorflow/core:lib_internal",
"//tensorflow/core:stream_executor_no_cuda",
diff --git a/tensorflow/compiler/xla/service/gpu/buffer_allocations.cc b/tensorflow/compiler/xla/service/gpu/buffer_allocations.cc
index 20d4285766..537295292b 100644
--- a/tensorflow/compiler/xla/service/gpu/buffer_allocations.cc
+++ b/tensorflow/compiler/xla/service/gpu/buffer_allocations.cc
@@ -173,45 +173,6 @@ void BufferAllocations::SetBuffer(BufferAllocation::Index buffer_index,
buffers_[buffer_index] = buffer;
}
-static const HloInstruction& InstrForConstantBufferAllocation(
- const BufferAllocation& allocation) {
- CHECK(allocation.is_constant());
- HloInstruction* const_instr = nullptr;
- for (const auto& buffer_offset_pair : allocation.assigned_buffers()) {
- const LogicalBuffer* buffer = buffer_offset_pair.first;
- // BufferAssignment may have assigned non-constant instructions to this
- // allocation too so we can't CHECK this condition. E.g. for
- //
- // while(init = constant, body = identity, cond = ...)
- //
- // the LogicalBuffer for the kWhile instruction will have the same
- // BufferAllocation as the LogicalBuffer for the (init) constant.
- if (buffer->instruction()->opcode() == HloOpcode::kConstant) {
- CHECK_EQ(const_instr, nullptr)
- << const_instr->ToString() << " " << buffer->ToString();
- const_instr = buffer->instruction();
- }
- }
- CHECK_NE(const_instr, nullptr);
- return *const_instr;
-}
-
-string ConstantBufferAllocationToGlobalName(
- const BufferAllocation& allocation) {
- string instr_name = InstrForConstantBufferAllocation(allocation).name();
- for (char& c : instr_name) {
- if (c == '.') {
- c = '_';
- }
- }
- return tensorflow::strings::StrCat("buffer_for_", instr_name);
-}
-
-const Literal& LiteralForConstantAllocation(
- const BufferAllocation& allocation) {
- return InstrForConstantBufferAllocation(allocation).literal();
-}
-
bool ShouldEmitLiteralInLlvmIr(const Literal& literal) {
// LLVM can sometimes do interesting optimizations using scalar constants.
return ShapeUtil::IsScalar(literal.shape());
diff --git a/tensorflow/compiler/xla/service/gpu/buffer_allocations.h b/tensorflow/compiler/xla/service/gpu/buffer_allocations.h
index f21861ed81..f13eab0dd7 100644
--- a/tensorflow/compiler/xla/service/gpu/buffer_allocations.h
+++ b/tensorflow/compiler/xla/service/gpu/buffer_allocations.h
@@ -107,15 +107,6 @@ class BufferAllocations {
bool torn_down_ = false;
};
-// In XLA:GPU we map constant buffer allocations to globals in the generated
-// LLVM IR. This function gives us the name of the global variable a constant
-// buffer is mapped to.
-string ConstantBufferAllocationToGlobalName(const BufferAllocation& allocation);
-
-// Return the Literal corresponding to `allocation`, which must be a constant
-// allocation.
-const Literal& LiteralForConstantAllocation(const BufferAllocation& allocation);
-
// LLVM and PTXAS don't deal well with large constants, so we only emit very
// small constants directly in LLVM IR. Larger constants are emitted with zero
// initializers in LLVM IR and are later overwritten when the PTX/CUBIN is
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc
index 0179b43240..bb71c79fd7 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc
@@ -24,6 +24,7 @@ limitations under the License.
#include "tensorflow/compiler/xla/service/gpu/buffer_allocations.h"
#include "tensorflow/compiler/xla/service/gpu/hlo_execution_profiler.h"
#include "tensorflow/compiler/xla/service/hlo_instruction.h"
+#include "tensorflow/compiler/xla/service/llvm_ir/buffer_assignment_util.h"
#include "tensorflow/compiler/xla/service/logical_buffer.h"
#include "tensorflow/compiler/xla/service/shaped_buffer.h"
#include "tensorflow/compiler/xla/service/transfer_manager.h"
@@ -206,13 +207,15 @@ GpuExecutable::ResolveConstantGlobals(se::StreamExecutor* executor) {
TF_ASSIGN_OR_RETURN(
se::DeviceMemoryBase global,
executor->GetUntypedSymbol(
- ConstantBufferAllocationToGlobalName(allocation), module_handle));
+ llvm_ir::ConstantBufferAllocationToGlobalName(allocation),
+ module_handle));
VLOG(3) << "Resolved global "
- << ConstantBufferAllocationToGlobalName(allocation) << " to "
- << global.opaque();
+ << llvm_ir::ConstantBufferAllocationToGlobalName(allocation)
+ << " to " << global.opaque();
InsertOrDie(&globals, i, global);
- const Literal& literal = LiteralForConstantAllocation(allocation);
+ const Literal& literal =
+ llvm_ir::LiteralForConstantAllocation(allocation);
CHECK(ShapeUtil::IsArray(literal.shape()));
if (!ShouldEmitLiteralInLlvmIr(literal)) {
VLOG(3) << "H2D memcpy for constant with shape "
diff --git a/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc b/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc
index c02a95d193..8c11cd0541 100644
--- a/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc
+++ b/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc
@@ -21,6 +21,7 @@ limitations under the License.
#include "tensorflow/compiler/xla/service/gpu/buffer_allocations.h"
#include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h"
#include "tensorflow/compiler/xla/service/hlo_opcode.h"
+#include "tensorflow/compiler/xla/service/llvm_ir/buffer_assignment_util.h"
#include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h"
#include "tensorflow/compiler/xla/service/llvm_ir/tuple_ops.h"
#include "tensorflow/core/lib/strings/str_util.h"
@@ -114,7 +115,8 @@ void HloToIrBindings::EmitBasePointersForHlos(
} else if (slice.allocation()->is_constant()) {
llvm::Value* global_for_constant =
module_->getGlobalVariable(llvm_ir::AsStringRef(
- ConstantBufferAllocationToGlobalName(*slice.allocation())));
+ llvm_ir::ConstantBufferAllocationToGlobalName(
+ *slice.allocation())));
BindHloToIrValue(*non_io_hlo, global_for_constant);
} else {
const int64 offset = slice.offset();
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
index fb9540b7ef..3a5394dac6 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
@@ -60,6 +60,7 @@ limitations under the License.
#include "tensorflow/compiler/xla/service/hlo_computation.h"
#include "tensorflow/compiler/xla/service/hlo_instruction.h"
#include "tensorflow/compiler/xla/service/hlo_opcode.h"
+#include "tensorflow/compiler/xla/service/llvm_ir/buffer_assignment_util.h"
#include "tensorflow/compiler/xla/service/llvm_ir/dynamic_update_slice_util.h"
#include "tensorflow/compiler/xla/service/llvm_ir/fused_ir_emitter.h"
#include "tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h"
@@ -2411,8 +2412,8 @@ std::unique_ptr<KernelThunk> IrEmitterUnnested::BuildKernelThunk(
llvm::Value* loc;
if (slice.allocation()->is_constant()) {
loc = ir_emitter_context_->llvm_module()->getGlobalVariable(
- llvm_ir::AsStringRef(
- ConstantBufferAllocationToGlobalName(*slice.allocation())));
+ llvm_ir::AsStringRef(llvm_ir::ConstantBufferAllocationToGlobalName(
+ *slice.allocation())));
CHECK_NE(loc, nullptr);
} else {
loc = b_.CreateInBoundsGEP(kernel_args.at(slice.allocation()),
@@ -3428,7 +3429,7 @@ Status IrEmitterUnnested::EmitConstantGlobals() {
continue;
}
- const Literal& literal = LiteralForConstantAllocation(allocation);
+ const Literal& literal = llvm_ir::LiteralForConstantAllocation(allocation);
const bool should_emit_initializer = ShouldEmitLiteralInLlvmIr(literal);
llvm::ArrayType* global_type =
llvm::ArrayType::get(b_.getInt8Ty(), allocation.size());
@@ -3453,7 +3454,8 @@ Status IrEmitterUnnested::EmitConstantGlobals() {
global_type, /*isConstant=*/should_emit_initializer,
llvm::GlobalValue::ExternalLinkage,
/*Initializer=*/initializer,
- llvm_ir::AsStringRef(ConstantBufferAllocationToGlobalName(allocation)));
+ llvm_ir::AsStringRef(
+ llvm_ir::ConstantBufferAllocationToGlobalName(allocation)));
global_for_const->setAlignment(kConstantBufferAlignBytes);
ir_emitter_context_->llvm_module()->getGlobalList().push_back(
global_for_const);
diff --git a/tensorflow/compiler/xla/service/llvm_ir/BUILD b/tensorflow/compiler/xla/service/llvm_ir/BUILD
index 309a186e58..cdd3daf73b 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/BUILD
+++ b/tensorflow/compiler/xla/service/llvm_ir/BUILD
@@ -225,6 +225,15 @@ cc_library(
)
cc_library(
+ name = "buffer_assignment_util",
+ srcs = ["buffer_assignment_util.cc"],
+ hdrs = ["buffer_assignment_util.h"],
+ deps = [
+ "//tensorflow/compiler/xla/service:buffer_assignment",
+ ],
+)
+
+cc_library(
name = "math_ops",
srcs = ["math_ops.cc"],
hdrs = ["math_ops.h"],
diff --git a/tensorflow/compiler/xla/service/llvm_ir/alias_analysis_test.cc b/tensorflow/compiler/xla/service/llvm_ir/alias_analysis_test.cc
index 2552ff4a6a..941d940684 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/alias_analysis_test.cc
+++ b/tensorflow/compiler/xla/service/llvm_ir/alias_analysis_test.cc
@@ -58,7 +58,7 @@ ENTRY while3 {
CompileAndVerifyIr(hlo_string, R"(
; CHECK-LABEL: @body(i8* align 4 dereferenceable(4) %retval
; CHECK: %[[add_result:.*]] = fadd fast float %[[fadd_lhs:.*]], %[[fadd_rhs:.*]]
-; CHECK: store float %[[add_result]], float* %[[store_dest:.*]], !alias.scope ![[alias_scope_md_for_store:.*]]
+; CHECK: store float %[[add_result]], float* %[[store_dest:.*]], !alias.scope ![[alias_scope_md_for_store:[0-9]+]]
;
; CHECK-LABEL: @condition(i8* align 1 dereferenceable(1) %fusion, i8* noalias %run_options, i8** noalias %params
; CHECK: %[[cond_state_buf_ptr:.*]] = getelementptr inbounds i8*, i8** %params, i64 0
diff --git a/tensorflow/compiler/xla/service/llvm_ir/buffer_assignment_util.cc b/tensorflow/compiler/xla/service/llvm_ir/buffer_assignment_util.cc
new file mode 100644
index 0000000000..4eb5d9fb47
--- /dev/null
+++ b/tensorflow/compiler/xla/service/llvm_ir/buffer_assignment_util.cc
@@ -0,0 +1,59 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/llvm_ir/buffer_assignment_util.h"
+
+namespace xla {
+namespace llvm_ir {
+static const HloInstruction& InstrForConstantBufferAllocation(
+ const BufferAllocation& allocation) {
+ CHECK(allocation.is_constant());
+ HloInstruction* const_instr = nullptr;
+ for (const auto& buffer_offset_pair : allocation.assigned_buffers()) {
+ const LogicalBuffer* buffer = buffer_offset_pair.first;
+ // BufferAssignment may have assigned non-constant instructions to this
+ // allocation too so we can't CHECK this condition. E.g. for
+ //
+ // while(init = constant, body = identity, cond = ...)
+ //
+ // the LogicalBuffer for the kWhile instruction will have the same
+ // BufferAllocation as the LogicalBuffer for the (init) constant.
+ if (buffer->instruction()->opcode() == HloOpcode::kConstant) {
+ CHECK_EQ(const_instr, nullptr)
+ << const_instr->ToString() << " " << buffer->ToString();
+ const_instr = buffer->instruction();
+ }
+ }
+ CHECK_NE(const_instr, nullptr);
+ return *const_instr;
+}
+
+string ConstantBufferAllocationToGlobalName(
+ const BufferAllocation& allocation) {
+ string instr_name = InstrForConstantBufferAllocation(allocation).name();
+ for (char& c : instr_name) {
+ if (c == '.') {
+ c = '_';
+ }
+ }
+ return tensorflow::strings::StrCat("buffer_for_", instr_name);
+}
+
+const Literal& LiteralForConstantAllocation(
+ const BufferAllocation& allocation) {
+ return InstrForConstantBufferAllocation(allocation).literal();
+}
+} // namespace llvm_ir
+} // namespace xla
diff --git a/tensorflow/compiler/xla/service/llvm_ir/buffer_assignment_util.h b/tensorflow/compiler/xla/service/llvm_ir/buffer_assignment_util.h
new file mode 100644
index 0000000000..bfb6eecb87
--- /dev/null
+++ b/tensorflow/compiler/xla/service/llvm_ir/buffer_assignment_util.h
@@ -0,0 +1,34 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_BUFFER_ASSIGNMENT_UTIL_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_BUFFER_ASSIGNMENT_UTIL_H_
+
+#include "tensorflow/compiler/xla/service/buffer_assignment.h"
+
+namespace xla {
+namespace llvm_ir {
+// In XLA:GPU we map constant buffer allocations to globals in the generated
+// LLVM IR. This function gives us the name of the global variable a constant
+// buffer is mapped to. Not used on XLA:CPU.
+string ConstantBufferAllocationToGlobalName(const BufferAllocation& allocation);
+
+// Returns the Literal corresponding to `allocation`, which must be a constant
+// allocation.
+const Literal& LiteralForConstantAllocation(const BufferAllocation& allocation);
+} // namespace llvm_ir
+} // namespace xla
+
+#endif // TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_BUFFER_ASSIGNMENT_UTIL_H_
diff --git a/tensorflow/compiler/xla/tests/local_client_aot_test_helper.cc b/tensorflow/compiler/xla/tests/local_client_aot_test_helper.cc
index 9e21c53569..74494e60e8 100644
--- a/tensorflow/compiler/xla/tests/local_client_aot_test_helper.cc
+++ b/tensorflow/compiler/xla/tests/local_client_aot_test_helper.cc
@@ -92,9 +92,10 @@ int main(int argc, char** argv) {
// It's lame to hard-code the buffer assignments, but we need
// local_client_aot_test.cc to be able to easily invoke the function.
CHECK_EQ(result->result_buffer_index(), 1);
- CHECK_EQ(result->buffer_sizes().size(), 2);
+ CHECK_EQ(result->buffer_sizes().size(), 3);
CHECK_EQ(result->buffer_sizes()[0], -1); // param buffer
CHECK_EQ(result->buffer_sizes()[1], sizeof(float)); // result buffer
+ CHECK_EQ(result->buffer_sizes()[2], -1); // const buffer
if (triple.isOSBinFormatELF()) {
// Check the ELF magic.
CHECK_EQ(result->object_file_data()[0], 0x7F);