about | summary | refs | log | tree | commit | diff | homepage
path: root/tensorflow/compiler/aot
diff options
context:
space:
mode:
author: Sanjoy Das <sanjoy@google.com> 2017-11-30 11:23:25 -0800
committer: TensorFlower Gardener <gardener@tensorflow.org> 2017-11-30 11:27:07 -0800
commit: eafa8efc55fb9989a679e36b030742c6d87b0310 (patch)
tree: 16fbaafc1c3b92a2c541f7e8ddede80c4c310944 /tensorflow/compiler/aot
parent: 4146ff1259c0b4ada8afbbad11a7b37d8373d1b9 (diff)
[XLA:CPU] Add Hlo profiling support to XlaJitCompiledCpuFunction
Some of the functionality has bled into the generic XlaCompiledCpuFunction, but there still remains a fair amount of work to do before the AOT side of things starts working.

This CL also fixes a bug I introduced in a previous CL -- when I changed IrEmitter::hlo_to_profile_idx_ to a value, I changed the signature of the generated function to always have the "profile_counters" argument, whereas the AOT client code expects the signature to not have that argument. In practice this wasn't an issue for the standard x86 calling convention, but it could easily have been problematic on other architectures and calling conventions. After this change the mismatch is no longer present.

PiperOrigin-RevId: 177481998
Diffstat (limited to 'tensorflow/compiler/aot')
-rw-r--r-- tensorflow/compiler/aot/codegen.cc 8
-rw-r--r-- tensorflow/compiler/aot/codegen_test_h.golden 8
-rw-r--r-- tensorflow/compiler/aot/tests/tfcompile_test.cc 4
3 files changed, 10 insertions, 10 deletions
diff --git a/tensorflow/compiler/aot/codegen.cc b/tensorflow/compiler/aot/codegen.cc
index ae22f7edc4..28ac40df18 100644
--- a/tensorflow/compiler/aot/codegen.cc
+++ b/tensorflow/compiler/aot/codegen.cc
@@ -418,7 +418,7 @@ namespace xla { class ExecutableRunOptions; }
// (Implementation detail) Entry point to the function in the object file.
extern "C" void {{ENTRY}}(
void* result, const xla::ExecutableRunOptions* run_options,
- const void** args, void** temps);
+ const void** args, void** temps, tensorflow::int64* profile_counters);
{{NS_START}}
// {{CLASS}} represents a computation previously specified in a
@@ -483,7 +483,7 @@ class {{CLASS}} : public tensorflow::XlaCompiledCpuFunction {
return *kStaticData;
}
- {{CLASS}}(AllocMode alloc_mode = AllocMode::ARGS_RESULTS_AND_TEMPS)
+ {{CLASS}}(AllocMode alloc_mode = AllocMode::ARGS_RESULTS_PROFILES_AND_TEMPS)
: XlaCompiledCpuFunction(StaticData(), alloc_mode) {}
{{CLASS}}(const {{CLASS}}&) = delete;
@@ -496,8 +496,8 @@ class {{CLASS}} : public tensorflow::XlaCompiledCpuFunction {
// void set_argN_data(void* data)
// Sets the buffer of type T for positional argument N. May be called in
// any AllocMode. Must be called before Run to have an affect. Must be
- // called in AllocMode::RESULTS_AND_TEMPS_ONLY for each positional argument,
- // to set the argument buffers.
+ // called in AllocMode::RESULTS_PROFILES_AND_TEMPS_ONLY for each positional
+ // argument, to set the argument buffers.
//
// T* argN_data()
// Returns the buffer of type T for positional argument N.
diff --git a/tensorflow/compiler/aot/codegen_test_h.golden b/tensorflow/compiler/aot/codegen_test_h.golden
index 65f342ce27..cf01bee325 100644
--- a/tensorflow/compiler/aot/codegen_test_h.golden
+++ b/tensorflow/compiler/aot/codegen_test_h.golden
@@ -19,7 +19,7 @@ namespace xla { class ExecutableRunOptions; }
// (Implementation detail) Entry point to the function in the object file.
extern "C" void entry_point(
void* result, const xla::ExecutableRunOptions* run_options,
- const void** args, void** temps);
+ const void** args, void** temps, tensorflow::int64* profile_counters);
namespace foo {
namespace bar {
@@ -86,7 +86,7 @@ class MyClass : public tensorflow::XlaCompiledCpuFunction {
return *kStaticData;
}
- MyClass(AllocMode alloc_mode = AllocMode::ARGS_RESULTS_AND_TEMPS)
+ MyClass(AllocMode alloc_mode = AllocMode::ARGS_RESULTS_PROFILES_AND_TEMPS)
: XlaCompiledCpuFunction(StaticData(), alloc_mode) {}
MyClass(const MyClass&) = delete;
@@ -99,8 +99,8 @@ class MyClass : public tensorflow::XlaCompiledCpuFunction {
// void set_argN_data(void* data)
// Sets the buffer of type T for positional argument N. May be called in
// any AllocMode. Must be called before Run to have an affect. Must be
- // called in AllocMode::RESULTS_AND_TEMPS_ONLY for each positional argument,
- // to set the argument buffers.
+ // called in AllocMode::RESULTS_PROFILES_AND_TEMPS_ONLY for each positional
+ // argument, to set the argument buffers.
//
// T* argN_data()
// Returns the buffer of type T for positional argument N.
diff --git a/tensorflow/compiler/aot/tests/tfcompile_test.cc b/tensorflow/compiler/aot/tests/tfcompile_test.cc
index 6b037f276a..413efd9cea 100644
--- a/tensorflow/compiler/aot/tests/tfcompile_test.cc
+++ b/tensorflow/compiler/aot/tests/tfcompile_test.cc
@@ -70,7 +70,7 @@ TEST(TFCompileTest, Add) {
// Run tests that use set_argN_data separately, to avoid accidentally re-using
// non-existent buffers.
TEST(TFCompileTest, Add_SetArg) {
- AddComp add(AddComp::AllocMode::RESULTS_AND_TEMPS_ONLY);
+ AddComp add(AddComp::AllocMode::RESULTS_PROFILES_AND_TEMPS_ONLY);
int32 arg_x = 10;
int32 arg_y = 32;
@@ -258,7 +258,7 @@ TEST(TFCompileTest, MatMul2_SetArg) {
Eigen::ThreadPoolDevice device(&tp, tp.NumThreads());
foo::bar::MatMulComp matmul(
- foo::bar::MatMulComp::AllocMode::RESULTS_AND_TEMPS_ONLY);
+ foo::bar::MatMulComp::AllocMode::RESULTS_PROFILES_AND_TEMPS_ONLY);
matmul.set_thread_pool(&device);
// Test using the set_argN_data() methods.