about summary refs log tree commit diff homepage
path: root/tensorflow/compiler/aot/codegen_test_h.golden
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/compiler/aot/codegen_test_h.golden')
-rw-r--r--  tensorflow/compiler/aot/codegen_test_h.golden  268
1 file changed, 268 insertions, 0 deletions
diff --git a/tensorflow/compiler/aot/codegen_test_h.golden b/tensorflow/compiler/aot/codegen_test_h.golden
new file mode 100644
index 0000000000..46d7c03006
--- /dev/null
+++ b/tensorflow/compiler/aot/codegen_test_h.golden
@@ -0,0 +1,268 @@
+// Generated by tfcompile, the TensorFlow graph compiler. DO NOT EDIT!
+//
+// This header was generated via ahead-of-time compilation of a TensorFlow
+// graph. An object file corresponding to this header was also generated.
+// This header gives access to the functionality in that object file.
+//
+// clang-format off
+
+#ifndef TFCOMPILE_GENERATED_entry_point_H_ // NOLINT(build/header_guard)
+#define TFCOMPILE_GENERATED_entry_point_H_ // NOLINT(build/header_guard)
+
+#include "tensorflow/compiler/tf2xla/xla_local_runtime_context.h"
+#include "tensorflow/compiler/aot/runtime.h"
+#include "tensorflow/compiler/xla/executable_run_options.h"
+#include "tensorflow/core/platform/macros.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace Eigen { class ThreadPoolDevice; }
+
+// (Implementation detail) Entry point to the function in the object file.
+extern "C" void entry_point(
+ void* result, xla::ExecutableRunOptions* run_options,
+ void** args, void** temps);
+
+namespace foo {
+namespace bar {
+
+// MyClass represents a computation previously specified in a
+// TensorFlow graph, now compiled into executable code. Usage example:
+//
+// MyClass computation;
+// // ...set args using computation.argN methods
+// CHECK(computation.Run());
+// // ...inspect results using computation.resultN methods
+//
+// The Run method invokes the actual computation, with inputs read from arg
+// buffers, and outputs written to result buffers. Each Run call may also use
+// a set of temporary buffers for the computation.
+//
+// By default each instance of this class manages its own arg, result and temp
+// buffers. The AllocMode constructor parameter may be used to modify the
+// buffer allocation strategy.
+//
+// Under the default allocation strategy, this class is thread-compatible:
+// o Calls to non-const methods require exclusive access to the object.
+// o Concurrent calls to const methods are OK, if those calls are made while
+// it is guaranteed that no thread may call a non-const method.
+//
+// The logical function signature is:
+// ((unknown): f32[1,2], (unknown): s64[3,4], (unknown): opaque[]) -> u32[5,6]
+//
+// Memory stats:
+// arg bytes total: 104
+// arg bytes aligned: 128
+// temp bytes total: 126
+// temp bytes aligned: 224
+class MyClass {
+ public:
+ // Number of input arguments for the compiled computation.
+ static constexpr size_t kNumArgs = 3;
+
+ // Byte size of each argument buffer. There are kNumArgs entries.
+ static const intptr_t* ArgSizes() {
+ static constexpr intptr_t kArgSizes[kNumArgs] = {8, 96, -1};
+ return kArgSizes;
+ }
+
+ // AllocMode controls the buffer allocation mode.
+ enum class AllocMode {
+ // Allocate all buffers - args, results and temps.
+ ARGS_RESULTS_AND_TEMPS,
+
+ // Only allocate result and temp buffers.
+ // Use set_argN_data to set argument buffers before Run is called.
+ RESULTS_AND_TEMPS_ONLY,
+ };
+
+ MyClass(AllocMode mode = AllocMode::ARGS_RESULTS_AND_TEMPS) {
+ if (mode == AllocMode::ARGS_RESULTS_AND_TEMPS) {
+ alloc_args_ = tensorflow::tfcompile::runtime::MallocContiguousBuffers(
+ ArgSizes(), kNumArgs, args_, false /* annotate_initialized */);
+ }
+ args_[kNumArgs-1] = &context_;
+ alloc_temps_ = tensorflow::tfcompile::runtime::MallocContiguousBuffers(
+ TempSizes(), kNumTemps, temps_, true /* annotate_initialized */);
+ }
+
+ ~MyClass() {
+ tensorflow::tfcompile::runtime::FreeContiguous(alloc_args_);
+ tensorflow::tfcompile::runtime::FreeContiguous(alloc_temps_);
+ }
+
+ // Sets the thread pool to use during the Run call.
+ MyClass& set_thread_pool(const Eigen::ThreadPoolDevice* pool) {
+ run_options_.set_intra_op_thread_pool(pool);
+ context_.thread_pool = pool;
+ return *this;
+ }
+
+ // Runs the computation, with inputs read from arg buffers, and outputs
+ // written to result buffers. Returns true on success and false on failure.
+ bool Run() {
+ entry_point(temps_[kResultIndex], &run_options_, args_, temps_);
+ return !context_.error;
+ }
+
+ // Returns the error message from the previous failed Run call.
+ tensorflow::string error_msg() const { return context_.error_msg; }
+
+ // Arg methods for managing input buffers. Buffers are in row-major order.
+ // There is a set of methods for each positional argument, with the following
+ // general form:
+ //
+ // void set_argN_data(void* data)
+ // Sets the buffer of type T for positional argument N. May be called in
+ //   any AllocMode. Must be called before Run to have an effect. Must be
+ // called in AllocMode::RESULTS_AND_TEMPS_ONLY for each positional argument,
+ // to set the argument buffers.
+ //
+ // T* argN_data()
+ // Returns the buffer of type T for positional argument N.
+ //
+ // T& argN(...dim indices...)
+ // Returns a reference to the value of type T for positional argument N,
+ // with dim indices specifying which value. No bounds checking is performed
+ // on dim indices.
+ //
+ // void** args()
+ // Returns an array of argument buffers, where args()[N] is the buffer for
+ // positional argument N.
+
+ void** args() { return args_; }
+ const void *const *args() const { return args_; }
+
+ void set_arg0_data(void* data) {
+ args_[0] = data;
+ }
+ float* arg0_data() {
+ return static_cast<float*>(args_[0]);
+ }
+ float& arg0(size_t dim0, size_t dim1) {
+ return (*static_cast<float(*)[1][2]>(
+ args_[0]))[dim0][dim1];
+ }
+ const float* arg0_data() const {
+ return static_cast<const float*>(args_[0]);
+ }
+ const float& arg0(size_t dim0, size_t dim1) const {
+ return (*static_cast<const float(*)[1][2]>(
+ args_[0]))[dim0][dim1];
+ }
+
+ void set_arg_myfeed_data(void* data) {
+ args_[0] = data;
+ }
+ float* arg_myfeed_data() {
+ return static_cast<float*>(args_[0]);
+ }
+ float& arg_myfeed(size_t dim0, size_t dim1) {
+ return (*static_cast<float(*)[1][2]>(
+ args_[0]))[dim0][dim1];
+ }
+ const float* arg_myfeed_data() const {
+ return static_cast<const float*>(args_[0]);
+ }
+ const float& arg_myfeed(size_t dim0, size_t dim1) const {
+ return (*static_cast<const float(*)[1][2]>(
+ args_[0]))[dim0][dim1];
+ }
+
+ void set_arg1_data(void* data) {
+ args_[1] = data;
+ }
+ tensorflow::int64* arg1_data() {
+ return static_cast<tensorflow::int64*>(args_[1]);
+ }
+ tensorflow::int64& arg1(size_t dim0, size_t dim1) {
+ return (*static_cast<tensorflow::int64(*)[3][4]>(
+ args_[1]))[dim0][dim1];
+ }
+ const tensorflow::int64* arg1_data() const {
+ return static_cast<const tensorflow::int64*>(args_[1]);
+ }
+ const tensorflow::int64& arg1(size_t dim0, size_t dim1) const {
+ return (*static_cast<const tensorflow::int64(*)[3][4]>(
+ args_[1]))[dim0][dim1];
+ }
+
+ // Result methods for managing output buffers. Buffers are in row-major order.
+ // Must only be called after a successful Run call. There is a set of methods
+ // for each positional result, with the following general form:
+ //
+ // T* resultN_data()
+ // Returns the buffer of type T for positional result N.
+ //
+ // T& resultN(...dim indices...)
+ // Returns a reference to the value of type T for positional result N,
+ // with dim indices specifying which value. No bounds checking is performed
+ // on dim indices.
+ //
+ // void** results()
+ // Returns an array of result buffers, where results()[N] is the buffer for
+ // positional result N.
+ //
+ // Unlike the arg methods, there is no set_resultN_data method. The result
+ // buffers are managed internally, and may change after each call to Run.
+
+ void** results() { return temps_ + kResultIndex; }
+ const void *const *results() const { return temps_ + kResultIndex; }
+
+ tensorflow::uint32* result0_data() {
+ return static_cast<tensorflow::uint32*>(temps_[kResultIndex]);
+ }
+ tensorflow::uint32& result0(size_t dim0, size_t dim1) {
+ return (*static_cast<tensorflow::uint32(*)[5][6]>(
+ temps_[kResultIndex]))[dim0][dim1];
+ }
+ const tensorflow::uint32* result0_data() const {
+ return static_cast<const tensorflow::uint32*>(temps_[kResultIndex]);
+ }
+ const tensorflow::uint32& result0(size_t dim0, size_t dim1) const {
+ return (*static_cast<const tensorflow::uint32(*)[5][6]>(
+ temps_[kResultIndex]))[dim0][dim1];
+ }
+
+ tensorflow::uint32* result_myfetch_data() {
+ return static_cast<tensorflow::uint32*>(temps_[kResultIndex]);
+ }
+ tensorflow::uint32& result_myfetch(size_t dim0, size_t dim1) {
+ return (*static_cast<tensorflow::uint32(*)[5][6]>(
+ temps_[kResultIndex]))[dim0][dim1];
+ }
+ const tensorflow::uint32* result_myfetch_data() const {
+ return static_cast<const tensorflow::uint32*>(temps_[kResultIndex]);
+ }
+ const tensorflow::uint32& result_myfetch(size_t dim0, size_t dim1) const {
+ return (*static_cast<const tensorflow::uint32(*)[5][6]>(
+ temps_[kResultIndex]))[dim0][dim1];
+ }
+
+ private:
+ // Number of result and temporary buffers for the compiled computation.
+ static constexpr size_t kNumTemps = 6;
+ // The 0-based index of the result in the temporary buffers.
+ static constexpr size_t kResultIndex = 5;
+
+ // Byte size of each result / temporary buffer. There are kNumTemps entries.
+ static const intptr_t* TempSizes() {
+ static constexpr intptr_t kTempSizes[kNumTemps] = {1, -1, 2, -1, 3, 120};
+ return kTempSizes;
+ }
+
+ void* args_[kNumArgs];
+ void* temps_[kNumTemps];
+ void* alloc_args_ = nullptr;
+ void* alloc_temps_ = nullptr;
+ xla::ExecutableRunOptions run_options_;
+ tensorflow::XlaLocalRuntimeContext context_;
+
+ TF_DISALLOW_COPY_AND_ASSIGN(MyClass);
+};
+
+} // end namespace bar
+} // end namespace foo
+
+#endif // TFCOMPILE_GENERATED_entry_point_H_
+
+// clang-format on