tensorflow/compiler/xla/client/executable_build_options.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112

/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_COMPILER_XLA_CLIENT_EXECUTABLE_BUILD_OPTIONS_H_
#define TENSORFLOW_COMPILER_XLA_CLIENT_EXECUTABLE_BUILD_OPTIONS_H_

#include "absl/strings/string_view.h"
#include "absl/types/optional.h"
#include "tensorflow/compiler/xla/service/device_memory_allocator.h"
#include "tensorflow/compiler/xla/util.h"
#include "tensorflow/compiler/xla/xla_data.pb.h"

namespace xla {

// Class containing options for building a LocalExecutable with
// LocalClient::Compile.
//
// Every mutator returns a reference to this object so that calls can be
// chained, e.g.
//   options.set_device_ordinal(0).add_disabled_hlo_pass("some-pass");
class ExecutableBuildOptions {
 public:
  // If set, this is the device to build the computation for. Valid
  // device_ordinal values are: 0 to # of devices - 1. These values are
  // identical to the device ordinal values used by StreamExecutor. The built
  // executable will be executable on any device equivalent to the specified
  // device as determined by Backend::devices_equivalent(). A value of -1
  // indicates this option has not been set.
  ExecutableBuildOptions& set_device_ordinal(int device_ordinal);
  int device_ordinal() const;

  // If set, this specifies the layout of the result of the computation. If not
  // set, the service will choose the layout of the result. A Shape is used to
  // store the layout to accommodate tuple result shapes. A value of nullptr
  // indicates the option has not been set.
  ExecutableBuildOptions& set_result_layout(const Shape& shape_with_layout);
  const Shape* result_layout() const;

  // If set, this specifies an allocator that can be used to allocate temporary
  // space on the device during compilation.  For example, the compiler might
  // want to run various algorithms on the device and pick the fastest one -- it
  // might allocate buffers for use by these algorithms using this allocator.
  //
  // This does not need to be the same as the DeviceMemoryAllocator passed when
  // running the executable.
  ExecutableBuildOptions& set_device_allocator(
      DeviceMemoryAllocator* allocator);
  DeviceMemoryAllocator* device_allocator() const;

  // If set, specifies a regexp of HLO graphs to dump (as in DebugOptions).
  ExecutableBuildOptions& set_generate_hlo_graph(string regex);
  const absl::optional<string>& generate_hlo_graph() const;

  // If set, specifies a dirpath to dump the end-of-optimization-pipeline HLO
  // protobuf to (as in DebugOptions).
  ExecutableBuildOptions& set_dump_optimized_hlo_proto_to(
      absl::string_view dirpath);
  const absl::optional<string>& dump_optimized_hlo_proto_to() const;

  // If set, specifies a dirpath to dump the start-of-optimization-pipeline HLO
  // protobuf to (as in DebugOptions).
  ExecutableBuildOptions& set_dump_unoptimized_hlo_proto_to(
      absl::string_view dirpath);
  const absl::optional<string>& dump_unoptimized_hlo_proto_to() const;

  // If set, specifies a dirpath to dump the per-pass-in-pipeline HLO protobufs
  // to (as in DebugOptions).
  ExecutableBuildOptions& set_dump_per_pass_hlo_proto_to(
      absl::string_view dirpath);
  const absl::optional<string>& dump_per_pass_hlo_proto_to() const;

  // If true, specifies that we should record an HLO profile during execution
  // and log it after execution (as in DebugOptions). If nullopt the default is
  // used.
  ExecutableBuildOptions& set_hlo_profile(bool enabled);
  absl::optional<bool> hlo_profile() const;

  // Appends a copy of `pass_name` to the list of disabled HLO passes. Names
  // are stored in the order they are added; duplicates are not filtered.
  // Returns *this so the call can be chained like the setters above.
  // NOTE(review): presumably the compiler skips every pass whose name appears
  // in this list -- confirm against the code that consumes these options.
  ExecutableBuildOptions& add_disabled_hlo_pass(absl::string_view pass_name) {
    disabled_hlo_passes_.push_back(std::string(pass_name));
    return *this;
  }

  // Returns a view of the pass names registered via add_disabled_hlo_pass(),
  // in insertion order. The view refers to storage owned by this object: it
  // must not outlive the object and should be considered invalid after any
  // further call to add_disabled_hlo_pass().
  tensorflow::gtl::ArraySlice<std::string> disabled_hlo_passes() const {
    return disabled_hlo_passes_;
  }

  // Returns a string representation of the build options, suitable for
  // debugging.
  string ToString() const;

 private:
  // Unset (nullopt) until set_hlo_profile() provides a value.
  absl::optional<bool> hlo_profile_;
  // -1 indicates that no device ordinal has been set.
  int device_ordinal_ = -1;
  // NOTE(review): result_layout_set_ presumably records whether
  // result_layout_ holds a caller-provided value, so that result_layout() can
  // report nullptr otherwise -- confirm in the .cc file.
  Shape result_layout_;
  bool result_layout_set_ = false;
  // Optional dump/debug settings; nullopt means the option was not set.
  absl::optional<string> generate_hlo_graph_;
  absl::optional<string> dump_optimized_hlo_proto_to_;
  absl::optional<string> dump_unoptimized_hlo_proto_to_;
  absl::optional<string> dump_per_pass_hlo_proto_to_;
  // Stored as a raw pointer; nullptr until set_device_allocator() is called.
  DeviceMemoryAllocator* device_allocator_ = nullptr;
  // Names accumulated by add_disabled_hlo_pass(), in insertion order.
  std::vector<std::string> disabled_hlo_passes_;
};

}  // namespace xla

#endif  // TENSORFLOW_COMPILER_XLA_CLIENT_EXECUTABLE_BUILD_OPTIONS_H_