about summary refs log tree commit diff homepage
path: root/tensorflow/compiler/xla
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/compiler/xla')
-rw-r--r-- tensorflow/compiler/xla/array.h | 1
-rw-r--r-- tensorflow/compiler/xla/client/computation_builder.h | 2
-rw-r--r-- tensorflow/compiler/xla/service/copy_insertion_test.cc | 2
-rw-r--r-- tensorflow/compiler/xla/service/cpu/cpu_runtime_avx.h | 6
-rw-r--r-- tensorflow/compiler/xla/service/cpu/cpu_runtime_neon.h | 6
-rw-r--r-- tensorflow/compiler/xla/service/cpu/cpu_runtime_sse4_1.h | 6
-rw-r--r-- tensorflow/compiler/xla/service/cpu/llvm_ir_runtime.cc | 10
-rw-r--r-- tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc | 32
-rw-r--r-- tensorflow/compiler/xla/service/gpu/while_transformer.cc | 2
-rw-r--r-- tensorflow/compiler/xla/service/heap_simulator.h | 2
-rw-r--r-- tensorflow/compiler/xla/service/hlo_graph_dumper.cc | 11
-rw-r--r-- tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc | 11
-rw-r--r-- tensorflow/compiler/xla/shape_tree.h | 2
-rw-r--r-- tensorflow/compiler/xla/util.h | 3
14 files changed, 66 insertions, 30 deletions
diff --git a/tensorflow/compiler/xla/array.h b/tensorflow/compiler/xla/array.h
index 213e0bac6c..71aa057cd3 100644
--- a/tensorflow/compiler/xla/array.h
+++ b/tensorflow/compiler/xla/array.h
@@ -22,6 +22,7 @@ limitations under the License.
#include <initializer_list>
#include <iterator>
#include <memory>
+#include <numeric>
#include <random>
#include <type_traits>
#include <vector>
diff --git a/tensorflow/compiler/xla/client/computation_builder.h b/tensorflow/compiler/xla/client/computation_builder.h
index daad09364c..7293b35c0f 100644
--- a/tensorflow/compiler/xla/client/computation_builder.h
+++ b/tensorflow/compiler/xla/client/computation_builder.h
@@ -770,7 +770,7 @@ class ComputationBuilder {
// The operand must represent a constant value, which in this case
// means that it must not statically depend on any parameter of the
// computation that is being built other then the ones specified on the
- // paramtere list. The parameters in the list will be indexed by their
+ // parameter list. The parameters in the list will be indexed by their
// parameter id property so the number of parameters specified should be at
// least as many as the largest used parameter index.
//
diff --git a/tensorflow/compiler/xla/service/copy_insertion_test.cc b/tensorflow/compiler/xla/service/copy_insertion_test.cc
index 3278fd5f06..8388574716 100644
--- a/tensorflow/compiler/xla/service/copy_insertion_test.cc
+++ b/tensorflow/compiler/xla/service/copy_insertion_test.cc
@@ -339,7 +339,7 @@ TEST_F(CopyInsertionTest, ElementOfNestedTupleParameter) {
ShapeUtil::MakeShape(F32, {42})}),
"param0"));
- // The return value of the computation is the zero-th elemnt of the nested
+ // The return value of the computation is the zero-th element of the nested
// tuple. This element is itself a tuple.
auto gte = builder.AddInstruction(HloInstruction::CreateGetTupleElement(
ShapeUtil::GetSubshape(param->shape(), {0}), param, 0));
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime_avx.h b/tensorflow/compiler/xla/service/cpu/cpu_runtime_avx.h
index acfada8540..74ae6d00c9 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_runtime_avx.h
+++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime_avx.h
@@ -38,14 +38,16 @@ typedef float V8F32AVX __attribute__((__vector_size__(32)));
extern "C" {
+#ifdef __AVX__
// The following functions are vectorized versions of a selection of libm
// library functions.
// References to these functions are created by the LLVM vectorizer.
xla::cpu::runtime::V8F32AVX __xla_cpu_runtime_ExpV8F32AVX(
- xla::cpu::runtime::V8F32AVX x) TF_ATTRIBUTE_WEAK;
+ xla::cpu::runtime::V8F32AVX x);
xla::cpu::runtime::V8F32AVX __xla_cpu_runtime_LogV8F32AVX(
- xla::cpu::runtime::V8F32AVX x) TF_ATTRIBUTE_WEAK;
+ xla::cpu::runtime::V8F32AVX x);
+#endif
}
#endif // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_RUNTIME_AVX_H_
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime_neon.h b/tensorflow/compiler/xla/service/cpu/cpu_runtime_neon.h
index 75cb16b273..645a43858f 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_runtime_neon.h
+++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime_neon.h
@@ -49,14 +49,16 @@ struct V4F32NEON;
extern "C" {
+#ifdef __ARM_NEON__
// The following functions are vectorized versions of a selection of libm
// library functions.
// References to these functions are created by the LLVM vectorizer.
xla::cpu::runtime::V4F32NEON __xla_cpu_runtime_ExpV4F32NEON(
- xla::cpu::runtime::V4F32NEON x) TF_ATTRIBUTE_WEAK;
+ xla::cpu::runtime::V4F32NEON x);
xla::cpu::runtime::V4F32NEON __xla_cpu_runtime_LogV4F32NEON(
- xla::cpu::runtime::V4F32NEON x) TF_ATTRIBUTE_WEAK;
+ xla::cpu::runtime::V4F32NEON x);
+#endif // __ARM_NEON__
}
#endif // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_RUNTIME_NEON_H_
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime_sse4_1.h b/tensorflow/compiler/xla/service/cpu/cpu_runtime_sse4_1.h
index 96587d10d2..80ca4243a2 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_runtime_sse4_1.h
+++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime_sse4_1.h
@@ -39,14 +39,16 @@ typedef float V4F32SSE __attribute__((__vector_size__(16)));
extern "C" {
+#ifdef __SSE4_1__
// The following functions are vectorized versions of a selection of libm
// library functions.
// References to these functions are created by the LLVM vectorizer.
xla::cpu::runtime::V4F32SSE __xla_cpu_runtime_ExpV4F32SSE(
- xla::cpu::runtime::V4F32SSE x) TF_ATTRIBUTE_WEAK;
+ xla::cpu::runtime::V4F32SSE x);
xla::cpu::runtime::V4F32SSE __xla_cpu_runtime_LogV4F32SSE(
- xla::cpu::runtime::V4F32SSE x) TF_ATTRIBUTE_WEAK;
+ xla::cpu::runtime::V4F32SSE x);
+#endif
}
#endif // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_RUNTIME_SSE4_1_H_
diff --git a/tensorflow/compiler/xla/service/cpu/llvm_ir_runtime.cc b/tensorflow/compiler/xla/service/cpu/llvm_ir_runtime.cc
index 81c29e4726..0f71258ff0 100644
--- a/tensorflow/compiler/xla/service/cpu/llvm_ir_runtime.cc
+++ b/tensorflow/compiler/xla/service/cpu/llvm_ir_runtime.cc
@@ -65,13 +65,13 @@ llvm::Function* EmitVectorF32TanhIfNeeded(llvm::Module* module,
llvm::ConstantFP::get(vector_type, 9.0), &ir_builder);
std::array<float, 7> numerator_coeffs(
- {{-2.76076847742355e-16f, 2.00018790482477e-13f, -8.60467152213735e-11f,
- 5.12229709037114e-08f, 1.48572235717979e-05f, 6.37261928875436e-04f,
- 4.89352455891786e-03f}});
+ {-2.76076847742355e-16f, 2.00018790482477e-13f, -8.60467152213735e-11f,
+ 5.12229709037114e-08f, 1.48572235717979e-05f, 6.37261928875436e-04f,
+ 4.89352455891786e-03f});
std::array<float, 4> denominator_coeffs(
- {{1.19825839466702e-06f, 1.18534705686654e-04f, 2.26843463243900e-03f,
- 4.89352518554385e-03f}});
+ {1.19825839466702e-06f, 1.18534705686654e-04f, 2.26843463243900e-03f,
+ 4.89352518554385e-03f});
llvm::Value* input_squared =
ir_builder.CreateFMul(input_clamped, input_clamped);
diff --git a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc
index cda2783307..c942cd6bf1 100644
--- a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc
+++ b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc
@@ -102,9 +102,21 @@ llvm::StringRef GetHostCpuName() {
CompilerFunctor::VectorIntrinsics GetAvailableIntrinsics() {
CompilerFunctor::VectorIntrinsics intrinsics;
- intrinsics.sse_intrinsics = (&__xla_cpu_runtime_ExpV4F32SSE != nullptr);
- intrinsics.avx_intrinsics = (&__xla_cpu_runtime_ExpV8F32AVX != nullptr);
- intrinsics.neon_intrinsics = (&__xla_cpu_runtime_ExpV4F32NEON != nullptr);
+#ifdef __SSE4_1__
+ intrinsics.sse_intrinsics = true;
+#else
+ intrinsics.sse_intrinsics = false;
+#endif
+#ifdef __AVX__
+ intrinsics.avx_intrinsics = true;
+#else
+ intrinsics.avx_intrinsics = false;
+#endif
+#ifdef __ARM_NEON__
+ intrinsics.neon_intrinsics = true;
+#else
+ intrinsics.neon_intrinsics = false;
+#endif
return intrinsics;
}
@@ -201,12 +213,18 @@ bool RegisterKnownJITSymbols() {
REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedConvF32);
REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF32);
REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF64);
+#ifdef __ARM_NEON__
REGISTER_CPU_RUNTIME_SYMBOL(ExpV4F32NEON);
- REGISTER_CPU_RUNTIME_SYMBOL(ExpV4F32SSE);
- REGISTER_CPU_RUNTIME_SYMBOL(ExpV8F32AVX);
REGISTER_CPU_RUNTIME_SYMBOL(LogV4F32NEON);
+#endif
+#ifdef __SSE4_1__
+ REGISTER_CPU_RUNTIME_SYMBOL(ExpV4F32SSE);
REGISTER_CPU_RUNTIME_SYMBOL(LogV4F32SSE);
+#endif
+#ifdef __AVX__
+ REGISTER_CPU_RUNTIME_SYMBOL(ExpV8F32AVX);
REGISTER_CPU_RUNTIME_SYMBOL(LogV8F32AVX);
+#endif
REGISTER_CPU_RUNTIME_SYMBOL(ParallelForkJoin);
REGISTER_CPU_RUNTIME_SYMBOL(ReleaseInfeedBufferAfterDequeue);
REGISTER_CPU_RUNTIME_SYMBOL(ReleaseOutfeedBufferAfterPopulation);
@@ -275,7 +293,11 @@ bool RegisterKnownJITSymbols() {
REGISTER_LIBM_SYMBOL(scalbln, double (*)(double, long));
REGISTER_LIBM_SYMBOL(scalbn, double (*)(double, int));
REGISTER_LIBM_SYMBOL(sin, double (*)(double));
+#ifdef __APPLE__
+ REGISTER_LIBM_SYMBOL(__sincos, void (*)(double, double*, double*));
+#else
REGISTER_LIBM_SYMBOL(sincos, void (*)(double, double*, double*));
+#endif
REGISTER_LIBM_SYMBOL(sinh, double (*)(double));
REGISTER_LIBM_SYMBOL(sqrt, double (*)(double));
REGISTER_LIBM_SYMBOL(tan, double (*)(double));
diff --git a/tensorflow/compiler/xla/service/gpu/while_transformer.cc b/tensorflow/compiler/xla/service/gpu/while_transformer.cc
index ccdd171759..ab94d7d543 100644
--- a/tensorflow/compiler/xla/service/gpu/while_transformer.cc
+++ b/tensorflow/compiler/xla/service/gpu/while_transformer.cc
@@ -44,7 +44,7 @@ namespace {
//
// Parameter
// |
-// Const GetTupleElemet
+// Const GetTupleElement
// \ /
// Add (root)
//
diff --git a/tensorflow/compiler/xla/service/heap_simulator.h b/tensorflow/compiler/xla/service/heap_simulator.h
index a03ad2f37c..88a8698d16 100644
--- a/tensorflow/compiler/xla/service/heap_simulator.h
+++ b/tensorflow/compiler/xla/service/heap_simulator.h
@@ -264,7 +264,7 @@ class LazyBestFitHeap : public HeapAlgorithm {
enum { kLazyAllocOffset = -1 };
struct OrderChunkByIncreasingSize {
- bool operator()(const Chunk& a, const Chunk& b) {
+ bool operator()(const Chunk& a, const Chunk& b) const {
if (a.size != b.size) return a.size < b.size;
return a.offset < b.offset;
}
diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc
index 0e7ab00713..b9f76531f3 100644
--- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc
+++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc
@@ -1318,19 +1318,16 @@ string SaveGraph(const string& graph,
file_extension = ".pbtxt";
break;
}
- string path = JoinPath(
- dest_path, StrCat("hlo_graph_", output_num++, ".XXXXXX", file_extension));
+ string path = JoinPath(dest_path, StrCat("hlo_graph_", output_num++, "."));
auto status = Status::OK();
- int fd = mkstemps(&path[0], file_extension.length());
- if (fd < 0) {
+ auto env = tensorflow::Env::Default();
+ if (!env->CreateUniqueFileName(&path, file_extension)) {
status =
Status(tensorflow::error::Code::UNKNOWN,
StrCat("Failed to create temporary file to dump HLO graph: ",
strerror(errno)));
} else {
- status =
- tensorflow::WriteStringToFile(tensorflow::Env::Default(), path, graph);
- close(fd);
+ status = tensorflow::WriteStringToFile(env, path, graph);
}
if (!status.ok()) {
LOG(WARNING) << "Saving HLO graph failed: " << status;
diff --git a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc
index 5f6f9810c3..23d2d4e87d 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc
+++ b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc
@@ -111,8 +111,15 @@ void KernelSupportLibrary::EmitAndCallOutlinedKernel(
ir_builder->SetInsertPoint(return_inst);
std::vector<llvm::Value*> arg_values;
- std::transform(function->arg_begin(), function->arg_end(),
- std::back_inserter(arg_values), std::addressof<llvm::Value>);
+ /*
+ * clang on OSX doesn't like std::transform or range for loop here.
+ * See https://github.com/tensorflow/tensorflow/issues/15196
+ */
+ for (llvm::Function::arg_iterator arg = function->arg_begin(),
+ arg_e = function->arg_end();
+ arg != arg_e; ++arg) {
+ arg_values.push_back(arg);
+ }
if (null_arg_idx != -1) {
arg_values.insert(arg_values.begin() + null_arg_idx, nullptr);
}
diff --git a/tensorflow/compiler/xla/shape_tree.h b/tensorflow/compiler/xla/shape_tree.h
index bf8d190150..d752619bd6 100644
--- a/tensorflow/compiler/xla/shape_tree.h
+++ b/tensorflow/compiler/xla/shape_tree.h
@@ -238,7 +238,7 @@ class ShapeTree {
// (or compatible).
// index : the index of the element in the shape. See ShapeUtil::GetSubshape
// for definition of index.
- // data : The data value at this elemnt.
+ // data : The data value at this element.
template <typename Fn>
void ForEachElement(const Fn& func) const;
diff --git a/tensorflow/compiler/xla/util.h b/tensorflow/compiler/xla/util.h
index b722095d1f..277cc5ec86 100644
--- a/tensorflow/compiler/xla/util.h
+++ b/tensorflow/compiler/xla/util.h
@@ -239,11 +239,14 @@ std::vector<T> Permute(tensorflow::gtl::ArraySlice<int64> permutation,
// Override of the above that works around compile failures with gcc 7.1.1.
// For details see https://github.com/tensorflow/tensorflow/issues/10843
+// Hide this workaround from MSVC as it causes ambiguous error.
+#ifndef _MSC_VER
template <typename T>
std::vector<T> Permute(tensorflow::gtl::ArraySlice<int64> permutation,
const std::vector<T>& input) {
return Permute<std::vector, T>(permutation, input);
}
+#endif
// Inverts a permutation, i.e., output_permutation[input_permutation[i]] = i.
std::vector<int64> InversePermutation(