diff options
Diffstat (limited to 'tensorflow/compiler/xla')
14 files changed, 66 insertions, 30 deletions
diff --git a/tensorflow/compiler/xla/array.h b/tensorflow/compiler/xla/array.h index 213e0bac6c..71aa057cd3 100644 --- a/tensorflow/compiler/xla/array.h +++ b/tensorflow/compiler/xla/array.h @@ -22,6 +22,7 @@ limitations under the License. #include <initializer_list> #include <iterator> #include <memory> +#include <numeric> #include <random> #include <type_traits> #include <vector> diff --git a/tensorflow/compiler/xla/client/computation_builder.h b/tensorflow/compiler/xla/client/computation_builder.h index daad09364c..7293b35c0f 100644 --- a/tensorflow/compiler/xla/client/computation_builder.h +++ b/tensorflow/compiler/xla/client/computation_builder.h @@ -770,7 +770,7 @@ class ComputationBuilder { // The operand must represent a constant value, which in this case // means that it must not statically depend on any parameter of the // computation that is being built other then the ones specified on the - // paramtere list. The parameters in the list will be indexed by their + // parameter list. The parameters in the list will be indexed by their // parameter id property so the number of parameters specified should be at // least as many as the largest used parameter index. // diff --git a/tensorflow/compiler/xla/service/copy_insertion_test.cc b/tensorflow/compiler/xla/service/copy_insertion_test.cc index 3278fd5f06..8388574716 100644 --- a/tensorflow/compiler/xla/service/copy_insertion_test.cc +++ b/tensorflow/compiler/xla/service/copy_insertion_test.cc @@ -339,7 +339,7 @@ TEST_F(CopyInsertionTest, ElementOfNestedTupleParameter) { ShapeUtil::MakeShape(F32, {42})}), "param0")); - // The return value of the computation is the zero-th elemnt of the nested + // The return value of the computation is the zero-th element of the nested // tuple. This element is itself a tuple. auto gte = builder.AddInstruction(HloInstruction::CreateGetTupleElement( ShapeUtil::GetSubshape(param->shape(), {0}), param, 0)); diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime_avx.h b/tensorflow/compiler/xla/service/cpu/cpu_runtime_avx.h index acfada8540..74ae6d00c9 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_runtime_avx.h +++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime_avx.h @@ -38,14 +38,16 @@ typedef float V8F32AVX __attribute__((__vector_size__(32))); extern "C" { +#ifdef __AVX__ // The following functions are vectorized versions of a selection of libm // library functions. // References to these functions are created by the LLVM vectorizer. xla::cpu::runtime::V8F32AVX __xla_cpu_runtime_ExpV8F32AVX( - xla::cpu::runtime::V8F32AVX x) TF_ATTRIBUTE_WEAK; + xla::cpu::runtime::V8F32AVX x); xla::cpu::runtime::V8F32AVX __xla_cpu_runtime_LogV8F32AVX( - xla::cpu::runtime::V8F32AVX x) TF_ATTRIBUTE_WEAK; + xla::cpu::runtime::V8F32AVX x); +#endif } #endif // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_RUNTIME_AVX_H_ diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime_neon.h b/tensorflow/compiler/xla/service/cpu/cpu_runtime_neon.h index 75cb16b273..645a43858f 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_runtime_neon.h +++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime_neon.h @@ -49,14 +49,16 @@ struct V4F32NEON; extern "C" { +#ifdef __ARM_NEON__ // The following functions are vectorized versions of a selection of libm // library functions. // References to these functions are created by the LLVM vectorizer. xla::cpu::runtime::V4F32NEON __xla_cpu_runtime_ExpV4F32NEON( - xla::cpu::runtime::V4F32NEON x) TF_ATTRIBUTE_WEAK; + xla::cpu::runtime::V4F32NEON x); xla::cpu::runtime::V4F32NEON __xla_cpu_runtime_LogV4F32NEON( - xla::cpu::runtime::V4F32NEON x) TF_ATTRIBUTE_WEAK; + xla::cpu::runtime::V4F32NEON x); +#endif // __ARM_NEON__ } #endif // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_RUNTIME_NEON_H_ diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime_sse4_1.h b/tensorflow/compiler/xla/service/cpu/cpu_runtime_sse4_1.h index 96587d10d2..80ca4243a2 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_runtime_sse4_1.h +++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime_sse4_1.h @@ -39,14 +39,16 @@ typedef float V4F32SSE __attribute__((__vector_size__(16))); extern "C" { +#ifdef __SSE4_1__ // The following functions are vectorized versions of a selection of libm // library functions. // References to these functions are created by the LLVM vectorizer. xla::cpu::runtime::V4F32SSE __xla_cpu_runtime_ExpV4F32SSE( - xla::cpu::runtime::V4F32SSE x) TF_ATTRIBUTE_WEAK; + xla::cpu::runtime::V4F32SSE x); xla::cpu::runtime::V4F32SSE __xla_cpu_runtime_LogV4F32SSE( - xla::cpu::runtime::V4F32SSE x) TF_ATTRIBUTE_WEAK; + xla::cpu::runtime::V4F32SSE x); +#endif } #endif // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_RUNTIME_SSE4_1_H_ diff --git a/tensorflow/compiler/xla/service/cpu/llvm_ir_runtime.cc b/tensorflow/compiler/xla/service/cpu/llvm_ir_runtime.cc index 81c29e4726..0f71258ff0 100644 --- a/tensorflow/compiler/xla/service/cpu/llvm_ir_runtime.cc +++ b/tensorflow/compiler/xla/service/cpu/llvm_ir_runtime.cc @@ -65,13 +65,13 @@ llvm::Function* EmitVectorF32TanhIfNeeded(llvm::Module* module, llvm::ConstantFP::get(vector_type, 9.0), &ir_builder); std::array<float, 7> numerator_coeffs( - {{-2.76076847742355e-16f, 2.00018790482477e-13f, -8.60467152213735e-11f, - 5.12229709037114e-08f, 1.48572235717979e-05f, 6.37261928875436e-04f, - 4.89352455891786e-03f}}); + {-2.76076847742355e-16f, 2.00018790482477e-13f, -8.60467152213735e-11f, + 5.12229709037114e-08f, 1.48572235717979e-05f, 6.37261928875436e-04f, + 4.89352455891786e-03f}); std::array<float, 4> denominator_coeffs( - {{1.19825839466702e-06f, 1.18534705686654e-04f, 2.26843463243900e-03f, - 4.89352518554385e-03f}}); + {1.19825839466702e-06f, 1.18534705686654e-04f, 2.26843463243900e-03f, + 4.89352518554385e-03f}); llvm::Value* input_squared = ir_builder.CreateFMul(input_clamped, input_clamped); diff --git a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc index cda2783307..c942cd6bf1 100644 --- a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc +++ b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc @@ -102,9 +102,21 @@ llvm::StringRef GetHostCpuName() { CompilerFunctor::VectorIntrinsics GetAvailableIntrinsics() { CompilerFunctor::VectorIntrinsics intrinsics; - intrinsics.sse_intrinsics = (&__xla_cpu_runtime_ExpV4F32SSE != nullptr); - intrinsics.avx_intrinsics = (&__xla_cpu_runtime_ExpV8F32AVX != nullptr); - intrinsics.neon_intrinsics = (&__xla_cpu_runtime_ExpV4F32NEON != nullptr); +#ifdef __SSE4_1__ + intrinsics.sse_intrinsics = true; +#else + intrinsics.sse_intrinsics = false; +#endif +#ifdef __AVX__ + intrinsics.avx_intrinsics = true; +#else + intrinsics.avx_intrinsics = false; +#endif +#ifdef __ARM_NEON__ + intrinsics.neon_intrinsics = true; +#else + intrinsics.neon_intrinsics = false; +#endif return intrinsics; } @@ -201,12 +213,18 @@ bool RegisterKnownJITSymbols() { REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedConvF32); REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF32); REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF64); +#ifdef __ARM_NEON__ REGISTER_CPU_RUNTIME_SYMBOL(ExpV4F32NEON); - REGISTER_CPU_RUNTIME_SYMBOL(ExpV4F32SSE); - REGISTER_CPU_RUNTIME_SYMBOL(ExpV8F32AVX); REGISTER_CPU_RUNTIME_SYMBOL(LogV4F32NEON); +#endif +#ifdef __SSE4_1__ + REGISTER_CPU_RUNTIME_SYMBOL(ExpV4F32SSE); REGISTER_CPU_RUNTIME_SYMBOL(LogV4F32SSE); +#endif +#ifdef __AVX__ + REGISTER_CPU_RUNTIME_SYMBOL(ExpV8F32AVX); REGISTER_CPU_RUNTIME_SYMBOL(LogV8F32AVX); +#endif REGISTER_CPU_RUNTIME_SYMBOL(ParallelForkJoin); REGISTER_CPU_RUNTIME_SYMBOL(ReleaseInfeedBufferAfterDequeue); REGISTER_CPU_RUNTIME_SYMBOL(ReleaseOutfeedBufferAfterPopulation); @@ -275,7 +293,11 @@ bool RegisterKnownJITSymbols() { REGISTER_LIBM_SYMBOL(scalbln, double (*)(double, long)); REGISTER_LIBM_SYMBOL(scalbn, double (*)(double, int)); REGISTER_LIBM_SYMBOL(sin, double (*)(double)); +#ifdef __APPLE__ + REGISTER_LIBM_SYMBOL(__sincos, void (*)(double, double*, double*)); +#else REGISTER_LIBM_SYMBOL(sincos, void (*)(double, double*, double*)); +#endif REGISTER_LIBM_SYMBOL(sinh, double (*)(double)); REGISTER_LIBM_SYMBOL(sqrt, double (*)(double)); REGISTER_LIBM_SYMBOL(tan, double (*)(double)); diff --git a/tensorflow/compiler/xla/service/gpu/while_transformer.cc b/tensorflow/compiler/xla/service/gpu/while_transformer.cc index ccdd171759..ab94d7d543 100644 --- a/tensorflow/compiler/xla/service/gpu/while_transformer.cc +++ b/tensorflow/compiler/xla/service/gpu/while_transformer.cc @@ -44,7 +44,7 @@ namespace { // // Parameter // | -// Const GetTupleElemet +// Const GetTupleElement // \ / // Add (root) // diff --git a/tensorflow/compiler/xla/service/heap_simulator.h b/tensorflow/compiler/xla/service/heap_simulator.h index a03ad2f37c..88a8698d16 100644 --- a/tensorflow/compiler/xla/service/heap_simulator.h +++ b/tensorflow/compiler/xla/service/heap_simulator.h @@ -264,7 +264,7 @@ class LazyBestFitHeap : public HeapAlgorithm { enum { kLazyAllocOffset = -1 }; struct OrderChunkByIncreasingSize { - bool operator()(const Chunk& a, const Chunk& b) { + bool operator()(const Chunk& a, const Chunk& b) const { if (a.size != b.size) return a.size < b.size; return a.offset < b.offset; } diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc index 0e7ab00713..b9f76531f3 100644 --- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc +++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc @@ -1318,19 +1318,16 @@ string SaveGraph(const string& graph, file_extension = ".pbtxt"; break; } - string path = JoinPath( - dest_path, StrCat("hlo_graph_", output_num++, ".XXXXXX", file_extension)); + string path = JoinPath(dest_path, StrCat("hlo_graph_", output_num++, ".")); auto status = Status::OK(); - int fd = mkstemps(&path[0], file_extension.length()); - if (fd < 0) { + auto env = tensorflow::Env::Default(); + if (!env->CreateUniqueFileName(&path, file_extension)) { status = Status(tensorflow::error::Code::UNKNOWN, StrCat("Failed to create temporary file to dump HLO graph: ", strerror(errno))); } else { - status = - tensorflow::WriteStringToFile(tensorflow::Env::Default(), path, graph); - close(fd); + status = tensorflow::WriteStringToFile(env, path, graph); } if (!status.ok()) { LOG(WARNING) << "Saving HLO graph failed: " << status; diff --git a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc index 5f6f9810c3..23d2d4e87d 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc @@ -111,8 +111,15 @@ void KernelSupportLibrary::EmitAndCallOutlinedKernel( ir_builder->SetInsertPoint(return_inst); std::vector<llvm::Value*> arg_values; - std::transform(function->arg_begin(), function->arg_end(), - std::back_inserter(arg_values), std::addressof<llvm::Value>); + /* + * clang on OSX doesn't like std::transform or range for loop here. + * See https://github.com/tensorflow/tensorflow/issues/15196 + */ + for (llvm::Function::arg_iterator arg = function->arg_begin(), + arg_e = function->arg_end(); + arg != arg_e; ++arg) { + arg_values.push_back(arg); + } if (null_arg_idx != -1) { arg_values.insert(arg_values.begin() + null_arg_idx, nullptr); } diff --git a/tensorflow/compiler/xla/shape_tree.h b/tensorflow/compiler/xla/shape_tree.h index bf8d190150..d752619bd6 100644 --- a/tensorflow/compiler/xla/shape_tree.h +++ b/tensorflow/compiler/xla/shape_tree.h @@ -238,7 +238,7 @@ class ShapeTree { // (or compatible). // index : the index of the element in the shape. See ShapeUtil::GetSubshape // for definition of index. - // data : The data value at this elemnt. + // data : The data value at this element. template <typename Fn> void ForEachElement(const Fn& func) const; diff --git a/tensorflow/compiler/xla/util.h b/tensorflow/compiler/xla/util.h index b722095d1f..277cc5ec86 100644 --- a/tensorflow/compiler/xla/util.h +++ b/tensorflow/compiler/xla/util.h @@ -239,11 +239,14 @@ std::vector<T> Permute(tensorflow::gtl::ArraySlice<int64> permutation, // Override of the above that works around compile failures with gcc 7.1.1. // For details see https://github.com/tensorflow/tensorflow/issues/10843 +// Hide this workaround from MSVC as it causes ambiguous error. +#ifndef _MSC_VER template <typename T> std::vector<T> Permute(tensorflow::gtl::ArraySlice<int64> permutation, const std::vector<T>& input) { return Permute<std::vector, T>(permutation, input); } +#endif // Inverts a permutation, i.e., output_permutation[input_permutation[i]] = i. std::vector<int64> InversePermutation( |