/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ // Generally useful utility functions that are common to (not specific to any // given part of) the XLA code base. #ifndef TENSORFLOW_COMPILER_XLA_UTIL_H_ #define TENSORFLOW_COMPILER_XLA_UTIL_H_ #include #include #include #include #include "absl/algorithm/container.h" #include "absl/container/inlined_vector.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" #include "absl/strings/string_view.h" #include "absl/types/span.h" #include "tensorflow/compiler/xla/status.h" #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/math/math_util.h" #include "tensorflow/core/lib/strings/numbers.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/protobuf.h" #include "tensorflow/core/platform/types.h" namespace xla { // Logs the provided status message with a backtrace. // // For use by Status-factories, logs a backtrace at the point where the status // is created, such that we can use --vmodule=util=1 to see all status // creation backtraces. Status WithLogBacktrace(const Status& status); // Ranks greater than 8 are very rare, so use InlinedVector to store // the bounds and indices. And for the rare cases of ranks greater than 8, // the InlinedVector will just behave like an std::vector<> and allocate the // memory to store its values. static constexpr int kInlineRank = 8; using DimensionVector = absl::InlinedVector; // RAII timer that logs with a given label the wall clock time duration in human // readable form. This differs from base's ElapsedTimer primarily in that it // spits out the human-readable duration form. // // By default, the timing traces are only printed at VLOG(1) and above: // // XLA_SCOPED_LOGGING_TIMER("fooing bar"); // nop if !VLOG_IS_ON(1). // // but you can control this via: // // XLA_SCOPED_LOGGING_TIMER_LEVEL("fooing bar", 2); // nop if !VLOG_IS_ON(2) // #define XLA_SCOPED_LOGGING_TIMER(label) \ XLA_SCOPED_LOGGING_TIMER_HELPER(label, 1, __COUNTER__) #define XLA_SCOPED_LOGGING_TIMER_LEVEL(label, level) \ XLA_SCOPED_LOGGING_TIMER_HELPER(label, level, __COUNTER__) // Helper for implementing macros above. Do not use directly. // // Forces the evaluation of "counter", which we expect is equal to __COUNTER__. #define XLA_SCOPED_LOGGING_TIMER_HELPER(label, level, counter) \ XLA_SCOPED_LOGGING_TIMER_HELPER2(label, level, counter) // Helper for macros above. Don't use directly. #define XLA_SCOPED_LOGGING_TIMER_HELPER2(label, level, counter) \ ::xla::ScopedLoggingTimer XLA_ScopedLoggingTimerInstance##counter( \ label, VLOG_IS_ON(level)) // RAII timer for XLA_SCOPED_LOGGING_TIMER and XLA_SCOPED_LOGGING_TIMER_LEVEL // macros above. Recommended usage is via the macros so you don't have to give // the timer a name or worry about calling VLOG_IS_ON yourself. struct ScopedLoggingTimer { // The timer does nothing if enabled is false. This lets you pass in your // file's VLOG_IS_ON value. ScopedLoggingTimer(const string& label, bool enabled); ~ScopedLoggingTimer(); bool enabled; string label; uint64 start_micros; }; // Given a vector, returns a Span that points at its // internals. // // Warning: if the vector is updated its storage pointer may change, so use this // with caution (ideally in limited scopes with temporary lifetimes). template absl::Span MutableByteSlice(std::vector* v) { return absl::Span(reinterpret_cast(v->data()), v->size() * sizeof(T)); } // Turns an immutable slice of type T into an immutable slice of bytes with the // same byte size. template absl::Span CastToByteSlice(absl::Span slice) { return absl::Span(reinterpret_cast(slice.data()), slice.size() * sizeof(T)); } // Casts a byte slice to a non-byte type T, checking that the original slice // length is a multiple of sizeof(T). template absl::Span CastByteSlice(absl::Span slice) { CHECK_EQ(0, slice.size() % sizeof(T)); return absl::Span(reinterpret_cast(slice.data()), slice.size() / sizeof(T)); } // Convenience function to force a vector to convert to an immutable slice. template absl::Span AsSlice(const std::vector& v) { return absl::Span(v); } // Converts a mutable vector pointer into a Span of the same // type. template absl::Span AsMutableSlice(std::vector* v) { return absl::Span(v->data(), v->size()); } // xla::int64 is not the same type as tensorflow::protobuf_int64 in open-source. // Wrapper function that gives an int64 array slice view of a repeated int64 // protobuf field. static inline absl::Span AsInt64Slice( const tensorflow::protobuf::RepeatedField& v) { absl::Span slice(v); return absl::Span(reinterpret_cast(slice.data()), slice.size()); } // As above, but for uint64 types. static inline absl::Span AsUInt64Slice( const tensorflow::protobuf::RepeatedField& v) { absl::Span slice(v); return absl::Span(reinterpret_cast(slice.data()), slice.size()); } // Compares two containers for equality. Returns true iff the two containers // have the same size and all their elements compare equal using their // operator==. Like std::equal, but forces size equality. template bool ContainersEqual(const Container1T& c1, const Container2T& c2) { return ((c1.size() == c2.size()) && std::equal(std::begin(c1), std::end(c1), std::begin(c2))); } template bool ContainersEqual(const Container1T& c1, std::initializer_list il) { absl::Span c2{il}; return ContainersEqual(c1, c2); } // Compares two containers for equality. Returns true iff the two containers // have the same size and all their elements compare equal using the predicate // p. Like std::equal, but forces size equality. template bool ContainersEqual(const Container1T& c1, const Container2T& c2, PredicateT p) { return ((c1.size() == c2.size()) && std::equal(std::begin(c1), std::end(c1), std::begin(c2), p)); } // Performs a copy of count values from src to dest, using different strides for // source and destination. The source starting index is src_base, while the // destination one is dest_base. template void StridedCopy(absl::Span dest, int64 dest_base, int64 dest_stride, absl::Span src, int64 src_base, int64 src_stride, int64 count) { for (; count > 0; --count, dest_base += dest_stride, src_base += src_stride) { dest[dest_base] = static_cast(src[src_base]); } } // Adds some context information to the error message in a // Status. This is useful as Statuses are // propagated upwards. Status AddStatus(Status prior, absl::string_view context); Status AppendStatus(Status prior, absl::string_view context); // Status error shorthands -- StrFormat's the arguments to be used as an error // message and returns a status in the canonical error space. template Status InvalidArgument(const absl::FormatSpec& format, const Args&... args) { return WithLogBacktrace( tensorflow::errors::InvalidArgument(absl::StrFormat(format, args...))); } template Status Unimplemented(const absl::FormatSpec& format, const Args&... args) { return WithLogBacktrace( tensorflow::errors::Unimplemented(absl::StrFormat(format, args...))); } template Status InternalError(const absl::FormatSpec& format, const Args&... args) { return WithLogBacktrace( tensorflow::errors::Internal(absl::StrFormat(format, args...))); } template Status FailedPrecondition(const absl::FormatSpec& format, const Args&... args) { return WithLogBacktrace( tensorflow::errors::FailedPrecondition(absl::StrFormat(format, args...))); } template Status Cancelled(const absl::FormatSpec& format, const Args&... args) { return WithLogBacktrace( tensorflow::errors::Cancelled(absl::StrFormat(format, args...))); } template Status ResourceExhausted(const absl::FormatSpec& format, const Args&... args) { return WithLogBacktrace( tensorflow::errors::ResourceExhausted(absl::StrFormat(format, args...))); } template Status NotFound(const absl::FormatSpec& format, const Args&... args) { return WithLogBacktrace( tensorflow::errors::NotFound(absl::StrFormat(format, args...))); } template Status Unavailable(const absl::FormatSpec& format, const Args&... args) { return WithLogBacktrace( tensorflow::errors::Unavailable(absl::StrFormat(format, args...))); } template Status InvalidArgumentStrCat(Args&&... concat) { return InvalidArgument("%s", absl::StrCat(std::forward(concat)...)); } template Status UnimplementedStrCat(Args&&... concat) { return Unimplemented("%s", absl::StrCat(std::forward(concat)...)); } template Status InternalErrorStrCat(Args&&... concat) { return InternalError("%s", absl::StrCat(std::forward(concat)...)); } template Status ResourceExhaustedStrCat(Args&&... concat) { return ResourceExhausted("%s", absl::StrCat(std::forward(concat)...)); } // Splits the lines of the original, replaces leading whitespace with the prefix // given by "indentation", and returns the string joined by newlines again. As a // side effect, any additional trailing whitespace is removed. // // Note: even different amounts of leading whitespace on different lines will be // uniformly replaced with "indentation". string Reindent(absl::string_view original, absl::string_view indentation); // Checks whether permutation is a permutation of the [0, rank) integer range. bool IsPermutation(absl::Span permutation, int64 rank); // Applies `permutation` on `input` and returns the permuted array. // For each i, output[permutation[i]] = input[i]. // // Precondition: // 1. `permutation` is a permutation of 0..permutation.size()-1. // 2. permutation.size() == input.size(). template std::vector Permute( absl::Span permutation, const Container& input) { using T = typename Container::value_type; absl::Span data(input); CHECK(IsPermutation(permutation, data.size())); std::vector output(data.size()); for (size_t i = 0; i < permutation.size(); ++i) { output[permutation[i]] = data[i]; } return output; } // Inverts a permutation, i.e., output_permutation[input_permutation[i]] = i. std::vector InversePermutation( absl::Span input_permutation); // Composes two permutations: output[i] = p1[p2[i]]. std::vector ComposePermutations(absl::Span p1, absl::Span p2); // Returns true iff permutation == {0, 1, 2, ...}. bool IsIdentityPermutation(absl::Span permutation); template int64 PositionInContainer(const Container& container, int64 value) { return std::distance(container.begin(), std::find(container.begin(), container.end(), value)); } // Formats the container as a comma-separated string. StrAppend must support // appending the elements of the container. Prefix is prepended and suffix is // appended to the returned string. template string CommaSeparatedString(const Container& c, const char* prefix = "", const char* suffix = "") { // Not using Join() since the implementation here is simple anyway and this // avoids copying the string to append prefix. string comma_separated = prefix; const char* separator = ""; for (const auto& entry : c) { absl::StrAppend(&comma_separated, separator, entry); separator = ", "; } comma_separated += suffix; return comma_separated; } // Overload needed to allow the container to be an initializer list. The default // type for T makes an empty initializer list work as well. template string CommaSeparatedString(const std::initializer_list& c, const char* prefix = "", const char* suffix = "") { return CommaSeparatedString>(c, prefix, suffix); } // Formats the container in the mathematical notation for a vector, e.g. (1, 3, // 7). StrAppend must support appending the elements of c. template string VectorString(const Container& c) { return CommaSeparatedString(c, "(", ")"); } // Overload needed to allow the container to be an initializer list. The default // type for T makes an empty initializer list work as well. template string VectorString(const std::initializer_list& c) { return VectorString>(c); } // Returns a PaddingConfig object that represents no padding for the given rank. PaddingConfig MakeNoPaddingConfig(int64 rank); // Returns a PaddingConfig object where 'padding' contains // (low edge padding, high edge padding) pairs for each dimension. PaddingConfig MakeEdgePaddingConfig( absl::Span> padding); // Returns true if the padding configuration has at least one dimension with // non-zero interior padding. bool HasInteriorPadding(const PaddingConfig& config); // Imports the templated FloorOfRatio math function from the TensorFlow // namespace, as it is very commonly used. template T FloorOfRatio(T dividend, T divisor) { return tensorflow::MathUtil::FloorOfRatio(dividend, divisor); } // Imports the templated CeilOfRatio math function from the TensorFlow // namespace, as it is very commonly used. template T CeilOfRatio(T dividend, T divisor) { return tensorflow::MathUtil::CeilOfRatio(dividend, divisor); } // Rounds the value up to a multiple of the divisor by first calling CeilOfRatio // then multiplying by the divisor. For example: RoundUpToNearest(13, 8) => 16 template T RoundUpToNearest(T value, T divisor) { return CeilOfRatio(value, divisor) * divisor; } // Rounds the value down to a multiple of the divisor by first calling // FloorOfRatio then multiplying by the divisor. For example: // RoundDownToNearest(13, 8) => 8 template T RoundDownToNearest(T value, T divisor) { return FloorOfRatio(value, divisor) * divisor; } // Given a number of flops executed in an amount of time, produces a string that // represents the throughput; // e.g. HumanReadableNumFlops(1e9, 1e9) => 1.00GFLOP/s. string HumanReadableNumFlops(double flops, double nanoseconds); // Given a number of transcendental ops executed in an amount of time, produces // a string that represents the throughput; // e.g. HumanReadableNumTranscendentalOps(1e9, 1e9) => 1.00GTROP/s. string HumanReadableNumTranscendentalOps(double trops, double nanoseconds); // Split the text into multiple lines and log each line with the given // severity, filename, and line number. void LogLines(int sev, absl::string_view text, const char* fname, int lineno); template inline bool IsPowerOfTwo(T x) { static_assert(!std::numeric_limits::is_signed, "unsigned types only"); return x != 0 && (x & (x - 1)) == 0; } // Returns a mask with "bits" number of least significant bits set. inline uint32 LsbMaskU32(int bits) { CHECK_GE(bits, 0); return (1U << bits) - 1; } // Utility for performing a static_cast<> on a std::unique_ptr<>. template std::unique_ptr unique_ptr_static_cast(std::unique_ptr ptr) { return std::unique_ptr(static_cast(ptr.release())); } int64 Product(absl::Span xs); // Returns the start indices of consecutive non-overlapping subsequences of `a` // and `b` with the same product, i.e. `(i, j)` so // • a = {a[0 = i_0], ..., a[i_1 - 1], a[i_1], ... , a[i_2 - 1], ...} // • b = {b[0 = j_0], ..., b[j_1 - 1], b[j_1], ... , b[j_2 - 1], ...} // • ∀ k . 0 <= k < CommonFactors(a, b).size - 1 => // a[i_k] × a[i_k + 1] × ... × a[i_(k+1) - 1] = // b[j_k] × b[j_k + 1] × ... × b[j_(k+1) - 1] // where `CommonFactors(a, b)[CommonFactors(a, b).size - 1] = (a.size, b.size)` // // If the given shapes have non-zero size, returns the bounds of the shortest // possible such subsequences; else, returns `{(0, 0), (a.size, b.size)}`. std::vector> CommonFactors(absl::Span a, absl::Span b); // Removes illegal characters from filenames. string SanitizeFileName(string file_name); template int64 FindIndex(const C& c, Value&& value) { auto it = absl::c_find(c, std::forward(value)); return std::distance(c.begin(), it); } template void InsertAt(C* c, int64 index, Value&& value) { c->insert(c->begin() + index, std::forward(value)); } template void EraseAt(C* c, int64 index) { c->erase(c->begin() + index); } template std::vector ArraySliceToVector(absl::Span slice) { return std::vector(slice.begin(), slice.end()); } template std::vector InlinedVectorToVector( const absl::InlinedVector& inlined_vector) { return std::vector(inlined_vector.begin(), inlined_vector.end()); } // Returns true if `x` fits in 32-bits. template bool IsInt32(T x) { // Following conversion rules: "the value is unchanged if it can be // represented in the destination type (and bit-field width); otherwise, the // value is implementation-defined." return static_cast(x) == x; } template Status EraseElementFromVector(std::vector* container, const T& value) { // absl::c_find returns a const_iterator which does not seem to work on // gcc 4.8.4, and this breaks the ubuntu/xla_gpu build bot. auto it = std::find(container->begin(), container->end(), value); TF_RET_CHECK(it != container->end()); container->erase(it); return Status::OK(); } } // namespace xla #define XLA_LOG_LINES(SEV, STRING) \ ::xla::LogLines(SEV, STRING, __FILE__, __LINE__) #define XLA_VLOG_LINES(LEVEL, STRING) \ do { \ if (VLOG_IS_ON(LEVEL)) XLA_LOG_LINES(::tensorflow::INFO, STRING); \ } while (false); // Utility macro that performs the equivalent of what one would expect // LOG_LINES(FATAL, X) to do but can be used at the end of a function that // returns a value without getting a compiler warning that no value is returned. #define XLA_FATAL_LOG(X) \ XLA_LOG_LINES(::tensorflow::ERROR, X); \ LOG(FATAL) << "Aborting in " << __FUNCTION__ << " due to previous errors."; #endif // TENSORFLOW_COMPILER_XLA_UTIL_H_