diff options
Diffstat (limited to 'tensorflow/stream_executor/lib/mathutil.h')
-rw-r--r-- | tensorflow/stream_executor/lib/mathutil.h | 88 |
1 files changed, 88 insertions, 0 deletions
diff --git a/tensorflow/stream_executor/lib/mathutil.h b/tensorflow/stream_executor/lib/mathutil.h new file mode 100644 index 0000000000..dd3d37a19c --- /dev/null +++ b/tensorflow/stream_executor/lib/mathutil.h @@ -0,0 +1,88 @@ +#ifndef TENSORFLOW_STREAM_EXECUTOR_LIB_MATHUTIL_H_ +#define TENSORFLOW_STREAM_EXECUTOR_LIB_MATHUTIL_H_ + +#include <algorithm> +#include <cmath> +#include <limits> +#include <type_traits> +#include <vector> + +#include "tensorflow/stream_executor/platform/logging.h" +#include "tensorflow/stream_executor/platform/port.h" + +namespace perftools { +namespace gputools { +namespace port { + +class MathUtil { + public: + template <typename IntegralType> + static IntegralType CeilOfRatio(IntegralType numerator, + IntegralType denominator) { + return CeilOrFloorOfRatio<IntegralType, true>(numerator, denominator); + } + template <typename IntegralType> + static IntegralType FloorOfRatio(IntegralType numerator, + IntegralType denominator) { + return CeilOrFloorOfRatio<IntegralType, false>(numerator, denominator); + } + template <typename IntegralType, bool ceil> + static IntegralType CeilOrFloorOfRatio(IntegralType numerator, + IntegralType denominator); +}; + +// ---- CeilOrFloorOfRatio ---- +// This is a branching-free, cast-to-double-free implementation. +// +// Casting to double is in general incorrect because of loss of precision +// when casting an int64 into a double. +// +// There's a bunch of 'recipes' to compute a integer ceil (or floor) on the web, +// and most of them are incorrect. +template<typename IntegralType, bool ceil> +IntegralType MathUtil::CeilOrFloorOfRatio(IntegralType numerator, + IntegralType denominator) { + static_assert(std::is_integral<IntegralType>::value, + "CeilOfRatio_is_only_defined_for_integral_types"); + assert(denominator != 0); + // Dividing the smallest signed integer by -1 is not supported: it would + // SIGFPE + assert(!std::is_signed<IntegralType>::value || + numerator != std::numeric_limits<IntegralType>::min() || + denominator != -1); + + const IntegralType rounded_toward_zero = numerator / denominator; + const IntegralType intermediate_product = rounded_toward_zero * denominator; + + if (ceil) { // Compile-time condition: not an actual branching + // When rounded_toward_zero is negative, then an adjustment is never needed: + // the real ratio is negative, and so rounded toward zero is the ceil. + // When rounded_toward_zero is non-negative, an adjustment is needed if the + // sign of the difference numerator - intermediate_product is the same as + // the sign of the denominator. + // + // Using a bool and then a static_cast to IntegralType is not strictly + // necessary, but it makes the code clear, and anyway the compiler should + // get rid of it. + const bool needs_adjustment = (rounded_toward_zero >= 0) && + ((denominator > 0 && numerator > intermediate_product) || + (denominator < 0 && numerator < intermediate_product)); + const IntegralType adjustment = static_cast<IntegralType>(needs_adjustment); + const IntegralType ceil_of_ratio = rounded_toward_zero + adjustment; + return ceil_of_ratio; + } else { + // Floor case: symmetrical to the previous one + const bool needs_adjustment = (rounded_toward_zero <= 0) && + ((denominator > 0 && numerator < intermediate_product) || + (denominator < 0 && numerator > intermediate_product)); + const IntegralType adjustment = static_cast<IntegralType>(needs_adjustment); + const IntegralType floor_of_ratio = rounded_toward_zero - adjustment; + return floor_of_ratio; + } +} + +} // namespace port +} // namespace gputools +} // namespace perftools + +#endif // TENSORFLOW_STREAM_EXECUTOR_LIB_MATHUTIL_H_ |