author | 2018-08-06 10:42:41 -0700
committer | 2018-08-06 10:48:09 -0700
commit | b9d6339ccbad7a8266400f69b84ba394574cd105 (patch)
tree | acbf2b05cd12264fecd50b81fd8e75f6b4ddade0 /tensorflow/contrib/lite/kernels/internal
parent | f3ed7f7e836da4f0ca1cb04cadce938744932b72 (diff)
Fix more issues with TFLite compilation on Windows
PiperOrigin-RevId: 207569516
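Context for the change: MSVC does not provide the GCC/Clang intrinsics `__builtin_clz` and `__builtin_prefetch`, and it spells the restrict qualifier `__restrict` rather than `__restrict__`. The hunks below route all leading-zero counts through the portable `CountLeadingZeros` helper, which now uses the intrinsic only when it is available. A minimal self-contained sketch of that pattern follows; the fixed-width wrapper name is illustrative, the commit's real version is the templated `CountLeadingZeros` in common.h:

```cpp
#include <cstdint>
#include <limits>

// Sketch: use __builtin_clz where the compiler provides it, otherwise fall
// back to a portable shift loop. Illustrative only.
inline int LeadingZeros32(std::uint32_t x) {
  // __builtin_clz(0) is undefined, so the zero case is handled up front
  // (the commit returns 0 for input 0, which this sketch mirrors).
  if (x == 0) return 0;
#if defined(__GNUC__)
  return __builtin_clz(x);
#else
  const std::uint32_t top_bit =
      std::uint32_t{1} << (std::numeric_limits<std::uint32_t>::digits - 1);
  int leading_zeros = 0;
  while (x < top_bit) {  // Shift left until the most significant bit is set.
    x <<= 1;
    ++leading_zeros;
  }
  return leading_zeros;
#endif
}
```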
Diffstat (limited to 'tensorflow/contrib/lite/kernels/internal')
 tensorflow/contrib/lite/kernels/internal/common.h                           |  4 ++++
 tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h          |  9 +++++----
 tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.cc | 12 ++++++++----
 tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h          | 11 ++++++-----
4 files changed, 23 insertions, 13 deletions
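One of the four files below, portable_tensor_utils.cc, also shims GCC's `__restrict__` spelling onto the `__restrict` keyword that MSVC accepts. A small standalone sketch of how that shim is used (the `ScaleInto` function is hypothetical, not from the patch):

```cpp
// Map the GCC/Clang spelling of restrict onto MSVC's, as the commit does in
// portable_tensor_utils.cc. After this, __restrict__ compiles on both.
#if defined(_MSC_VER)
#define __restrict__ __restrict
#endif

// Hypothetical example: __restrict__ promises the compiler that src and dst
// never alias, which enables more aggressive vectorization of the loop.
void ScaleInto(const float* __restrict__ src, float* __restrict__ dst,
               int n, float scale) {
  for (int i = 0; i < n; ++i) dst[i] = src[i] * scale;
}
```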
diff --git a/tensorflow/contrib/lite/kernels/internal/common.h b/tensorflow/contrib/lite/kernels/internal/common.h
index 310a8980e6..eb4d0108bd 100644
--- a/tensorflow/contrib/lite/kernels/internal/common.h
+++ b/tensorflow/contrib/lite/kernels/internal/common.h
@@ -117,6 +117,9 @@ template <typename T>
 int CountLeadingZeros(T integer_input) {
   static_assert(std::is_unsigned<T>::value,
                 "Only unsigned integer types handled.");
+#if defined(__GNUC__)
+  return integer_input ? __builtin_clz(integer_input) : 0;
+#else
   const T one_in_leading_positive = static_cast<T>(1)
                                     << (std::numeric_limits<T>::digits - 1);
   int leading_zeros = 0;
@@ -125,6 +128,7 @@ int CountLeadingZeros(T integer_input) {
     ++leading_zeros;
   }
   return leading_zeros;
+#endif
 }
 
 // DO NOT USE THIS STRUCT FOR NEW FUNCTIONALITY BEYOND IMPLEMENTING
diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
index ebb2c7a8eb..6adb879c71 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
@@ -2326,7 +2326,8 @@ inline void GetInvSqrtQuantizedMultiplierExp(int32 input,
     ++*output_shift;
   }
   TFLITE_DCHECK_GT(input, 0);
-  const unsigned max_left_shift_bits = __builtin_clz(input) - 1;
+  const unsigned max_left_shift_bits =
+      CountLeadingZeros(static_cast<uint32>(input)) - 1;
   const unsigned max_left_shift_bit_pairs = max_left_shift_bits / 2;
   const unsigned left_shift_bit_pairs = max_left_shift_bit_pairs - 1;
   *output_shift -= left_shift_bit_pairs;
@@ -4034,7 +4035,7 @@ inline void Softmax(const uint8* input_data, const RuntimeShape& input_shape,
   // perform a division by the above-computed sum-of-exponentials.
   int32 fixed_sum_of_exps = sum_of_exps.raw();
   int headroom_plus_one =
-      __builtin_clz(static_cast<uint32>(fixed_sum_of_exps));
+      CountLeadingZeros(static_cast<uint32>(fixed_sum_of_exps));
   // This is the number of bits to the left of the binary point above 1.0.
   // Consider fixed_sum_of_exps=1.25. In that case shifted_scale=0.8 and
   // no later adjustment will be needed.
@@ -4180,7 +4181,7 @@ log_x_for_x_greater_than_or_equal_to_1_impl(
   // required shift "ourselves" instead of using, say, Rescale.
   FixedPoint0 z_a = FixedPoint0::FromRaw(input_val.raw());
   // z_a_pow_2 = input_integer_bits - z_a_headroom;
-  int z_a_headroom_plus_1 = __builtin_clz(static_cast<uint32>(z_a.raw()));
+  int z_a_headroom_plus_1 = CountLeadingZeros(static_cast<uint32>(z_a.raw()));
   FixedPoint0 r_a_tmp =
       SaturatingRoundingMultiplyByPOTParam(z_a, (z_a_headroom_plus_1 - 1));
   const int32 r_a_raw =
@@ -4195,7 +4196,7 @@ log_x_for_x_greater_than_or_equal_to_1_impl(
 
   // z_b is treated like z_a, but premultiplying by sqrt(0.5).
   FixedPoint0 z_b = z_a * sqrt_half;
-  int z_b_headroom = __builtin_clz(static_cast<uint32>(z_b.raw())) - 1;
+  int z_b_headroom = CountLeadingZeros(static_cast<uint32>(z_b.raw())) - 1;
   const int32 r_b_raw =
       SaturatingRoundingMultiplyByPOTParam(z_a.raw(), z_b_headroom);
   const FixedPointAccum z_b_pow_2_adj = SaturatingSub(
diff --git a/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.cc b/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.cc
index 6bd88b5596..e6ccd7a32c 100644
--- a/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.cc
+++ b/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.cc
@@ -21,6 +21,10 @@ limitations under the License.
 #include "tensorflow/contrib/lite/kernels/internal/round.h"
 #include "tensorflow/contrib/lite/kernels/op_macros.h"
 
+#if defined(_MSC_VER)
+#define __restrict__ __restrict
+#endif
+
 namespace tflite {
 namespace tensor_utils {
 
@@ -38,10 +42,8 @@ bool PortableIsZeroVector(const float* vector, int v_size) {
 }
 
 void PortableSymmetricQuantizeFloats(const float* values, const int size,
-                                     int8_t* quantized_values,
-                                     float* __restrict__ min_value,
-                                     float* __restrict__ max_value,
-                                     float* __restrict__ scaling_factor) {
+                                     int8_t* quantized_values, float* min_value,
+                                     float* max_value, float* scaling_factor) {
   auto minmax = std::minmax_element(values, values + size);
   *min_value = *minmax.first;
   *max_value = *minmax.second;
@@ -93,9 +95,11 @@ void PortableMatrixBatchVectorMultiplyAccumulate(
   for (row = 0; row < m_rows; ++row, result += result_stride) {
     // Initialize the dot product sum for the row to 0.
     int32_t dotprod = 0;
+#if defined(__GNUC__)
     // Prefetch the row to cache.
     __builtin_prefetch(row_ptr, 0 /* prefetch for read */,
                        3 /* temporal locality */);
+#endif
     // For every block of 16 8-bit elements (128-bit register) from each row.
     for (col = 0; col < m_cols; ++col, ++row_ptr) {
       dotprod += (*row_ptr) * (vectors[col]);
diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
index 2b20e79021..7eb6fe34bc 100644
--- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
@@ -903,7 +903,8 @@ inline void GetInvSqrtQuantizedMultiplierExp(int32 input,
     ++*output_shift;
   }
   TFLITE_DCHECK_GT(input, 0);
-  const unsigned max_left_shift_bits = __builtin_clz(input) - 1;
+  const unsigned max_left_shift_bits =
+      CountLeadingZeros(static_cast<uint32>(input)) - 1;
   const unsigned max_left_shift_bit_pairs = max_left_shift_bits / 2;
   const unsigned left_shift_bit_pairs = max_left_shift_bit_pairs - 1;
   *output_shift -= left_shift_bit_pairs;
@@ -4190,8 +4191,8 @@ inline void RankOneSelect(const D* input_condition_data,
 }
 
 // For easy implementation, the indices is always a vector of size-4 vectors.
-template <typename T, typename I>
-inline void SparseToDense(const std::vector<std::vector<I>>& indices,
+template <typename T, typename TI>
+inline void SparseToDense(const std::vector<std::vector<TI>>& indices,
                           const T* values, T default_value, T* output_data,
                           const Dims<4>& output_dims, bool value_is_scalar) {
   const int value_count = indices.size();
@@ -4206,7 +4207,7 @@ inline void SparseToDense(const std::vector<std::vector<I>>& indices,
   // condition within the loop every time.
   if (value_is_scalar) {
     for (int i = 0; i < value_count; ++i) {
-      const std::vector<I>& index = indices[i];
+      const std::vector<TI>& index = indices[i];
       TFLITE_DCHECK_EQ(index.size(), 4);
       const T value = *values;  // just use the first value.
       output_data[Offset(output_dims, index[3], index[2], index[1], index[0])] =
@@ -4217,7 +4218,7 @@ inline void SparseToDense(const std::vector<std::vector<I>>& indices,
 
   // Go through the values and indices to fill the sparse values.
   for (int i = 0; i < value_count; ++i) {
-    const std::vector<I>& index = indices[i];
+    const std::vector<TI>& index = indices[i];
     TFLITE_DCHECK_EQ(index.size(), 4);
     const T value = values[i];
     output_data[Offset(output_dims, index[3], index[2], index[1], index[0])] =