author    Jared Duke <jdduke@google.com>    2018-08-06 10:42:41 -0700
committer TensorFlower Gardener <gardener@tensorflow.org>    2018-08-06 10:48:09 -0700
commit    b9d6339ccbad7a8266400f69b84ba394574cd105
tree      acbf2b05cd12264fecd50b81fd8e75f6b4ddade0    /tensorflow/contrib/lite/kernels/internal
parent    f3ed7f7e836da4f0ca1cb04cadce938744932b72
Fix more issues with TFLite compilation on Windows

Guards the GCC/Clang builtins __builtin_clz and __builtin_prefetch behind
__GNUC__ (with a portable CountLeadingZeros fallback), maps the GNU
__restrict__ qualifier to MSVC's __restrict, and renames the single-letter
template parameter I to TI in SparseToDense.
PiperOrigin-RevId: 207569516
Diffstat (limited to 'tensorflow/contrib/lite/kernels/internal')
-rw-r--r--  tensorflow/contrib/lite/kernels/internal/common.h                           |  4
-rw-r--r--  tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h          |  9
-rw-r--r--  tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.cc | 12
-rw-r--r--  tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h          | 11
4 files changed, 23 insertions(+), 13 deletions(-)
diff --git a/tensorflow/contrib/lite/kernels/internal/common.h b/tensorflow/contrib/lite/kernels/internal/common.h
index 310a8980e6..eb4d0108bd 100644
--- a/tensorflow/contrib/lite/kernels/internal/common.h
+++ b/tensorflow/contrib/lite/kernels/internal/common.h
@@ -117,6 +117,9 @@ template <typename T>
int CountLeadingZeros(T integer_input) {
static_assert(std::is_unsigned<T>::value,
"Only unsigned integer types handled.");
+#if defined(__GNUC__)
+ return integer_input ? __builtin_clz(integer_input) : 0;
+#else
const T one_in_leading_positive = static_cast<T>(1)
<< (std::numeric_limits<T>::digits - 1);
int leading_zeros = 0;
@@ -125,6 +128,7 @@ int CountLeadingZeros(T integer_input) {
++leading_zeros;
}
return leading_zeros;
+#endif
}
// DO NOT USE THIS STRUCT FOR NEW FUNCTIONALITY BEYOND IMPLEMENTING
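Note: CountLeadingZeros now prefers __builtin_clz under GCC/Clang and keeps the
shift-and-count loop as the portable fallback for other toolchains such as
MSVC. A minimal standalone sketch of that fallback path, under the
illustrative name CountLeadingZerosPortable (the values in main are examples
only; callers must pass a nonzero input, matching the TFLITE_DCHECK_GT guards
at the call sites, since zero would loop forever here):

    #include <cstdint>
    #include <iostream>
    #include <limits>
    #include <type_traits>

    template <typename T>
    int CountLeadingZerosPortable(T integer_input) {
      static_assert(std::is_unsigned<T>::value,
                    "Only unsigned integer types handled.");
      // Shift left until the most significant bit is set, counting steps.
      const T one_in_leading_positive = static_cast<T>(1)
                                        << (std::numeric_limits<T>::digits - 1);
      int leading_zeros = 0;
      while (integer_input < one_in_leading_positive) {
        integer_input = static_cast<T>(integer_input << 1);
        ++leading_zeros;
      }
      return leading_zeros;
    }

    int main() {
      std::cout << CountLeadingZerosPortable<uint32_t>(1u << 31) << "\n";  // 0
      std::cout << CountLeadingZerosPortable<uint32_t>(1u) << "\n";        // 31
      std::cout << CountLeadingZerosPortable<uint16_t>(0x0100) << "\n";    // 7
    }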
diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
index ebb2c7a8eb..6adb879c71 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
@@ -2326,7 +2326,8 @@ inline void GetInvSqrtQuantizedMultiplierExp(int32 input,
++*output_shift;
}
TFLITE_DCHECK_GT(input, 0);
- const unsigned max_left_shift_bits = __builtin_clz(input) - 1;
+ const unsigned max_left_shift_bits =
+ CountLeadingZeros(static_cast<uint32>(input)) - 1;
const unsigned max_left_shift_bit_pairs = max_left_shift_bits / 2;
const unsigned left_shift_bit_pairs = max_left_shift_bit_pairs - 1;
*output_shift -= left_shift_bit_pairs;
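The bit-pair arithmetic follows from 1/sqrt(4x) = (1/2) * (1/sqrt(x)): every
two bits of left shift applied to the input move the inverse square root by
exactly one bit, so the available headroom is consumed in pairs. A worked
numeric check, with Clz32 as an illustrative stand-in for CountLeadingZeros:

    #include <cstdint>
    #include <iostream>

    int Clz32(uint32_t x) {  // stand-in; caller guarantees x != 0
      int n = 0;
      while (x < 0x80000000u) { x <<= 1; ++n; }
      return n;
    }

    int main() {
      uint32_t input = 1u << 16;                        // positive, as DCHECKed
      unsigned max_left_shift_bits = Clz32(input) - 1;  // 14: stop short of the sign bit
      unsigned max_left_shift_bit_pairs = max_left_shift_bits / 2;   // 7
      unsigned left_shift_bit_pairs = max_left_shift_bit_pairs - 1;  // 6
      std::cout << left_shift_bit_pairs << "\n";        // prints 6
    }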
@@ -4034,7 +4035,7 @@ inline void Softmax(const uint8* input_data, const RuntimeShape& input_shape,
// perform a division by the above-computed sum-of-exponentials.
int32 fixed_sum_of_exps = sum_of_exps.raw();
int headroom_plus_one =
- __builtin_clz(static_cast<uint32>(fixed_sum_of_exps));
+ CountLeadingZeros(static_cast<uint32>(fixed_sum_of_exps));
// This is the number of bits to the left of the binary point above 1.0.
// Consider fixed_sum_of_exps=1.25. In that case shifted_scale=0.8 and
// no later adjustment will be needed.
@@ -4180,7 +4181,7 @@ log_x_for_x_greater_than_or_equal_to_1_impl(
// required shift "ourselves" instead of using, say, Rescale.
FixedPoint0 z_a = FixedPoint0::FromRaw(input_val.raw());
// z_a_pow_2 = input_integer_bits - z_a_headroom;
- int z_a_headroom_plus_1 = __builtin_clz(static_cast<uint32>(z_a.raw()));
+ int z_a_headroom_plus_1 = CountLeadingZeros(static_cast<uint32>(z_a.raw()));
FixedPoint0 r_a_tmp =
SaturatingRoundingMultiplyByPOTParam(z_a, (z_a_headroom_plus_1 - 1));
const int32 r_a_raw =
@@ -4195,7 +4196,7 @@ log_x_for_x_greater_than_or_equal_to_1_impl(
// z_b is treated like z_a, but premultiplying by sqrt(0.5).
FixedPoint0 z_b = z_a * sqrt_half;
- int z_b_headroom = __builtin_clz(static_cast<uint32>(z_b.raw())) - 1;
+ int z_b_headroom = CountLeadingZeros(static_cast<uint32>(z_b.raw())) - 1;
const int32 r_b_raw =
SaturatingRoundingMultiplyByPOTParam(z_a.raw(), z_b_headroom);
const FixedPointAccum z_b_pow_2_adj = SaturatingSub(
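In the Softmax and log_x_for_x_greater_than_or_equal_to_1_impl call sites, the
leading-zero count of a positive raw fixed-point value is its headroom: how
far it can shift left before the top bit is reached. A small illustrative
sketch (Clz32 again stands in for CountLeadingZeros; the sample value is
arbitrary):

    #include <cstdint>
    #include <iostream>

    int Clz32(uint32_t x) {  // portable stand-in; x must be nonzero
      int n = 0;
      while (x < 0x80000000u) { x <<= 1; ++n; }
      return n;
    }

    int main() {
      uint32_t raw = 0x00300000u;   // some positive raw fixed-point value
      int headroom = Clz32(raw);    // 10 bits of usable left shift
      std::cout << "headroom = " << headroom << "\n";
      std::cout << std::hex << (raw << headroom) << "\n";  // c0000000: top bit set
    }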
diff --git a/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.cc b/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.cc
index 6bd88b5596..e6ccd7a32c 100644
--- a/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.cc
+++ b/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.cc
@@ -21,6 +21,10 @@ limitations under the License.
#include "tensorflow/contrib/lite/kernels/internal/round.h"
#include "tensorflow/contrib/lite/kernels/op_macros.h"
+#if defined(_MSC_VER)
+#define __restrict__ __restrict
+#endif
+
namespace tflite {
namespace tensor_utils {
@@ -38,10 +42,8 @@ bool PortableIsZeroVector(const float* vector, int v_size) {
}
void PortableSymmetricQuantizeFloats(const float* values, const int size,
- int8_t* quantized_values,
- float* __restrict__ min_value,
- float* __restrict__ max_value,
- float* __restrict__ scaling_factor) {
+ int8_t* quantized_values, float* min_value,
+ float* max_value, float* scaling_factor) {
auto minmax = std::minmax_element(values, values + size);
*min_value = *minmax.first;
*max_value = *minmax.second;
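The _MSC_VER shim exists because MSVC spells the GNU __restrict__ qualifier as
__restrict; the #define lets any remaining uses of the GNU spelling compile
unchanged on Windows (the signature above simply drops the qualifiers
instead). A sketch of what the shim enables; Scale is an illustrative
function, not from the patch:

    #if defined(_MSC_VER)
    #define __restrict__ __restrict
    #endif

    // __restrict__ promises the two pointers never alias, which lets the
    // compiler vectorize the loop more aggressively.
    void Scale(const float* __restrict__ in, float* __restrict__ out, int n,
               float s) {
      for (int i = 0; i < n; ++i) out[i] = in[i] * s;
    }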
@@ -93,9 +95,11 @@ void PortableMatrixBatchVectorMultiplyAccumulate(
for (row = 0; row < m_rows; ++row, result += result_stride) {
// Initialize the dot product sum for the row to 0.
int32_t dotprod = 0;
+#if defined(__GNUC__)
// Prefetch the row to cache.
__builtin_prefetch(row_ptr, 0 /* prefetch for read */,
3 /* temporal locality */);
+#endif
// For every block of 16 8-bit elements (128-bit register) from each row.
for (col = 0; col < m_cols; ++col, ++row_ptr) {
dotprod += (*row_ptr) * (vectors[col]);
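__builtin_prefetch is likewise a GCC/Clang extension, so the patch compiles
the cache hint only under __GNUC__ and lets other compilers run the plain
loop. A sketch of the same pattern behind an illustrative PREFETCH macro (not
part of the patch):

    #include <cstdint>

    #if defined(__GNUC__)
    #define PREFETCH(addr) __builtin_prefetch((addr), 0 /* read */, 3 /* high locality */)
    #else
    #define PREFETCH(addr) ((void)0)  // no-op where the builtin is unavailable
    #endif

    int32_t DotRow(const int8_t* row, const int8_t* vec, int cols) {
      PREFETCH(row);  // hint that the row will be read soon
      int32_t acc = 0;
      for (int c = 0; c < cols; ++c) acc += row[c] * vec[c];
      return acc;
    }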
diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
index 2b20e79021..7eb6fe34bc 100644
--- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
@@ -903,7 +903,8 @@ inline void GetInvSqrtQuantizedMultiplierExp(int32 input,
++*output_shift;
}
TFLITE_DCHECK_GT(input, 0);
- const unsigned max_left_shift_bits = __builtin_clz(input) - 1;
+ const unsigned max_left_shift_bits =
+ CountLeadingZeros(static_cast<uint32>(input)) - 1;
const unsigned max_left_shift_bit_pairs = max_left_shift_bits / 2;
const unsigned left_shift_bit_pairs = max_left_shift_bit_pairs - 1;
*output_shift -= left_shift_bit_pairs;
@@ -4190,8 +4191,8 @@ inline void RankOneSelect(const D* input_condition_data,
}
// For easy implementation, the indices is always a vector of size-4 vectors.
-template <typename T, typename I>
-inline void SparseToDense(const std::vector<std::vector<I>>& indices,
+template <typename T, typename TI>
+inline void SparseToDense(const std::vector<std::vector<TI>>& indices,
const T* values, T default_value, T* output_data,
const Dims<4>& output_dims, bool value_is_scalar) {
const int value_count = indices.size();
@@ -4206,7 +4207,7 @@ inline void SparseToDense(const std::vector<std::vector<I>>& indices,
// condition within the loop every time.
if (value_is_scalar) {
for (int i = 0; i < value_count; ++i) {
- const std::vector<I>& index = indices[i];
+ const std::vector<TI>& index = indices[i];
TFLITE_DCHECK_EQ(index.size(), 4);
const T value = *values; // just use the first value.
output_data[Offset(output_dims, index[3], index[2], index[1], index[0])] =
@@ -4217,7 +4218,7 @@ inline void SparseToDense(const std::vector<std::vector<I>>& indices,
// Go through the values and indices to fill the sparse values.
for (int i = 0; i < value_count; ++i) {
- const std::vector<I>& index = indices[i];
+ const std::vector<TI>& index = indices[i];
TFLITE_DCHECK_EQ(index.size(), 4);
const T value = values[i];
output_data[Offset(output_dims, index[3], index[2], index[1], index[0])] =
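The SparseToDense change is a mechanical rename of the template parameter I to
TI; behavior is unchanged. A self-contained sketch of the same
fill-then-scatter pattern with the new name (SparseToDenseSketch and its
flat-index math are simplifications: TFLite's Dims<4> and Offset use
precomputed strides):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // dims[0] is the outermost dimension here, unlike TFLite's Dims<4>.
    template <typename T, typename TI>
    void SparseToDenseSketch(const std::vector<std::vector<TI>>& indices,
                             const T* values, T default_value, T* output,
                             const int dims[4]) {
      const int total = dims[0] * dims[1] * dims[2] * dims[3];
      for (int i = 0; i < total; ++i) output[i] = default_value;  // dense fill
      for (std::size_t i = 0; i < indices.size(); ++i) {
        const std::vector<TI>& index = indices[i];  // one size-4 index per value
        const int flat =
            ((index[0] * dims[1] + index[1]) * dims[2] + index[2]) * dims[3] +
            index[3];
        output[flat] = values[i];  // scatter the value at its 4-D position
      }
    }

    int main() {
      const int dims[4] = {2, 1, 1, 2};  // 2x1x1x2 output
      std::vector<std::vector<int32_t>> indices = {{0, 0, 0, 1}, {1, 0, 0, 0}};
      float values[] = {3.f, 5.f};
      float out[4];
      SparseToDenseSketch(indices, values, 0.f, out, dims);
      // out is now {0, 3, 5, 0}
    }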