about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: A. Unique TensorFlower <gardener@tensorflow.org> 2018-06-29 13:28:40 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> 2018-06-29 13:31:21 -0700
commit: d3c0598ff7650b9d25eb01f792a6106428163df5 (patch)
tree: cd2a2e35cc743a8273bd9b34f019b425dbf85e6d
parent: f95b37f6d1b5fd3c9ef71d974763c2248f690b54 (diff)
Use the same convention for the scale parameter in hybrid ops as well.
PiperOrigin-RevId: 202698287
-rw-r--r-- tensorflow/contrib/lite/kernels/embedding_lookup.cc | 2
-rw-r--r-- tensorflow/contrib/lite/kernels/internal/kernel_utils.cc | 6
-rw-r--r-- tensorflow/contrib/lite/kernels/internal/optimized/neon_tensor_utils.cc | 11
-rw-r--r-- tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.cc | 9
-rw-r--r-- tensorflow/contrib/lite/kernels/internal/tensor_utils_test.cc | 5
-rw-r--r-- tensorflow/contrib/lite/kernels/svdf.cc | 5
-rw-r--r-- tensorflow/contrib/lite/kernels/svdf_test.cc | 2
7 files changed, 22 insertions(+), 18 deletions(-)
diff --git a/tensorflow/contrib/lite/kernels/embedding_lookup.cc b/tensorflow/contrib/lite/kernels/embedding_lookup.cc
index 9410bead5e..0ba170a4da 100644
--- a/tensorflow/contrib/lite/kernels/embedding_lookup.cc
+++ b/tensorflow/contrib/lite/kernels/embedding_lookup.cc
@@ -94,7 +94,7 @@ TfLiteStatus EvalHybrid(TfLiteContext* context, TfLiteNode* node,
const TfLiteTensor* lookup, const TfLiteTensor* value,
TfLiteTensor* output) {
const int row_size = SizeOfDimension(value, 0);
- const double scaling_factor = 1.0 / value->params.scale;
+ const double scaling_factor = value->params.scale;
// col_size after we flatten tensor into 2D.
int col_size = 1;
diff --git a/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc b/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc
index 36c25388e8..a0e382edb6 100644
--- a/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc
+++ b/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc
@@ -416,7 +416,7 @@ void LstmStep(
if (!use_cifg) {
if (use_peephole && !is_cell_state_all_zeros) {
VectorMultiply(cell_to_input_weights_ptr, n_cell,
- 1. / cell_to_input_weights_scale, recovered_cell_weights);
+ cell_to_input_weights_scale, recovered_cell_weights);
tensor_utils::VectorBatchVectorCwiseProductAccumulate(
recovered_cell_weights, n_cell, cell_state_ptr, n_batch,
input_gate_scratch);
@@ -428,7 +428,7 @@ void LstmStep(
// For each batch and cell: update forget gate.
if (use_peephole && !is_cell_state_all_zeros) {
VectorMultiply(cell_to_forget_weights_ptr, n_cell,
- 1. / cell_to_forget_weights_scale, recovered_cell_weights);
+ cell_to_forget_weights_scale, recovered_cell_weights);
tensor_utils::VectorBatchVectorCwiseProductAccumulate(
recovered_cell_weights, n_cell, cell_state_ptr, n_batch,
forget_gate_scratch);
@@ -460,7 +460,7 @@ void LstmStep(
// For each batch and cell: update the output gate.
if (use_peephole && !is_cell_state_all_zeros) {
VectorMultiply(cell_to_output_weights_ptr, n_cell,
- 1. / cell_to_output_weights_scale, recovered_cell_weights);
+ cell_to_output_weights_scale, recovered_cell_weights);
tensor_utils::VectorBatchVectorCwiseProductAccumulate(
recovered_cell_weights, n_cell, cell_state_ptr, n_batch,
output_gate_scratch);
diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/neon_tensor_utils.cc b/tensorflow/contrib/lite/kernels/internal/optimized/neon_tensor_utils.cc
index 38ad32c734..5ba7e2af9b 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/neon_tensor_utils.cc
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/neon_tensor_utils.cc
@@ -162,7 +162,7 @@ void NeonMatrixBatchVectorMultiplyAccumulate(
int batch, row, col;
for (batch = 0; batch < n_batch; ++batch) {
- const float batch_scaling_factor_inv = 1.0 / scaling_factors[batch];
+ const float batch_scaling_factor = scaling_factors[batch];
// Copy the vector data to an aligned vector.
memcpy(aligned_vec, vectors + batch * m_cols, sizeof(int8) * m_cols);
// Compute dot-product for every column.
@@ -232,7 +232,7 @@ void NeonMatrixBatchVectorMultiplyAccumulate(
int32 neon_sum =
vgetq_lane_s64(pairwiseAdded, 0) + vgetq_lane_s64(pairwiseAdded, 1);
- *result += ((neon_sum + postable_sum) * batch_scaling_factor_inv);
+ *result += ((neon_sum + postable_sum) * batch_scaling_factor);
} // for row
} // for batch
@@ -418,13 +418,14 @@ void NeonSymmetricQuantizeFloats(const float* values, const int size,
*scaling_factor = 1;
return;
}
- *scaling_factor = kScale / range;
+ *scaling_factor = range / kScale;
+ const float scaling_factor_inv = 1.0f / *scaling_factor;
const int postamble_start =
size - (size & (2 * kFloatWeightsPerNeonLane - 1));
// Vectorized constants.
- const float32x4_t q_factor_f32x4 = vmovq_n_f32(*scaling_factor);
+ const float32x4_t q_factor_f32x4 = vmovq_n_f32(scaling_factor_inv);
const float32x4_t point5_f32x4 = vmovq_n_f32(0.5);
const float32x4_t zero_f32x4 = vmovq_n_f32(0.0);
const int32x4_t scale_i32x4 = vmovq_n_s32(kScale);
@@ -476,7 +477,7 @@ void NeonSymmetricQuantizeFloats(const float* values, const int size,
for (int i = postamble_start; i < size; ++i) {
const int32 quantized_value =
- static_cast<int32>(TfLiteRound(*scaling_factor * values[i]));
+ static_cast<int32>(TfLiteRound(scaling_factor_inv * values[i]));
quantized_values[i] = std::min(kScale, std::max(-kScale, quantized_value));
}
}
diff --git a/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.cc b/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.cc
index f8c6f341f7..ccf112c990 100644
--- a/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.cc
+++ b/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.cc
@@ -51,10 +51,11 @@ void PortableSymmetricQuantizeFloats(const float* values, const int size,
*scaling_factor = 1;
return;
}
- *scaling_factor = kScale / range;
+ *scaling_factor = range / kScale;
+ const float scaling_factor_inv = 1.0f / *scaling_factor;
for (int i = 0; i < size; ++i) {
const int32_t quantized_value =
- static_cast<int32_t>(TfLiteRound(*scaling_factor * values[i]));
+ static_cast<int32_t>(TfLiteRound(values[i] * scaling_factor_inv));
// Clamp: just in case some odd numeric offset.
quantized_values[i] = std::min(kScale, std::max(-kScale, quantized_value));
}
@@ -85,7 +86,7 @@ void PortableMatrixBatchVectorMultiplyAccumulate(
float* __restrict__ result, int result_stride) {
int batch, row, col;
for (batch = 0; batch < n_batch; ++batch, vectors += m_cols) {
- const float batch_scaling_factor_inv = 1.0 / scaling_factors[batch];
+ const float batch_scaling_factor = scaling_factors[batch];
// Get the address of the first row.
const int8_t* row_ptr = matrix;
for (row = 0; row < m_rows; ++row, result += result_stride) {
@@ -98,7 +99,7 @@ void PortableMatrixBatchVectorMultiplyAccumulate(
for (col = 0; col < m_cols; ++col, ++row_ptr) {
dotprod += (*row_ptr) * (vectors[col]);
} // for col
- *result += (dotprod * batch_scaling_factor_inv);
+ *result += (dotprod * batch_scaling_factor);
} // for row
} // for batch
}
diff --git a/tensorflow/contrib/lite/kernels/internal/tensor_utils_test.cc b/tensorflow/contrib/lite/kernels/internal/tensor_utils_test.cc
index 14ee528394..aa0d49ae4d 100644
--- a/tensorflow/contrib/lite/kernels/internal/tensor_utils_test.cc
+++ b/tensorflow/contrib/lite/kernels/internal/tensor_utils_test.cc
@@ -63,7 +63,8 @@ TEST(uKernels, SymmetricQuantizeFloatsTest) {
EXPECT_EQ(min, -640);
EXPECT_EQ(max, 1000);
- EXPECT_NEAR(scaling_factor, 0.127, 1e-6); // EQ won't work due to fpoint.
+ // EQ won't work due to fpoint.
+ EXPECT_NEAR(scaling_factor, 1000 / 127.0, 1e-6);
EXPECT_THAT(output,
testing::ElementsAreArray({-81, -81, -80, 1, 0, -1, -1, 0, 127}));
}
@@ -95,7 +96,7 @@ TEST(uKernels, SymmetricQuantizeFloatsAllAlmostZeroTest) {
EXPECT_NEAR(min, -9e-05, 1e-6);
EXPECT_NEAR(max, 0.0002, 1e-6);
- EXPECT_EQ(scaling_factor, 635000);
+ EXPECT_NEAR(scaling_factor, 1.57e-6, 1e-6);
EXPECT_THAT(output,
testing::ElementsAreArray({-6, 19, -4, -57, 1, 25, 6, 127, 0}));
}
diff --git a/tensorflow/contrib/lite/kernels/svdf.cc b/tensorflow/contrib/lite/kernels/svdf.cc
index 43ac3a2ce8..22eebdd4ce 100644
--- a/tensorflow/contrib/lite/kernels/svdf.cc
+++ b/tensorflow/contrib/lite/kernels/svdf.cc
@@ -382,11 +382,12 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
// the Eval function.
// TODO(alanchiao): refactor logic out into dequantize function.
if (!op_data->float_weights_time_initialized) {
- const float inv_scale = 1.0 / weights_time->params.scale;
+ const float dequantization_scale = weights_time->params.scale;
const int8_t* weights_time_ptr =
reinterpret_cast<int8_t*>(weights_time->data.uint8);
for (int i = 0; i < NumElements(float_weights_time); ++i) {
- float_weights_time->data.f[i] = weights_time_ptr[i] * inv_scale;
+ float_weights_time->data.f[i] =
+ weights_time_ptr[i] * dequantization_scale;
}
op_data->float_weights_time_initialized = true;
}
diff --git a/tensorflow/contrib/lite/kernels/svdf_test.cc b/tensorflow/contrib/lite/kernels/svdf_test.cc
index 06df509d32..5af3ff8500 100644
--- a/tensorflow/contrib/lite/kernels/svdf_test.cc
+++ b/tensorflow/contrib/lite/kernels/svdf_test.cc
@@ -342,7 +342,7 @@ TEST_F(SVDFOpTest, BlackBoxTestHybridRank1) {
svdf.ResetState();
VerifyGoldens(svdf_input, svdf_golden_output_rank_1, sizeof(svdf_input),
&svdf,
- /*tolerance=*/0.00294435);
+ /*tolerance=*/0.002945);
}
TEST_F(SVDFOpTest, BlackBoxTestHybridRank2) {