diff options
author | A. Unique TensorFlower <gardener@tensorflow.org> | 2018-06-29 13:28:40 -0700 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2018-06-29 13:31:21 -0700 |
commit | d3c0598ff7650b9d25eb01f792a6106428163df5 (patch) | |
tree | cd2a2e35cc743a8273bd9b34f019b425dbf85e6d /tensorflow/contrib/lite/kernels | |
parent | f95b37f6d1b5fd3c9ef71d974763c2248f690b54 (diff) |
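Use the same convention for the scale parameter in hybrid ops as well.
(Editorial note, derived from the diff below: after this change the scaling factor is the dequantization multiplier, i.e. real_value ≈ quantized_value × scale, with scale = range / kScale in the SymmetricQuantizeFloats routines; previously the hybrid ops stored the reciprocal, kScale / range, and divided by it when dequantizing.)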
PiperOrigin-RevId: 202698287
Diffstat (limited to 'tensorflow/contrib/lite/kernels')
7 files changed, 22 insertions, 18 deletions
diff --git a/tensorflow/contrib/lite/kernels/embedding_lookup.cc b/tensorflow/contrib/lite/kernels/embedding_lookup.cc index 9410bead5e..0ba170a4da 100644 --- a/tensorflow/contrib/lite/kernels/embedding_lookup.cc +++ b/tensorflow/contrib/lite/kernels/embedding_lookup.cc @@ -94,7 +94,7 @@ TfLiteStatus EvalHybrid(TfLiteContext* context, TfLiteNode* node, const TfLiteTensor* lookup, const TfLiteTensor* value, TfLiteTensor* output) { const int row_size = SizeOfDimension(value, 0); - const double scaling_factor = 1.0 / value->params.scale; + const double scaling_factor = value->params.scale; // col_size after we flatten tensor into 2D. int col_size = 1; diff --git a/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc b/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc index 36c25388e8..a0e382edb6 100644 --- a/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc +++ b/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc @@ -416,7 +416,7 @@ void LstmStep( if (!use_cifg) { if (use_peephole && !is_cell_state_all_zeros) { VectorMultiply(cell_to_input_weights_ptr, n_cell, - 1. / cell_to_input_weights_scale, recovered_cell_weights); + cell_to_input_weights_scale, recovered_cell_weights); tensor_utils::VectorBatchVectorCwiseProductAccumulate( recovered_cell_weights, n_cell, cell_state_ptr, n_batch, input_gate_scratch); @@ -428,7 +428,7 @@ void LstmStep( // For each batch and cell: update forget gate. if (use_peephole && !is_cell_state_all_zeros) { VectorMultiply(cell_to_forget_weights_ptr, n_cell, - 1. / cell_to_forget_weights_scale, recovered_cell_weights); + cell_to_forget_weights_scale, recovered_cell_weights); tensor_utils::VectorBatchVectorCwiseProductAccumulate( recovered_cell_weights, n_cell, cell_state_ptr, n_batch, forget_gate_scratch); @@ -460,7 +460,7 @@ void LstmStep( // For each batch and cell: update the output gate. if (use_peephole && !is_cell_state_all_zeros) { VectorMultiply(cell_to_output_weights_ptr, n_cell, - 1. 
/ cell_to_output_weights_scale, recovered_cell_weights); + cell_to_output_weights_scale, recovered_cell_weights); tensor_utils::VectorBatchVectorCwiseProductAccumulate( recovered_cell_weights, n_cell, cell_state_ptr, n_batch, output_gate_scratch); diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/neon_tensor_utils.cc b/tensorflow/contrib/lite/kernels/internal/optimized/neon_tensor_utils.cc index 38ad32c734..5ba7e2af9b 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/neon_tensor_utils.cc +++ b/tensorflow/contrib/lite/kernels/internal/optimized/neon_tensor_utils.cc @@ -162,7 +162,7 @@ void NeonMatrixBatchVectorMultiplyAccumulate( int batch, row, col; for (batch = 0; batch < n_batch; ++batch) { - const float batch_scaling_factor_inv = 1.0 / scaling_factors[batch]; + const float batch_scaling_factor = scaling_factors[batch]; // Copy the vector data to an aligned vector. memcpy(aligned_vec, vectors + batch * m_cols, sizeof(int8) * m_cols); // Compute dot-product for every column. @@ -232,7 +232,7 @@ void NeonMatrixBatchVectorMultiplyAccumulate( int32 neon_sum = vgetq_lane_s64(pairwiseAdded, 0) + vgetq_lane_s64(pairwiseAdded, 1); - *result += ((neon_sum + postable_sum) * batch_scaling_factor_inv); + *result += ((neon_sum + postable_sum) * batch_scaling_factor); } // for row } // for batch @@ -418,13 +418,14 @@ void NeonSymmetricQuantizeFloats(const float* values, const int size, *scaling_factor = 1; return; } - *scaling_factor = kScale / range; + *scaling_factor = range / kScale; + const float scaling_factor_inv = 1.0f / *scaling_factor; const int postamble_start = size - (size & (2 * kFloatWeightsPerNeonLane - 1)); // Vectorized constants. 
- const float32x4_t q_factor_f32x4 = vmovq_n_f32(*scaling_factor); + const float32x4_t q_factor_f32x4 = vmovq_n_f32(scaling_factor_inv); const float32x4_t point5_f32x4 = vmovq_n_f32(0.5); const float32x4_t zero_f32x4 = vmovq_n_f32(0.0); const int32x4_t scale_i32x4 = vmovq_n_s32(kScale); @@ -476,7 +477,7 @@ void NeonSymmetricQuantizeFloats(const float* values, const int size, for (int i = postamble_start; i < size; ++i) { const int32 quantized_value = - static_cast<int32>(TfLiteRound(*scaling_factor * values[i])); + static_cast<int32>(TfLiteRound(scaling_factor_inv * values[i])); quantized_values[i] = std::min(kScale, std::max(-kScale, quantized_value)); } } diff --git a/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.cc b/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.cc index f8c6f341f7..ccf112c990 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.cc +++ b/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.cc @@ -51,10 +51,11 @@ void PortableSymmetricQuantizeFloats(const float* values, const int size, *scaling_factor = 1; return; } - *scaling_factor = kScale / range; + *scaling_factor = range / kScale; + const float scaling_factor_inv = 1.0f / *scaling_factor; for (int i = 0; i < size; ++i) { const int32_t quantized_value = - static_cast<int32_t>(TfLiteRound(*scaling_factor * values[i])); + static_cast<int32_t>(TfLiteRound(values[i] * scaling_factor_inv)); // Clamp: just in case some odd numeric offset. quantized_values[i] = std::min(kScale, std::max(-kScale, quantized_value)); } @@ -85,7 +86,7 @@ void PortableMatrixBatchVectorMultiplyAccumulate( float* __restrict__ result, int result_stride) { int batch, row, col; for (batch = 0; batch < n_batch; ++batch, vectors += m_cols) { - const float batch_scaling_factor_inv = 1.0 / scaling_factors[batch]; + const float batch_scaling_factor = scaling_factors[batch]; // Get the address of the first row. 
const int8_t* row_ptr = matrix; for (row = 0; row < m_rows; ++row, result += result_stride) { @@ -98,7 +99,7 @@ void PortableMatrixBatchVectorMultiplyAccumulate( for (col = 0; col < m_cols; ++col, ++row_ptr) { dotprod += (*row_ptr) * (vectors[col]); } // for col - *result += (dotprod * batch_scaling_factor_inv); + *result += (dotprod * batch_scaling_factor); } // for row } // for batch } diff --git a/tensorflow/contrib/lite/kernels/internal/tensor_utils_test.cc b/tensorflow/contrib/lite/kernels/internal/tensor_utils_test.cc index 14ee528394..aa0d49ae4d 100644 --- a/tensorflow/contrib/lite/kernels/internal/tensor_utils_test.cc +++ b/tensorflow/contrib/lite/kernels/internal/tensor_utils_test.cc @@ -63,7 +63,8 @@ TEST(uKernels, SymmetricQuantizeFloatsTest) { EXPECT_EQ(min, -640); EXPECT_EQ(max, 1000); - EXPECT_NEAR(scaling_factor, 0.127, 1e-6); // EQ won't work due to fpoint. + // EQ won't work due to fpoint. + EXPECT_NEAR(scaling_factor, 1000 / 127.0, 1e-6); EXPECT_THAT(output, testing::ElementsAreArray({-81, -81, -80, 1, 0, -1, -1, 0, 127})); } @@ -95,7 +96,7 @@ TEST(uKernels, SymmetricQuantizeFloatsAllAlmostZeroTest) { EXPECT_NEAR(min, -9e-05, 1e-6); EXPECT_NEAR(max, 0.0002, 1e-6); - EXPECT_EQ(scaling_factor, 635000); + EXPECT_NEAR(scaling_factor, 1.57e-6, 1e-6); EXPECT_THAT(output, testing::ElementsAreArray({-6, 19, -4, -57, 1, 25, 6, 127, 0})); } diff --git a/tensorflow/contrib/lite/kernels/svdf.cc b/tensorflow/contrib/lite/kernels/svdf.cc index 43ac3a2ce8..22eebdd4ce 100644 --- a/tensorflow/contrib/lite/kernels/svdf.cc +++ b/tensorflow/contrib/lite/kernels/svdf.cc @@ -382,11 +382,12 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { // the Eval function. // TODO(alanchiao): refactor logic out into dequantize function. 
if (!op_data->float_weights_time_initialized) { - const float inv_scale = 1.0 / weights_time->params.scale; + const float dequantization_scale = weights_time->params.scale; const int8_t* weights_time_ptr = reinterpret_cast<int8_t*>(weights_time->data.uint8); for (int i = 0; i < NumElements(float_weights_time); ++i) { - float_weights_time->data.f[i] = weights_time_ptr[i] * inv_scale; + float_weights_time->data.f[i] = + weights_time_ptr[i] * dequantization_scale; } op_data->float_weights_time_initialized = true; } diff --git a/tensorflow/contrib/lite/kernels/svdf_test.cc b/tensorflow/contrib/lite/kernels/svdf_test.cc index 06df509d32..5af3ff8500 100644 --- a/tensorflow/contrib/lite/kernels/svdf_test.cc +++ b/tensorflow/contrib/lite/kernels/svdf_test.cc @@ -342,7 +342,7 @@ TEST_F(SVDFOpTest, BlackBoxTestHybridRank1) { svdf.ResetState(); VerifyGoldens(svdf_input, svdf_golden_output_rank_1, sizeof(svdf_input), &svdf, - /*tolerance=*/0.00294435); + /*tolerance=*/0.002945); } TEST_F(SVDFOpTest, BlackBoxTestHybridRank2) { |