author    A. Unique TensorFlower <gardener@tensorflow.org>  2018-05-31 15:11:26 -0700
committer TensorFlower Gardener <gardener@tensorflow.org>   2018-05-31 15:13:58 -0700
commit    269a4ed1c27251b55cffe578b7bd969ec5975487
tree      83dd602a71ad69b3fcb7b5ff5adc59c7adac3758
parent    f21816ecefe3f6e554d3b7daae3bb7f7a03bad20
Internal change: preallocate the per-batch quantization scaling-factor buffer as a third temporary tensor for the hybrid RNN kernels, instead of heap-allocating it on every RnnBatchStep call, and rename EvalQuantized to EvalHybrid to match.
PiperOrigin-RevId: 198787391
Diffstat (limited to 'tensorflow')
-rw-r--r--  tensorflow/contrib/lite/kernels/basic_rnn.cc                    | 41
-rw-r--r--  tensorflow/contrib/lite/kernels/internal/kernel_utils.cc        |  7
-rw-r--r--  tensorflow/contrib/lite/kernels/internal/kernel_utils.h         |  6
-rw-r--r--  tensorflow/contrib/lite/kernels/unidirectional_sequence_rnn.cc  | 41
4 files changed, 60 insertions(+), 35 deletions(-)
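
Background for this change: the hybrid path keeps the RNN weights quantized (uint8 storage, int8 arithmetic) but quantizes the float activations on the fly, producing one scaling factor per batch row; those per-row factors are exactly what the new scratch tensor holds. A minimal sketch of per-row symmetric quantization, assuming the usual symmetric int8 scheme — the kernels actually call tensor_utils::SymmetricQuantizeFloats, whose signature differs, so the helper below is illustrative only:

#include <algorithm>
#include <cmath>
#include <cstdint>

// Quantizes one row of floats to int8 symmetrically around zero and returns
// the scaling factor needed to recover approximate floats: value ~= q * scale.
float SymmetricQuantizeRow(const float* values, int size, int8_t* quantized) {
  float max_abs = 0.f;
  for (int i = 0; i < size; ++i) {
    max_abs = std::max(max_abs, std::abs(values[i]));
  }
  if (max_abs == 0.f) {
    std::fill(quantized, quantized + size, 0);
    return 0.f;  // all-zero row: nothing to scale
  }
  const float scale = max_abs / 127.f;  // map [-max_abs, max_abs] to [-127, 127]
  for (int i = 0; i < size; ++i) {
    const int q = static_cast<int>(std::round(values[i] / scale));
    quantized[i] = static_cast<int8_t>(std::min(127, std::max(-127, q)));
  }
  return scale;
}

Recovery then multiplies the int8 results back by this scale (together with the weights' static scale), which is the "recovery" the updated header comment below refers to.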
diff --git a/tensorflow/contrib/lite/kernels/basic_rnn.cc b/tensorflow/contrib/lite/kernels/basic_rnn.cc
index 7dc0c5656d..c09b15b3d2 100644
--- a/tensorflow/contrib/lite/kernels/basic_rnn.cc
+++ b/tensorflow/contrib/lite/kernels/basic_rnn.cc
@@ -36,7 +36,7 @@ constexpr int kOutputTensor = 1;
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
auto* scratch_tensor_index = new int;
- context->AddTensors(context, /*tensors_to_add=*/2, scratch_tensor_index);
+ context->AddTensors(context, /*tensors_to_add=*/3, scratch_tensor_index);
return scratch_tensor_index;
}
@@ -91,7 +91,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
if (input->type == kTfLiteFloat32 && input_weights->type == kTfLiteUInt8) {
int* scratch_tensor_index = reinterpret_cast<int*>(node->user_data);
TfLiteIntArrayFree(node->temporaries);
- node->temporaries = TfLiteIntArrayCreate(2);
+ node->temporaries = TfLiteIntArrayCreate(3);
node->temporaries->data[0] = *scratch_tensor_index;
TfLiteTensor* input_quantized = GetTemporary(context, node, /*index=*/0);
input_quantized->type = kTfLiteUInt8;
@@ -114,6 +114,16 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
context->ResizeTensor(context, hidden_state_quantized,
hidden_state_quantized_size));
}
+ node->temporaries->data[2] = *scratch_tensor_index + 2;
+ TfLiteTensor* scaling_factors = GetTemporary(context, node, /*index=*/2);
+ scaling_factors->type = kTfLiteFloat32;
+ scaling_factors->allocation_type = kTfLiteArenaRw;
+ TfLiteIntArray* scaling_factors_size = TfLiteIntArrayCreate(1);
+ scaling_factors_size->data[0] = batch_size;
+ if (!TfLiteIntArrayEqual(scaling_factors->dims, scaling_factors_size)) {
+ TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, scaling_factors,
+ scaling_factors_size));
+ }
}
return kTfLiteOk;
@@ -145,14 +155,14 @@ TfLiteStatus EvalFloat(const TfLiteTensor* input,
return kTfLiteOk;
}
-TfLiteStatus EvalQuantized(const TfLiteTensor* input,
- const TfLiteTensor* input_weights,
- const TfLiteTensor* recurrent_weights,
- const TfLiteTensor* bias,
- const TfLiteRNNParams* params,
- TfLiteTensor* input_scratch,
- TfLiteTensor* hidden_state_scratch,
- TfLiteTensor* hidden_state, TfLiteTensor* output) {
+TfLiteStatus EvalHybrid(const TfLiteTensor* input,
+ const TfLiteTensor* input_weights,
+ const TfLiteTensor* recurrent_weights,
+ const TfLiteTensor* bias, const TfLiteRNNParams* params,
+ TfLiteTensor* input_scratch,
+ TfLiteTensor* hidden_state_scratch,
+ TfLiteTensor* scaling_factors,
+ TfLiteTensor* hidden_state, TfLiteTensor* output) {
const int batch_size = input->dims->data[0];
const int num_units = input_weights->dims->data[0];
const int input_size = input->dims->data[1];
@@ -176,12 +186,14 @@ TfLiteStatus EvalQuantized(const TfLiteTensor* input,
reinterpret_cast<int8_t*>(input_scratch->data.uint8);
int8_t* quantized_hidden_state_ptr =
reinterpret_cast<int8_t*>(hidden_state_scratch->data.uint8);
+ float* scaling_factors_ptr = scaling_factors->data.f;
kernel_utils::RnnBatchStep(
input_ptr_batch, input_weights_ptr, input_weights_scale,
recurrent_weights_ptr, recurrent_weights_scale, bias_ptr, input_size,
num_units, batch_size, params->activation, quantized_input_ptr,
- quantized_hidden_state_ptr, hidden_state_ptr_batch, output_ptr_batch);
+ quantized_hidden_state_ptr, scaling_factors_ptr, hidden_state_ptr_batch,
+ output_ptr_batch);
return kTfLiteOk;
}
@@ -205,9 +217,10 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
// TODO(mirkov): implement eval with quantized inputs as well.
TfLiteTensor* input_quantized = GetTemporary(context, node, 0);
TfLiteTensor* hidden_state_quantized = GetTemporary(context, node, 1);
- return EvalQuantized(input, input_weights, recurrent_weights, bias,
- params, input_quantized, hidden_state_quantized,
- hidden_state, output);
+ TfLiteTensor* scaling_factors = GetTemporary(context, node, 2);
+ return EvalHybrid(input, input_weights, recurrent_weights, bias, params,
+ input_quantized, hidden_state_quantized,
+ scaling_factors, hidden_state, output);
}
default:
context->ReportError(context, "Type %d not currently supported.",
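
One detail worth calling out in the Prepare change above: AddTensors reserves contiguous tensor slots, so the index of the k-th temporary is simply the base index plus k — hence node->temporaries->data[2] = *scratch_tensor_index + 2. A condensed sketch of the binding pattern, using the same TfLite C API types as this file (error handling trimmed):

#include "tensorflow/contrib/lite/context.h"

// Binds the three scratch slots reserved by Init's AddTensors call. The
// slots are contiguous, so temporary k lives at first_scratch_index + k.
void BindTemporaries(TfLiteNode* node, int first_scratch_index) {
  TfLiteIntArrayFree(node->temporaries);
  node->temporaries = TfLiteIntArrayCreate(3);
  node->temporaries->data[0] = first_scratch_index;      // quantized input
  node->temporaries->data[1] = first_scratch_index + 1;  // quantized hidden state
  node->temporaries->data[2] = first_scratch_index + 2;  // scaling factors
}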
diff --git a/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc b/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc
index 3bbaaa6a9d..67e3810479 100644
--- a/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc
+++ b/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc
@@ -52,7 +52,8 @@ void RnnBatchStep(const float* input_ptr_batch, const int8_t* input_weights_ptr,
TfLiteFusedActivation activation,
int8_t* quantized_input_ptr_batch,
int8_t* quantized_hidden_state_ptr_batch,
- float* hidden_state_ptr_batch, float* output_ptr_batch) {
+ float* scaling_factors, float* hidden_state_ptr_batch,
+ float* output_ptr_batch) {
// Output = bias
tensor_utils::VectorBatchVectorAssign(bias_ptr, num_units, batch_size,
output_ptr_batch);
@@ -62,7 +63,6 @@ void RnnBatchStep(const float* input_ptr_batch, const int8_t* input_weights_ptr,
// Quantize input from float to uint8 + quantization params (scaling
// factor).
float unused_min, unused_max;
- float* scaling_factors = new float[batch_size];
for (int b = 0; b < batch_size; ++b) {
const int offset = b * input_size;
tensor_utils::SymmetricQuantizeFloats(
@@ -76,7 +76,6 @@ void RnnBatchStep(const float* input_ptr_batch, const int8_t* input_weights_ptr,
tensor_utils::MatrixBatchVectorMultiplyAccumulate(
input_weights_ptr, num_units, input_size, quantized_input_ptr_batch,
scaling_factors, batch_size, output_ptr_batch, /*result_stride=*/1);
- delete[] scaling_factors;
}
// Save quantization and matmul computation for all zero input.
@@ -84,7 +83,6 @@ void RnnBatchStep(const float* input_ptr_batch, const int8_t* input_weights_ptr,
batch_size * num_units)) {
// Quantize hidden_state
float unused_min, unused_max;
- float* scaling_factors = new float[batch_size];
for (int b = 0; b < batch_size; ++b) {
const int offset = b * num_units;
tensor_utils::SymmetricQuantizeFloats(
@@ -99,7 +97,6 @@ void RnnBatchStep(const float* input_ptr_batch, const int8_t* input_weights_ptr,
recurrent_weights_ptr, num_units, num_units,
quantized_hidden_state_ptr_batch, scaling_factors, batch_size,
output_ptr_batch, /*result_stride=*/1);
- delete[] scaling_factors;
}
// Output = activation(Output) and update hidden_state
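
The effect of the kernel_utils.cc change above: scaling_factors used to be new[]-ed and delete[]-ed inside RnnBatchStep, so a sequence kernel paid one heap allocation per timestep (two, counting the hidden-state branch). With the buffer hoisted into a caller-owned arena tensor, the timestep loop is allocation-free. The shape of the caller, illustratively, with std::vector standing in for the arena tensor:

#include <vector>

// Illustrative caller: the scaling-factor buffer is sized once to
// batch_size and reused by every timestep's RnnBatchStep call.
void RunSequence(int max_time, int batch_size) {
  std::vector<float> scaling_factors(batch_size);  // allocated once, up front
  for (int t = 0; t < max_time; ++t) {
    // kernel_utils::RnnBatchStep(..., scaling_factors.data(), ...);
  }
}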
diff --git a/tensorflow/contrib/lite/kernels/internal/kernel_utils.h b/tensorflow/contrib/lite/kernels/internal/kernel_utils.h
index cbfbcbeefc..f3f42f0840 100644
--- a/tensorflow/contrib/lite/kernels/internal/kernel_utils.h
+++ b/tensorflow/contrib/lite/kernels/internal/kernel_utils.h
@@ -41,6 +41,9 @@ void RnnBatchStep(const float* input_ptr_batch, const float* input_weights_ptr,
// values of hidden_state_ptr_batch and input_ptr_batch, respectively.
// These temporary storages are expected to be preallocated to the same size as
// the respective pointers.
+// An additional preallocated temporary storage 'scaling_factors' (of size
+// batch_size) is used to store the scaling factors of the quantization (used
+// for recovery).
// {input,recurrent}_weights_scale params are used for dequantization/recovery.
void RnnBatchStep(const float* input_ptr_batch, const int8_t* input_weights_ptr,
float input_weights_scale,
@@ -50,7 +53,8 @@ void RnnBatchStep(const float* input_ptr_batch, const int8_t* input_weights_ptr,
TfLiteFusedActivation activation,
int8_t* quantized_input_ptr_batch,
int8_t* quantized_hidden_state_ptr_batch,
- float* hidden_state_ptr_batch, float* output_ptr_batch);
+ float* scaling_factors, float* hidden_state_ptr_batch,
+ float* output_ptr_batch);
// Performs an LSTM batch inference step for input specified by input_ptr_batch.
// The LSTM cell is specified by the pointers to its weights (*_weights_ptr) and
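
On the "recovery" mentioned in the new header comment: the int8 accumulator is mapped back to float by multiplying with the per-row input scaling factor and the static weights scale. A reference-style sketch of that accumulate — the optimized routine is tensor_utils::MatrixBatchVectorMultiplyAccumulate, and in the kernels the weights scale may already be folded into scaling_factors before the call:

#include <cstdint>

// Reference (unoptimized) hybrid matrix-vector accumulate for one batch row:
// output[r] += (int8 dot product) * input_scale * weights_scale.
void HybridMatVecAccumulate(const int8_t* weights, int rows, int cols,
                            const int8_t* input, float input_scale,
                            float weights_scale, float* output) {
  for (int r = 0; r < rows; ++r) {
    int32_t acc = 0;
    for (int c = 0; c < cols; ++c) {
      acc += static_cast<int32_t>(weights[r * cols + c]) * input[c];
    }
    output[r] += static_cast<float>(acc) * input_scale * weights_scale;
  }
}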
diff --git a/tensorflow/contrib/lite/kernels/unidirectional_sequence_rnn.cc b/tensorflow/contrib/lite/kernels/unidirectional_sequence_rnn.cc
index 8429dba54b..164a0cbd08 100644
--- a/tensorflow/contrib/lite/kernels/unidirectional_sequence_rnn.cc
+++ b/tensorflow/contrib/lite/kernels/unidirectional_sequence_rnn.cc
@@ -41,7 +41,7 @@ constexpr int kOutputTensor = 1;
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
auto* scratch_tensor_index = new int;
- context->AddTensors(context, /*tensors_to_add=*/2, scratch_tensor_index);
+ context->AddTensors(context, /*tensors_to_add=*/3, scratch_tensor_index);
return scratch_tensor_index;
}
@@ -102,7 +102,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
if (input->type == kTfLiteFloat32 && input_weights->type == kTfLiteUInt8) {
int* scratch_tensor_index = reinterpret_cast<int*>(node->user_data);
TfLiteIntArrayFree(node->temporaries);
- node->temporaries = TfLiteIntArrayCreate(2);
+ node->temporaries = TfLiteIntArrayCreate(3);
node->temporaries->data[0] = *scratch_tensor_index;
TfLiteTensor* input_quantized = GetTemporary(context, node, /*index=*/0);
input_quantized->type = kTfLiteUInt8;
@@ -125,6 +125,16 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
context->ResizeTensor(context, hidden_state_quantized,
hidden_state_quantized_size));
}
+ node->temporaries->data[2] = *scratch_tensor_index + 2;
+ TfLiteTensor* scaling_factors = GetTemporary(context, node, /*index=*/2);
+ scaling_factors->type = kTfLiteFloat32;
+ scaling_factors->allocation_type = kTfLiteArenaRw;
+ TfLiteIntArray* scaling_factors_size = TfLiteIntArrayCreate(1);
+ scaling_factors_size->data[0] = batch_size;
+ if (!TfLiteIntArrayEqual(scaling_factors->dims, scaling_factors_size)) {
+ TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, scaling_factors,
+ scaling_factors_size));
+ }
}
return kTfLiteOk;
}
@@ -187,14 +197,12 @@ TfLiteStatus EvalFloat(const TfLiteTensor* input,
return kTfLiteOk;
}
-TfLiteStatus EvalQuantized(const TfLiteTensor* input,
- const TfLiteTensor* input_weights,
- const TfLiteTensor* recurrent_weights,
- const TfLiteTensor* bias,
- const TfLiteSequenceRNNParams* params,
- TfLiteTensor* input_scratch,
- TfLiteTensor* hidden_state_scratch,
- TfLiteTensor* hidden_state, TfLiteTensor* output) {
+TfLiteStatus EvalHybrid(
+ const TfLiteTensor* input, const TfLiteTensor* input_weights,
+ const TfLiteTensor* recurrent_weights, const TfLiteTensor* bias,
+ const TfLiteSequenceRNNParams* params, TfLiteTensor* input_scratch,
+ TfLiteTensor* hidden_state_scratch, TfLiteTensor* scaling_factors,
+ TfLiteTensor* hidden_state, TfLiteTensor* output) {
const bool time_major = params->time_major;
const int batch_size =
(time_major) ? input->dims->data[1] : input->dims->data[0];
@@ -218,6 +226,7 @@ TfLiteStatus EvalQuantized(const TfLiteTensor* input,
reinterpret_cast<int8_t*>(input_scratch->data.uint8);
int8_t* quantized_hidden_state_ptr =
reinterpret_cast<int8_t*>(hidden_state_scratch->data.uint8);
+ float* scaling_factors_ptr = scaling_factors->data.f;
if (time_major) {
// Initialize the pointer to hidden state.
@@ -233,7 +242,8 @@ TfLiteStatus EvalQuantized(const TfLiteTensor* input,
input_ptr_batch, input_weights_ptr, input_weights_scale,
recurrent_weights_ptr, recurrent_weights_scale, bias_ptr, input_size,
num_units, batch_size, params->activation, quantized_input_ptr,
- quantized_hidden_state_ptr, hidden_state_ptr_batch, output_ptr_batch);
+ quantized_hidden_state_ptr, scaling_factors_ptr,
+ hidden_state_ptr_batch, output_ptr_batch);
}
} else {
// For each batch
@@ -252,7 +262,7 @@ TfLiteStatus EvalQuantized(const TfLiteTensor* input,
recurrent_weights_ptr, recurrent_weights_scale, bias_ptr,
input_size, num_units, /*batch_size=*/1, params->activation,
quantized_input_ptr, quantized_hidden_state_ptr,
- hidden_state_ptr_batch, output_ptr_batch);
+ scaling_factors_ptr, hidden_state_ptr_batch, output_ptr_batch);
}
}
}
@@ -278,9 +288,10 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
// TODO(mirkov): implement eval with quantized inputs as well.
TfLiteTensor* input_quantized = GetTemporary(context, node, 0);
TfLiteTensor* hidden_state_quantized = GetTemporary(context, node, 1);
- return EvalQuantized(input, input_weights, recurrent_weights, bias,
- params, input_quantized, hidden_state_quantized,
- hidden_state, output);
+ TfLiteTensor* scaling_factors = GetTemporary(context, node, 2);
+ return EvalHybrid(input, input_weights, recurrent_weights, bias, params,
+ input_quantized, hidden_state_quantized,
+ scaling_factors, hidden_state, output);
}
default:
context->ReportError(context, "Type %d not currently supported.",
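
Finally, the two layouts EvalHybrid distinguishes above: with time_major the input is laid out [max_time, batch, input_size] and one RnnBatchStep covers the whole batch per step; in batch-major layout it is [batch, max_time, input_size] and the kernel walks each sequence separately with batch_size == 1. A hypothetical helper showing the two offset computations — consistent with the dims indexing in this file, but illustrative only:

// Returns a pointer to the input vector for timestep t of batch entry b
// under either layout. Time-major: [max_time, batch, input_size];
// batch-major: [batch, max_time, input_size].
const float* StepInput(const float* input, bool time_major, int t, int b,
                       int max_time, int batch_size, int input_size) {
  const int row = time_major ? t * batch_size + b : b * max_time + t;
  return input + row * input_size;
}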