author    A. Unique TensorFlower <gardener@tensorflow.org>    2018-10-03 13:25:22 -0700
committer TensorFlower Gardener <gardener@tensorflow.org>     2018-10-03 13:32:42 -0700
commit    c2c8cfe22492cf7fab804d32283b623632270035 (patch)
tree      6003bf547117f97cd65ed598c4cec39cba7d5510 /tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm_test.cc
parent    7566f3d5ad690c71c36e78611b1ae5913ec3e845 (diff)
Add the option of merging bidirectional RNN and LSTM outputs into a single output tensor.
This is useful when the outputs of both directions are passed to the next layer as a single tensor, since it avoids an explicit concatenation op, which can be costly on mobile devices where memory movement is relatively expensive.
PiperOrigin-RevId: 215616140
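With merge_outputs enabled, the single output tensor holds both directions per timestep: the forward slice for step t is immediately followed by the backward slice, so the last dimension becomes 2 * n_output. This is also how the merged expectation is assembled in the new test below. A minimal C++ sketch of that layout; MergeOutputs and step_size are illustrative names and are not part of this change:

    #include <vector>

    // Illustrative sketch only (not part of this change): interleave the
    // per-timestep forward and backward outputs into one buffer, mirroring
    // the merged output layout [sequence_length, batch, 2 * n_output].
    // step_size stands for batch * n_output, i.e. one direction's slice per step.
    std::vector<float> MergeOutputs(const std::vector<float>& fw,
                                    const std::vector<float>& bw,
                                    int sequence_length, int step_size) {
      std::vector<float> merged;
      merged.reserve(fw.size() + bw.size());
      for (int t = 0; t < sequence_length; ++t) {
        merged.insert(merged.end(), fw.begin() + t * step_size,
                      fw.begin() + (t + 1) * step_size);
        merged.insert(merged.end(), bw.begin() + t * step_size,
                      bw.begin() + (t + 1) * step_size);
      }
      return merged;
    }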
Diffstat (limited to 'tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm_test.cc')
-rw-r--r--    tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm_test.cc    186
1 file changed, 175 insertions(+), 11 deletions(-)
diff --git a/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm_test.cc b/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm_test.cc
index 74ba8021c2..9cc04907e1 100644
--- a/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm_test.cc
+++ b/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm_test.cc
@@ -35,8 +35,8 @@ class BidirectionalLSTMOpModel : public SingleOpModel {
BidirectionalLSTMOpModel(int n_batch, int n_input, int n_cell, int n_output,
int sequence_length, bool use_cifg,
bool use_peephole, bool use_projection_weights,
- bool use_projection_bias, float cell_clip,
- float proj_clip,
+ bool use_projection_bias, bool merge_outputs,
+ float cell_clip, float proj_clip,
const std::vector<std::vector<int>>& input_shapes)
: n_batch_(n_batch),
n_input_(n_input),
@@ -175,7 +175,9 @@ class BidirectionalLSTMOpModel : public SingleOpModel {
fw_output_ = AddOutput(TensorType_FLOAT32);
- bw_output_ = AddOutput(TensorType_FLOAT32);
+ if (!merge_outputs) {
+ bw_output_ = AddOutput(TensorType_FLOAT32);
+ }
aux_input_ = AddNullInput();
fw_aux_input_to_input_weights_ = AddNullInput();
@@ -188,9 +190,10 @@ class BidirectionalLSTMOpModel : public SingleOpModel {
bw_aux_input_to_output_weights_ = AddNullInput();
SetBuiltinOp(BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM,
- BuiltinOptions_LSTMOptions,
- CreateLSTMOptions(builder_, ActivationFunctionType_TANH,
- cell_clip, proj_clip)
+ BuiltinOptions_BidirectionalSequenceLSTMOptions,
+ CreateBidirectionalSequenceLSTMOptions(
+ builder_, ActivationFunctionType_TANH, cell_clip,
+ proj_clip, merge_outputs)
.Union());
BuildInterpreter(input_shapes);
}
@@ -380,7 +383,8 @@ TEST(LSTMOpTest, BlackBoxTestNoCifgNoPeepholeNoProjectionNoClipping) {
BidirectionalLSTMOpModel lstm(
n_batch, n_input, n_cell, n_output, sequence_length, /*use_cifg=*/false,
/*use_peephole=*/false, /*use_projection_weights=*/false,
- /*use_projection_bias=*/false, /*cell_clip=*/0.0, /*proj_clip=*/0.0,
+ /*use_projection_bias=*/false, /*merge_outputs=*/false, /*cell_clip=*/0.0,
+ /*proj_clip=*/0.0,
{
{sequence_length, n_batch, n_input}, // input tensor
@@ -526,6 +530,162 @@ TEST(LSTMOpTest, BlackBoxTestNoCifgNoPeepholeNoProjectionNoClipping) {
ElementsAreArray(ArrayFloatNear(bw_expected)));
}
+// Same as the previous test, yet with a single merged output tensor.
+TEST(LSTMOpTest, BlackBoxTestMergedOutput) {
+ const int n_batch = 1;
+ const int n_input = 2;
+ // n_cell and n_output have the same size when there is no projection.
+ const int n_cell = 4;
+ const int n_output = 4;
+ const int sequence_length = 3;
+
+ BidirectionalLSTMOpModel lstm(
+ n_batch, n_input, n_cell, n_output, sequence_length, /*use_cifg=*/false,
+ /*use_peephole=*/false, /*use_projection_weights=*/false,
+ /*use_projection_bias=*/false, /*merge_outputs=*/true, /*cell_clip=*/0.0,
+ /*proj_clip=*/0.0,
+ {
+ {sequence_length, n_batch, n_input}, // input tensor
+
+ // Forward cell
+ {n_cell, n_input}, // input_to_input_weight tensor
+ {n_cell, n_input}, // input_to_forget_weight tensor
+ {n_cell, n_input}, // input_to_cell_weight tensor
+ {n_cell, n_input}, // input_to_output_weight tensor
+
+ {n_cell, n_output}, // recurrent_to_input_weight tensor
+ {n_cell, n_output}, // recurrent_to_forget_weight tensor
+ {n_cell, n_output}, // recurrent_to_cell_weight tensor
+ {n_cell, n_output}, // recurrent_to_output_weight tensor
+
+ {0}, // cell_to_input_weight tensor
+ {0}, // cell_to_forget_weight tensor
+ {0}, // cell_to_output_weight tensor
+
+ {n_cell}, // input_gate_bias tensor
+ {n_cell}, // forget_gate_bias tensor
+ {n_cell}, // cell_bias tensor
+ {n_cell}, // output_gate_bias tensor
+
+ {0, 0}, // projection_weight tensor
+ {0}, // projection_bias tensor
+
+ // Backward cell
+ {n_cell, n_input}, // input_to_input_weight tensor
+ {n_cell, n_input}, // input_to_forget_weight tensor
+ {n_cell, n_input}, // input_to_cell_weight tensor
+ {n_cell, n_input}, // input_to_output_weight tensor
+
+ {n_cell, n_output}, // recurrent_to_input_weight tensor
+ {n_cell, n_output}, // recurrent_to_forget_weight tensor
+ {n_cell, n_output}, // recurrent_to_cell_weight tensor
+ {n_cell, n_output}, // recurrent_to_output_weight tensor
+
+ {0}, // cell_to_input_weight tensor
+ {0}, // cell_to_forget_weight tensor
+ {0}, // cell_to_output_weight tensor
+
+ {n_cell}, // input_gate_bias tensor
+ {n_cell}, // forget_gate_bias tensor
+ {n_cell}, // cell_bias tensor
+ {n_cell}, // output_gate_bias tensor
+
+ {0, 0}, // projection_weight tensor
+ {0}, // projection_bias tensor
+
+ {n_batch, n_output}, // activation_state tensor
+ {n_batch, n_cell}, // cell_state tensor
+
+ {n_batch, n_output}, // activation_state tensor
+ {n_batch, n_cell}, // cell_state tensor
+
+ {n_batch, sequence_length, 0}, // aux_input tensor
+ {n_cell, 0}, // aux_fw_input_to_input tensor
+ {n_cell, 0}, // aux_fw_input_to_forget tensor
+ {n_cell, 0}, // aux_fw_input_to_cell tensor
+ {n_cell, 0}, // aux_fw_input_to_output tensor
+ {n_cell, 0}, // aux_bw_input_to_input tensor
+ {n_cell, 0}, // aux_bw_input_to_forget tensor
+ {n_cell, 0}, // aux_bw_input_to_cell tensor
+ {n_cell, 0}, // aux_bw_input_to_output tensor
+ });
+
+ lstm.SetInputToInputWeights({-0.45018822, -0.02338299, -0.0870589,
+ -0.34550029, 0.04266912, -0.15680569,
+ -0.34856534, 0.43890524});
+
+ lstm.SetInputToCellWeights({-0.50013041, 0.1370284, 0.11810488, 0.2013163,
+ -0.20583314, 0.44344562, 0.22077113,
+ -0.29909778});
+
+ lstm.SetInputToForgetWeights({0.09701663, 0.20334584, -0.50592935,
+ -0.31343272, -0.40032279, 0.44781327,
+ 0.01387155, -0.35593212});
+
+ lstm.SetInputToOutputWeights({-0.25065863, -0.28290087, 0.04613829,
+ 0.40525138, 0.44272184, 0.03897077, -0.1556896,
+ 0.19487578});
+
+ lstm.SetInputGateBias({0., 0., 0., 0.});
+
+ lstm.SetCellBias({0., 0., 0., 0.});
+
+ lstm.SetForgetGateBias({1., 1., 1., 1.});
+
+ lstm.SetOutputGateBias({0., 0., 0., 0.});
+
+ lstm.SetRecurrentToInputWeights(
+ {-0.0063535, -0.2042388, 0.31454784, -0.35746509, 0.28902304, 0.08183324,
+ -0.16555229, 0.02286911, -0.13566875, 0.03034258, 0.48091322,
+ -0.12528998, 0.24077177, -0.51332325, -0.33502164, 0.10629296});
+
+ lstm.SetRecurrentToCellWeights(
+ {-0.3407414, 0.24443203, -0.2078532, 0.26320225, 0.05695659, -0.00123841,
+ -0.4744786, -0.35869038, -0.06418842, -0.13502428, -0.501764, 0.22830659,
+ -0.46367589, 0.26016325, -0.03894562, -0.16368064});
+
+ lstm.SetRecurrentToForgetWeights(
+ {-0.48684245, -0.06655136, 0.42224967, 0.2112639, 0.27654213, 0.20864892,
+ -0.07646349, 0.45877004, 0.00141793, -0.14609534, 0.36447752, 0.09196436,
+ 0.28053468, 0.01560611, -0.20127171, -0.01140004});
+
+ lstm.SetRecurrentToOutputWeights(
+ {0.43385774, -0.17194885, 0.2718237, 0.09215671, 0.24107647, -0.39835793,
+ 0.18212086, 0.01301402, 0.48572797, -0.50656658, 0.20047462, -0.20607421,
+ -0.51818722, -0.15390486, 0.0468148, 0.39922136});
+
+ // Input should have n_input * sequence_length many values.
+ static float lstm_input[] = {2., 3., 3., 4., 1., 1.};
+ static float lstm_fw_golden_output[] = {
+ -0.02973187, 0.1229473, 0.20885126, -0.15358765,
+ -0.03716109, 0.12507336, 0.41193449, -0.20860538,
+ -0.15053082, 0.09120187, 0.24278517, -0.12222792};
+ static float lstm_bw_golden_output[] = {
+ -0.0806187, 0.139077, 0.400476, -0.197842, -0.0332076, 0.123838,
+ 0.309777, -0.17621, -0.0490733, 0.0739237, 0.067706, -0.0208124};
+
+ float* batch0_start = lstm_input;
+ float* batch0_end = batch0_start + lstm.num_inputs() * lstm.sequence_length();
+
+ lstm.SetInput(0, batch0_start, batch0_end);
+
+ lstm.Invoke();
+
+ std::vector<float> merged_expected;
+ for (int k = 0; k < lstm.sequence_length(); k++) {
+ merged_expected.insert(
+ merged_expected.end(),
+ lstm_fw_golden_output + k * lstm.num_fw_outputs(),
+ lstm_fw_golden_output + (k + 1) * lstm.num_fw_outputs());
+ merged_expected.insert(
+ merged_expected.end(),
+ lstm_bw_golden_output + k * lstm.num_bw_outputs(),
+ lstm_bw_golden_output + (k + 1) * lstm.num_bw_outputs());
+ }
+ EXPECT_THAT(lstm.GetFwOutput(),
+ ElementsAreArray(ArrayFloatNear(merged_expected)));
+}
+
TEST(LSTMOpTest, BlackBoxTestNoCifgNoPeepholeNoProjectionNoClippingReverse) {
const int n_batch = 1;
const int n_input = 2;
@@ -537,7 +697,8 @@ TEST(LSTMOpTest, BlackBoxTestNoCifgNoPeepholeNoProjectionNoClippingReverse) {
BidirectionalLSTMOpModel lstm(
n_batch, n_input, n_cell, n_output, sequence_length, /*use_cifg=*/false,
/*use_peephole=*/false, /*use_projection_weights=*/false,
- /*use_projection_bias=*/false, /*cell_clip=*/0.0, /*proj_clip=*/0.0,
+ /*use_projection_bias=*/false, /*merge_outputs=*/false, /*cell_clip=*/0.0,
+ /*proj_clip=*/0.0,
{
{sequence_length, n_batch, n_input}, // input tensor
@@ -696,7 +857,8 @@ TEST(LSTMOpTest, BlackBoxTestWithCifgWithPeepholeNoProjectionNoClipping) {
BidirectionalLSTMOpModel lstm(
n_batch, n_input, n_cell, n_output, sequence_length, /*use_cifg=*/true,
/*use_peephole=*/true, /*use_projection_weights=*/false,
- /*use_projection_bias=*/false, /*cell_clip=*/0.0, /*proj_clip=*/0.0,
+ /*use_projection_bias=*/false, /*merge_outputs=*/false, /*cell_clip=*/0.0,
+ /*proj_clip=*/0.0,
{
{sequence_length, n_batch, n_input}, // input tensor
@@ -845,7 +1007,8 @@ TEST(LSTMOpTest,
BidirectionalLSTMOpModel lstm(
n_batch, n_input, n_cell, n_output, sequence_length, /*use_cifg=*/true,
/*use_peephole=*/true, /*use_projection_weights=*/false,
- /*use_projection_bias=*/false, /*cell_clip=*/0.0, /*proj_clip=*/0.0,
+ /*use_projection_bias=*/false, /*merge_outputs=*/false, /*cell_clip=*/0.0,
+ /*proj_clip=*/0.0,
{
{sequence_length, n_batch, n_input}, // input tensor
@@ -994,7 +1157,8 @@ TEST(LSTMOpTest, BlackBoxTestWithPeepholeWithProjectionNoClipping) {
BidirectionalLSTMOpModel lstm(
n_batch, n_input, n_cell, n_output, sequence_length, /*use_cifg=*/false,
/*use_peephole=*/true, /*use_projection_weights=*/true,
- /*use_projection_bias=*/false, /*cell_clip=*/0.0, /*proj_clip=*/0.0,
+ /*use_projection_bias=*/false, /*merge_outputs=*/false, /*cell_clip=*/0.0,
+ /*proj_clip=*/0.0,
{
{sequence_length, n_batch, n_input}, // input tensor