diff options
author | Michael Case <mikecase@google.com> | 2018-04-10 18:44:13 -0700 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2018-04-10 18:46:38 -0700 |
commit | 5ad9e4588874f30d0d079acc60e07f2eddc0480f (patch) | |
tree | ab800846cc505d867b2961578869aec97eeb81a3 /tensorflow/contrib/seq2seq | |
parent | fad74785d12ea7463e5d0474522cd7d754699656 (diff) |
Merge changes from github.
PiperOrigin-RevId: 192388250
Diffstat (limited to 'tensorflow/contrib/seq2seq')
-rw-r--r-- | tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py | 36 | ||||
-rw-r--r-- | tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py | 3 |
2 files changed, 38 insertions, 1 deletions
diff --git a/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py b/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py
index 07b3ad71d4..d508cf3f9d 100644
--- a/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py
+++ b/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py
@@ -353,6 +353,42 @@ class AttentionWrapperTest(test.TestCase):
         attention_mechanism_depth=9,
         name='testLuongNotNormalized')
 
+  def testLuongScaledDType(self):
+    # Test case for GitHub issue 18099
+    for dtype in [np.float16, np.float32, np.float64]:
+      num_units = 128
+      encoder_outputs = array_ops.placeholder(dtype, shape=[64, None, 256])
+      encoder_sequence_length = array_ops.placeholder(dtypes.int32, shape=[64])
+      decoder_inputs = array_ops.placeholder(dtype, shape=[64, None, 128])
+      decoder_sequence_length = array_ops.placeholder(dtypes.int32, shape=[64])
+      batch_size = 64
+      attention_mechanism = wrapper.LuongAttention(
+          num_units=num_units,
+          memory=encoder_outputs,
+          memory_sequence_length=encoder_sequence_length,
+          scale=True,
+          dtype=dtype,
+      )
+      cell = rnn_cell.LSTMCell(num_units)
+      cell = wrapper.AttentionWrapper(cell, attention_mechanism)
+
+      helper = helper_py.TrainingHelper(decoder_inputs,
+                                        decoder_sequence_length)
+      my_decoder = basic_decoder.BasicDecoder(
+          cell=cell,
+          helper=helper,
+          initial_state=cell.zero_state(
+              dtype=dtype, batch_size=batch_size))
+
+      final_outputs, final_state, _ = decoder.dynamic_decode(my_decoder)
+      self.assertTrue(
+          isinstance(final_outputs, basic_decoder.BasicDecoderOutput))
+      self.assertEqual(final_outputs.rnn_output.dtype, dtype)
+      self.assertTrue(
+          isinstance(final_state, wrapper.AttentionWrapperState))
+      self.assertTrue(
+          isinstance(final_state.cell_state, rnn_cell.LSTMStateTuple))
+
   def testLuongScaled(self):
     create_attention_mechanism = functools.partial(
         wrapper.LuongAttention, scale=True)
diff --git a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py
index be53779826..9e0d69593f 100644
--- a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py
+++ b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py
@@ -339,7 +339,8 @@ def _luong_score(query, keys, scale):
   if scale:
     # Scalar used in weight scaling
     g = variable_scope.get_variable(
-        "attention_g", dtype=dtype, initializer=1.)
+        "attention_g", dtype=dtype,
+        initializer=init_ops.ones_initializer, shape=())
     score = g * score
   return score
 