Diffstat (limited to 'tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py')
 tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py
index 9ff8a343f1..be53779826 100644
--- a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py
+++ b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py
@@ -736,7 +736,7 @@ class _BaseMonotonicAttentionMechanism(_BaseAttentionMechanism):
   """Base attention mechanism for monotonic attention.
 
   Simply overrides the initial_alignments function to provide a dirac
-  distribution,which is needed in order for the monotonic attention
+  distribution, which is needed in order for the monotonic attention
   distributions to have the correct behavior.
   """
 
@@ -763,7 +763,7 @@ class _BaseMonotonicAttentionMechanism(_BaseAttentionMechanism):
 class BahdanauMonotonicAttention(_BaseMonotonicAttentionMechanism):
   """Monotonic attention mechanism with Bahadanau-style energy function.
 
-  This type of attention encorces a monotonic constraint on the attention
+  This type of attention enforces a monotonic constraint on the attention
   distributions; that is once the model attends to a given point in the
   memory it can't attend to any prior points at subsequence output timesteps.
   It achieves this by using the _monotonic_probability_fn instead of softmax to
@@ -867,7 +867,7 @@ class BahdanauMonotonicAttention(_BaseMonotonicAttentionMechanism):
 class LuongMonotonicAttention(_BaseMonotonicAttentionMechanism):
   """Monotonic attention mechanism with Luong-style energy function.
 
-  This type of attention encorces a monotonic constraint on the attention
+  This type of attention enforces a monotonic constraint on the attention
   distributions; that is once the model attends to a given point in the
   memory it can't attend to any prior points at subsequence output timesteps.
   It achieves this by using the _monotonic_probability_fn instead of softmax to
@@ -1133,7 +1133,7 @@ class AttentionWrapper(rnn_cell_impl.RNNCell):
       output_attention: Python bool. If `True` (default), the output at each
         time step is the attention value. This is the behavior of Luong-style
         attention mechanisms. If `False`, the output at each time step is
-        the output of `cell`. This is the beahvior of Bhadanau-style
+        the output of `cell`. This is the behavior of Bhadanau-style
         attention mechanisms. In both cases, the `attention` tensor is
         propagated to the next time step via the state and is used there.
         This flag only controls whether the attention mechanism is propagated
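For context, a minimal usage sketch of the classes whose docstrings this patch touches, assuming TF 1.x (where tf.contrib.seq2seq is available); the tensor shapes and variable names are hypothetical:

import tensorflow as tf  # TF 1.x; tf.contrib was removed in TF 2.x

batch_size, max_time, depth, num_units = 4, 10, 16, 32

# Encoder outputs serve as the attention memory (hypothetical shapes).
memory = tf.placeholder(tf.float32, [batch_size, max_time, depth])
memory_lengths = tf.placeholder(tf.int32, [batch_size])

# Monotonic attention with a Bahdanau-style energy function: once the
# decoder attends to position i, later steps cannot attend before i.
attention_mechanism = tf.contrib.seq2seq.BahdanauMonotonicAttention(
    num_units=num_units,
    memory=memory,
    memory_sequence_length=memory_lengths)

cell = tf.nn.rnn_cell.LSTMCell(num_units)

# output_attention=False makes each step emit the cell output
# (Bahdanau-style behavior) rather than the attention value
# (Luong-style, the default), per the docstring fixed above.
attn_cell = tf.contrib.seq2seq.AttentionWrapper(
    cell,
    attention_mechanism,
    attention_layer_size=num_units,
    output_attention=False)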