From 9cc5098388295fabde838433ccfc5c418f4c327a Mon Sep 17 00:00:00 2001
From: Eugene Brevdo
Date: Thu, 16 Mar 2017 18:24:44 -0800
Subject: Initial cut of documentation for tf.contrib.seq2seq

Change: 150400474
---
 tensorflow/contrib/seq2seq/README.md               |   9 --
 tensorflow/contrib/seq2seq/__init__.py             |  14 +--
 .../python/ops/dynamic_attention_wrapper.py        |   4 +-
 .../docs_src/api_guides/python/contrib.seq2seq.md  | 108 +++++++++++++++++++++
 4 files changed, 114 insertions(+), 21 deletions(-)
 delete mode 100644 tensorflow/contrib/seq2seq/README.md
 create mode 100644 tensorflow/docs_src/api_guides/python/contrib.seq2seq.md

diff --git a/tensorflow/contrib/seq2seq/README.md b/tensorflow/contrib/seq2seq/README.md
deleted file mode 100644
index 50ac32ec15..0000000000
--- a/tensorflow/contrib/seq2seq/README.md
+++ /dev/null
@@ -1,9 +0,0 @@
-# TensorFlow contrib seq2seq layers and losses
-
-## Layers
-
-Information to be added.
-
-## Losses
-
-Information to be added.

diff --git a/tensorflow/contrib/seq2seq/__init__.py b/tensorflow/contrib/seq2seq/__init__.py
index f4fc557085..29bce7bbae 100644
--- a/tensorflow/contrib/seq2seq/__init__.py
+++ b/tensorflow/contrib/seq2seq/__init__.py
@@ -15,15 +15,14 @@
 """Ops for building neural network seq2seq decoders and losses.

-## Decoder base class and functions
+See the @{$python/contrib.seq2seq} guide.
+
 @@Decoder
 @@dynamic_decode

-## Basic Decoder
 @@BasicDecoderOutput
 @@BasicDecoder

-## Decoder Helpers
 @@Helper
 @@CustomHelper
 @@GreedyEmbeddingHelper
@@ -31,16 +30,11 @@
 @@ScheduledOutputTrainingHelper
 @@TrainingHelper

-## Attention
-
-### Scorers
-@@BahdanauScorer
-@@LuongScorer
+@@BahdanauAttention
+@@LuongAttention

-### Helper functions
 @@hardmax

-### RNNCells
 @@DynamicAttentionWrapperState
 @@DynamicAttentionWrapper
 """

diff --git a/tensorflow/contrib/seq2seq/python/ops/dynamic_attention_wrapper.py b/tensorflow/contrib/seq2seq/python/ops/dynamic_attention_wrapper.py
index 0f6bb72fff..94678629c8 100644
--- a/tensorflow/contrib/seq2seq/python/ops/dynamic_attention_wrapper.py
+++ b/tensorflow/contrib/seq2seq/python/ops/dynamic_attention_wrapper.py
@@ -425,8 +425,8 @@ class DynamicAttentionWrapper(core_rnn_cell.RNNCell):
     cell_input_fn: (optional) A `callable`. The default is:
       `lambda inputs, attention: array_ops.concat([inputs, attention], -1)`.
     probability_fn: (optional) A `callable`. Converts the score to
-      probabilities. The default is `tf.nn.softmax`. Other options include
-      `tf.contrib.seq2seq.hardmax` and `tf.contrib.sparsemax.sparsemax`.
+      probabilities. The default is @{tf.nn.softmax}. Other options include
+      @{tf.contrib.seq2seq.hardmax} and @{tf.contrib.sparsemax.sparsemax}.
     output_attention: Python bool. If `True` (default), the output at each
       time step is the attention value. This is the behavior of Luong-style
       attention mechanisms. If `False`, the output at each time step is

diff --git a/tensorflow/docs_src/api_guides/python/contrib.seq2seq.md b/tensorflow/docs_src/api_guides/python/contrib.seq2seq.md
new file mode 100644
index 0000000000..223bf4a0a3
--- /dev/null
+++ b/tensorflow/docs_src/api_guides/python/contrib.seq2seq.md
@@ -0,0 +1,108 @@
# Seq2seq Library (contrib)
[TOC]

Module for constructing seq2seq models and dynamic decoding. Builds on top of
libraries in @{tf.contrib.rnn}.

This library is composed of two primary components:

* New attention wrappers for @{tf.contrib.rnn.RNNCell} objects.
* A new object-oriented dynamic decoding framework.

## Attention

Attention wrappers are `RNNCell` objects that wrap other `RNNCell` objects and
implement attention. The form of attention is determined by a subclass of
@{tf.contrib.seq2seq.AttentionMechanism}. These subclasses describe the form
of attention (e.g. additive vs. multiplicative) to use when creating the
wrapper. An instance of an `AttentionMechanism` is constructed with a
`memory` tensor, from which lookup keys and values tensors are created.

### Attention Mechanisms

The two basic attention mechanisms are:
* @{tf.contrib.seq2seq.BahdanauAttention} (additive attention,
  [ref.](https://arxiv.org/abs/1409.0473))
* @{tf.contrib.seq2seq.LuongAttention} (multiplicative attention,
  [ref.](https://arxiv.org/abs/1508.04025))

The `memory` tensor passed to the attention mechanism's constructor is
expected to be shaped `[batch_size, memory_max_time, memory_depth]`. Many
mechanisms also accept an optional `memory_sequence_length` vector; if it is
provided, the rows of the `memory` tensor are masked with zeros past their
true sequence lengths.

Attention mechanisms also have a concept of depth, usually set by the
construction parameter `num_units`. For some kinds of attention (like
`BahdanauAttention`), both queries and memory are projected to tensors of
depth `num_units`. For other kinds (like `LuongAttention`), `num_units`
should match the depth of the queries, and the `memory` tensor will be
projected to this depth.

### Attention Wrappers

The basic attention wrapper is @{tf.contrib.seq2seq.DynamicAttentionWrapper}.
This wrapper accepts an `RNNCell` instance, an instance of
`AttentionMechanism`, and an attention depth parameter (`attention_size`), as
well as several optional arguments that allow one to customize intermediate
calculations.

At each time step, the basic calculation performed by this wrapper is:

```python
# Mix the inputs and the previous attention (configurable: cell_input_fn).
cell_inputs = concat([inputs, prev_state.attention], -1)
cell_output, next_cell_state = cell(cell_inputs, prev_state.cell_state)
# Score the memory against the cell output, then normalize the score
# (configurable: probability_fn).
score = attention_mechanism(cell_output)
alignments = softmax(score)
# Form the context vector and project it down to attention_size.
context = matmul(alignments, attention_mechanism.values)
attention = tf.layers.Dense(attention_size)(concat([cell_output, context], 1))
next_state = DynamicAttentionWrapperState(
    cell_state=next_cell_state,
    attention=attention)
# Configurable via output_attention: emit attention or cell_output.
output = attention
return output, next_state
```

In practice, a number of these intermediate calculations are configurable.
The initial concatenation of `inputs` and `prev_state.attention` can be
replaced with another mixing function (`cell_input_fn`); the `softmax` used
to calculate `alignments` from the `score` can be replaced with an
alternative such as @{tf.contrib.seq2seq.hardmax} (`probability_fn`); and the
wrapper can be configured to return the value `cell_output` instead of
`attention` (`output_attention`).
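
For instance, the following sketch constructs a wrapper that overrides two of
these options. The `encoder_outputs` and `encoder_sequence_length` tensors,
and the sizes chosen here, are hypothetical stand-ins rather than part of the
library:

```python
import tensorflow as tf

# Hypothetical encoder memory, shaped [batch_size, max_time, memory_depth],
# plus a [batch_size] vector of true memory lengths used for masking.
attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
    num_units=128,
    memory=encoder_outputs,
    memory_sequence_length=encoder_sequence_length)

attn_cell = tf.contrib.seq2seq.DynamicAttentionWrapper(
    tf.contrib.rnn.LSTMCell(256),
    attention_mechanism,
    attention_size=128,
    # Compute alignments with hardmax instead of the default softmax.
    probability_fn=tf.contrib.seq2seq.hardmax,
    # Emit the cell output at each step instead of the attention value.
    output_attention=False)
```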

The benefit of using a `DynamicAttentionWrapper` is that it plays nicely with
other wrappers and the dynamic decoder described below. For example, one can
write:

```python
cell = tf.contrib.rnn.DeviceWrapper(tf.contrib.rnn.LSTMCell(512), "/gpu:0")
attention_mechanism = tf.contrib.seq2seq.LuongAttention(512, encoder_outputs)
attn_cell = tf.contrib.seq2seq.DynamicAttentionWrapper(
    cell, attention_mechanism, attention_size=256)
attn_cell = tf.contrib.rnn.DeviceWrapper(attn_cell, "/gpu:1")
top_cell = tf.contrib.rnn.DeviceWrapper(tf.contrib.rnn.LSTMCell(512), "/gpu:1")
multi_cell = tf.contrib.rnn.MultiRNNCell([attn_cell, top_cell])
```

The `multi_cell` will perform the bottom layer calculations on GPU 0;
attention calculations will be performed on GPU 1 and immediately passed up
to the top layer, which is also calculated on GPU 1. The attention is also
passed forward in time to the next time step and copied to GPU 0 for the
next time step of `cell`. (*Note*: This is just an example of use, not a
suggested device partitioning strategy.)

## Dynamic Decoding

A decoding loop is built from a `Decoder` instance, which is unrolled by
@{tf.contrib.seq2seq.dynamic_decode}; `Helper` objects determine the
decoder's input at each time step. A usage sketch follows the class lists
below.

### Decoder base class and functions
* @{tf.contrib.seq2seq.Decoder}
* @{tf.contrib.seq2seq.dynamic_decode}

### Basic Decoder
* @{tf.contrib.seq2seq.BasicDecoderOutput}
* @{tf.contrib.seq2seq.BasicDecoder}

### Decoder Helpers
* @{tf.contrib.seq2seq.Helper}
* @{tf.contrib.seq2seq.CustomHelper}
* @{tf.contrib.seq2seq.GreedyEmbeddingHelper}
* @{tf.contrib.seq2seq.ScheduledEmbeddingTrainingHelper}
* @{tf.contrib.seq2seq.ScheduledOutputTrainingHelper}
* @{tf.contrib.seq2seq.TrainingHelper}
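
Here is the usage sketch referenced above: a minimal training-time decoding
example that reuses `multi_cell` from the attention section. The
`decoder_inputs`, `decoder_lengths`, and `batch_size` names are hypothetical
stand-ins for tensors built elsewhere, and exact return signatures may vary
slightly across versions:

```python
import tensorflow as tf

# Hypothetical inputs: decoder_inputs is [batch_size, max_time, input_depth];
# decoder_lengths is a [batch_size] vector of true decoder sequence lengths.
helper = tf.contrib.seq2seq.TrainingHelper(
    inputs=decoder_inputs, sequence_length=decoder_lengths)

decoder = tf.contrib.seq2seq.BasicDecoder(
    cell=multi_cell,
    helper=helper,
    initial_state=multi_cell.zero_state(batch_size, tf.float32))

# dynamic_decode runs the decoding loop; outputs is a BasicDecoderOutput
# whose rnn_output field holds the per-step cell outputs.
outputs, final_state = tf.contrib.seq2seq.dynamic_decode(decoder)
```

At inference time, one would typically swap `TrainingHelper` for
@{tf.contrib.seq2seq.GreedyEmbeddingHelper}, which feeds the embedding of
each greedily sampled token back in as the next input.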