diff options
Diffstat (limited to 'tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py')
-rw-r--r-- | tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py | 434 |
1 file changed, 434 insertions, 0 deletions
diff --git a/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py b/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py new file mode 100644 index 0000000000..152431d1b2 --- /dev/null +++ b/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py @@ -0,0 +1,434 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Deep Neural Network estimators with layer annotations.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import contextlib +import pickle + +from google.protobuf.any_pb2 import Any + +from tensorflow.python.estimator import estimator +from tensorflow.python.estimator import model_fn +from tensorflow.python.estimator.canned import dnn +from tensorflow.python.feature_column import feature_column as feature_column_lib +from tensorflow.python.framework import ops +from tensorflow.python.ops import nn +from tensorflow.python.ops.losses import losses +from tensorflow.python.saved_model import utils as saved_model_utils + + +class LayerAnnotationsCollectionNames(object): + """Names for the collections containing the annotations.""" + + UNPROCESSED_FEATURES = 'layer_annotations/unprocessed_features' + PROCESSED_FEATURES = 'layer_annotatons/processed_features' + FEATURE_COLUMNS = 
'layer_annotations/feature_columns' + + @classmethod + def keys(cls, collection_name): + return '%s/keys' % collection_name + + @classmethod + def values(cls, collection_name): + return '%s/values' % collection_name + + +def serialize_feature_column(feature_column): + if isinstance(feature_column, feature_column_lib._EmbeddingColumn): # pylint: disable=protected-access + # We can't pickle nested functions, and we don't need the value of + # layer_creator in most cases anyway, so just discard its value. + args = feature_column._asdict() + args['layer_creator'] = None + temp = type(feature_column)(**args) + return pickle.dumps(temp) + return pickle.dumps(feature_column) + + +def _to_any_wrapped_tensor_info(tensor): + """Converts a `Tensor` to a `TensorInfo` wrapped in a proto `Any`.""" + any_buf = Any() + tensor_info = saved_model_utils.build_tensor_info(tensor) + any_buf.Pack(tensor_info) + return any_buf + + +def make_input_layer_with_layer_annotations(original_input_layer, mode): + """Make an input_layer replacement function that adds layer annotations.""" + + def input_layer_with_layer_annotations(features, + feature_columns, + weight_collections=None, + trainable=True, + cols_to_vars=None, + cols_to_output_tensors=None): + """Returns a dense `Tensor` as input layer based on given `feature_columns`. + + Generally a single example in training data is described with + FeatureColumns. + At the first layer of the model, this column oriented data should be + converted + to a single `Tensor`. + + This is like tf.feature_column.input_layer, except with added + Integrated-Gradient annotations. + + Args: + features: A mapping from key to tensors. `_FeatureColumn`s look up via + these keys. For example `numeric_column('price')` will look at 'price' + key in this dict. Values can be a `SparseTensor` or a `Tensor` depends + on corresponding `_FeatureColumn`. + feature_columns: An iterable containing the FeatureColumns to use as + inputs to your model. 
All items should be instances of classes derived + from `_DenseColumn` such as `numeric_column`, `embedding_column`, + `bucketized_column`, `indicator_column`. If you have categorical + features, you can wrap them with an `embedding_column` or + `indicator_column`. + weight_collections: A list of collection names to which the Variable will + be added. Note that variables will also be added to collections + `tf.GraphKeys.GLOBAL_VARIABLES` and `ops.GraphKeys.MODEL_VARIABLES`. + trainable: If `True` also add the variable to the graph collection + `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). + cols_to_vars: If not `None`, must be a dictionary that will be filled with + a mapping from `_FeatureColumn` to list of `Variable`s. For example, + after the call, we might have cols_to_vars = {_EmbeddingColumn( + categorical_column=_HashedCategoricalColumn( key='sparse_feature', + hash_bucket_size=5, dtype=tf.string), dimension=10): [<tf.Variable + 'some_variable:0' shape=(5, 10), <tf.Variable 'some_variable:1' + shape=(5, 10)]} If a column creates no variables, its value will be an + empty list. + cols_to_output_tensors: If not `None`, must be a dictionary that will be + filled with a mapping from '_FeatureColumn' to the associated output + `Tensor`s. + + Returns: + A `Tensor` which represents input layer of a model. Its shape + is (batch_size, first_layer_dimension) and its dtype is `float32`. + first_layer_dimension is determined based on given `feature_columns`. + + Raises: + ValueError: features and feature_columns have different lengths. 
+ """ + + local_cols_to_output_tensors = {} + input_layer = original_input_layer( + features=features, + feature_columns=feature_columns, + weight_collections=weight_collections, + trainable=trainable, + cols_to_vars=cols_to_vars, + cols_to_output_tensors=local_cols_to_output_tensors) + + if cols_to_output_tensors is not None: + cols_to_output_tensors = local_cols_to_output_tensors + + if mode and mode == model_fn.ModeKeys.PREDICT: + # Only annotate in PREDICT mode. + + # Annotate features. + # These are the parsed Tensors, before embedding. + + # Only annotate features used by FeatureColumns. + # We figure which ones are used by FeatureColumns by creating a parsing + # spec and looking at the keys. + spec = feature_column_lib.make_parse_example_spec(feature_columns) + for key in spec.keys(): + tensor = features[key] + ops.add_to_collection( + LayerAnnotationsCollectionNames.keys( + LayerAnnotationsCollectionNames.UNPROCESSED_FEATURES), key) + ops.add_to_collection( + LayerAnnotationsCollectionNames.values( + LayerAnnotationsCollectionNames.UNPROCESSED_FEATURES), + _to_any_wrapped_tensor_info(tensor)) + + # Annotate feature columns. + for column in feature_columns: + # TODO(cyfoo): Find a better way to serialize and deserialize + # _FeatureColumn. 
+ ops.add_to_collection(LayerAnnotationsCollectionNames.FEATURE_COLUMNS, + serialize_feature_column(column)) + + for column, tensor in local_cols_to_output_tensors.items(): + ops.add_to_collection( + LayerAnnotationsCollectionNames.keys( + LayerAnnotationsCollectionNames.PROCESSED_FEATURES), + column.name) + ops.add_to_collection( + LayerAnnotationsCollectionNames.values( + LayerAnnotationsCollectionNames.PROCESSED_FEATURES), + _to_any_wrapped_tensor_info(tensor)) + + return input_layer + + return input_layer_with_layer_annotations + + +@contextlib.contextmanager +def _monkey_patch(module, function, replacement): + old_function = getattr(module, function) + setattr(module, function, replacement) + yield + setattr(module, function, old_function) + + +def DNNClassifierWithLayerAnnotations( # pylint: disable=invalid-name + hidden_units, + feature_columns, + model_dir=None, + n_classes=2, + weight_column=None, + label_vocabulary=None, + optimizer='Adagrad', + activation_fn=nn.relu, + dropout=None, + input_layer_partitioner=None, + config=None, + warm_start_from=None, + loss_reduction=losses.Reduction.SUM): + """A classifier for TensorFlow DNN models with layer annotations. + + This classifier is fuctionally identical to estimator.DNNClassifier as far as + training and evaluating models is concerned. The key difference is that this + classifier adds additional layer annotations, which can be used for computing + Integrated Gradients. + + Integrated Gradients is a method for attributing a classifier's predictions + to its input features (https://arxiv.org/pdf/1703.01365.pdf). Given an input + instance, the method assigns attribution scores to individual features in + proportion to the feature's importance to the classifier's prediction. + + See estimator.DNNClassifer for example code for training and evaluating models + using this classifier. 
+ + This classifier is checkpoint-compatible with estimator.DNNClassifier and + therefore the following should work seamlessly: + + # Instantiate ordinary estimator as usual. + estimator = tf.estimator.DNNClassifier( + config, feature_columns, hidden_units, ...) + + # Train estimator, export checkpoint. + tf.estimator.train_and_evaluate(estimator, ...) + + # Instantiate estimator with annotations with the same configuration as the + # ordinary estimator. + estimator_with_annotations = ( + tf.contrib.estimator.DNNClassifierWithLayerAnnotations( + config, feature_columns, hidden_units, ...)) + + # Call export_savedmodel with the same arguments as the ordinary estimator, + # using the checkpoint produced for the ordinary estimator. + estimator_with_annotations.export_saved_model( + export_dir_base, serving_input_receiver, ... + checkpoint_path='/path/to/ordinary/estimator/checkpoint/model.ckpt-1234') + + Args: + hidden_units: Iterable of number hidden units per layer. All layers are + fully connected. Ex. `[64, 32]` means first layer has 64 nodes and second + one has 32. + feature_columns: An iterable containing all the feature columns used by the + model. All items in the set should be instances of classes derived from + `_FeatureColumn`. + model_dir: Directory to save model parameters, graph and etc. This can also + be used to load checkpoints from the directory into a estimator to + continue training a previously saved model. + n_classes: Number of label classes. Defaults to 2, namely binary + classification. Must be > 1. + weight_column: A string or a `_NumericColumn` created by + `tf.feature_column.numeric_column` defining feature column representing + weights. It is used to down weight or boost examples during training. It + will be multiplied by the loss of the example. If it is a string, it is + used as a key to fetch weight tensor from the `features`. 
If it is a + `_NumericColumn`, raw tensor is fetched by key `weight_column.key`, then + weight_column.normalizer_fn is applied on it to get weight tensor. + label_vocabulary: A list of strings represents possible label values. If + given, labels must be string type and have any value in + `label_vocabulary`. If it is not given, that means labels are already + encoded as integer or float within [0, 1] for `n_classes=2` and encoded as + integer values in {0, 1,..., n_classes-1} for `n_classes`>2 . Also there + will be errors if vocabulary is not provided and labels are string. + optimizer: An instance of `tf.Optimizer` used to train the model. Defaults + to Adagrad optimizer. + activation_fn: Activation function applied to each layer. If `None`, will + use `tf.nn.relu`. + dropout: When not `None`, the probability we will drop out a given + coordinate. + input_layer_partitioner: Optional. Partitioner for input layer. Defaults to + `min_max_variable_partitioner` with `min_slice_size` 64 << 20. + config: `RunConfig` object to configure the runtime settings. + warm_start_from: A string filepath to a checkpoint to warm-start from, or a + `WarmStartSettings` object to fully configure warm-starting. If the + string filepath is provided instead of a `WarmStartSettings`, then all + weights are warm-started, and it is assumed that vocabularies and Tensor + names are unchanged. + loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to + reduce training loss over batch. Defaults to `SUM`. + + Returns: + DNNClassifier with layer annotations. 
+ """ + + original = dnn.DNNClassifier( + hidden_units=hidden_units, + feature_columns=feature_columns, + model_dir=model_dir, + n_classes=n_classes, + weight_column=weight_column, + label_vocabulary=label_vocabulary, + optimizer=optimizer, + activation_fn=activation_fn, + dropout=dropout, + input_layer_partitioner=input_layer_partitioner, + config=config, + warm_start_from=warm_start_from, + loss_reduction=loss_reduction) + + def _model_fn(features, labels, mode, config): + with _monkey_patch( + feature_column_lib, 'input_layer', + make_input_layer_with_layer_annotations(feature_column_lib.input_layer, + mode)): + return original.model_fn(features, labels, mode, config) + + return estimator.Estimator( + model_fn=_model_fn, + model_dir=model_dir, + config=config, + warm_start_from=warm_start_from) + + +def DNNRegressorWithLayerAnnotations( # pylint: disable=invalid-name + hidden_units, + feature_columns, + model_dir=None, + label_dimension=1, + weight_column=None, + optimizer='Adagrad', + activation_fn=nn.relu, + dropout=None, + input_layer_partitioner=None, + config=None, + warm_start_from=None, + loss_reduction=losses.Reduction.SUM, +): + """A regressor for TensorFlow DNN models with layer annotations. + + This regressor is fuctionally identical to estimator.DNNRegressor as far as + training and evaluating models is concerned. The key difference is that this + classifier adds additional layer annotations, which can be used for computing + Integrated Gradients. + + Integrated Gradients is a method for attributing a classifier's predictions + to its input features (https://arxiv.org/pdf/1703.01365.pdf). Given an input + instance, the method assigns attribution scores to individual features in + proportion to the feature's importance to the classifier's prediction. + + See estimator.DNNRegressor for example code for training and evaluating models + using this regressor. 
+ + This regressor is checkpoint-compatible with estimator.DNNRegressor and + therefore the following should work seamlessly: + + # Instantiate ordinary estimator as usual. + estimator = tf.estimator.DNNRegressor( + config, feature_columns, hidden_units, ...) + + # Train estimator, export checkpoint. + tf.estimator.train_and_evaluate(estimator, ...) + + # Instantiate estimator with annotations with the same configuration as the + # ordinary estimator. + estimator_with_annotations = ( + tf.contrib.estimator.DNNRegressorWithLayerAnnotations( + config, feature_columns, hidden_units, ...)) + + # Call export_savedmodel with the same arguments as the ordinary estimator, + # using the checkpoint produced for the ordinary estimator. + estimator_with_annotations.export_saved_model( + export_dir_base, serving_input_receiver, ... + checkpoint_path='/path/to/ordinary/estimator/checkpoint/model.ckpt-1234') + + Args: + hidden_units: Iterable of number hidden units per layer. All layers are + fully connected. Ex. `[64, 32]` means first layer has 64 nodes and second + one has 32. + feature_columns: An iterable containing all the feature columns used by the + model. All items in the set should be instances of classes derived from + `_FeatureColumn`. + model_dir: Directory to save model parameters, graph and etc. This can also + be used to load checkpoints from the directory into a estimator to + continue training a previously saved model. + label_dimension: Number of regression targets per example. This is the size + of the last dimension of the labels and logits `Tensor` objects + (typically, these have shape `[batch_size, label_dimension]`). + weight_column: A string or a `_NumericColumn` created by + `tf.feature_column.numeric_column` defining feature column representing + weights. It is used to down weight or boost examples during training. It + will be multiplied by the loss of the example. If it is a string, it is + used as a key to fetch weight tensor from the `features`. 
If it is a + `_NumericColumn`, raw tensor is fetched by key `weight_column.key`, then + weight_column.normalizer_fn is applied on it to get weight tensor. + optimizer: An instance of `tf.Optimizer` used to train the model. Defaults + to Adagrad optimizer. + activation_fn: Activation function applied to each layer. If `None`, will + use `tf.nn.relu`. + dropout: When not `None`, the probability we will drop out a given + coordinate. + input_layer_partitioner: Optional. Partitioner for input layer. Defaults to + `min_max_variable_partitioner` with `min_slice_size` 64 << 20. + config: `RunConfig` object to configure the runtime settings. + warm_start_from: A string filepath to a checkpoint to warm-start from, or a + `WarmStartSettings` object to fully configure warm-starting. If the + string filepath is provided instead of a `WarmStartSettings`, then all + weights are warm-started, and it is assumed that vocabularies and Tensor + names are unchanged. + loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to + reduce training loss over batch. Defaults to `SUM`. + + Returns: + DNNRegressor with layer annotations. + """ + + original = dnn.DNNRegressor( + hidden_units=hidden_units, + feature_columns=feature_columns, + model_dir=model_dir, + label_dimension=label_dimension, + weight_column=weight_column, + optimizer=optimizer, + activation_fn=activation_fn, + dropout=dropout, + input_layer_partitioner=input_layer_partitioner, + config=config, + warm_start_from=warm_start_from, + loss_reduction=loss_reduction, + ) + + def _model_fn(features, labels, mode, config): + with _monkey_patch( + feature_column_lib, 'input_layer', + make_input_layer_with_layer_annotations(feature_column_lib.input_layer, + mode)): + return original.model_fn(features, labels, mode, config) + + return estimator.Estimator( + model_fn=_model_fn, + model_dir=model_dir, + config=config, + warm_start_from=warm_start_from) |