Add a custom estimator example to the regression cookbook.

PiperOrigin-RevId: 171375399
author: Mark Daoust <markdaoust@google.com> 2017-10-06 18:34:17 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> 2017-10-06 18:42:34 -0700
commit: 394e5601c13da603237063d436d87867727ecf68 (patch)
tree: db3cf0499dd5fb9c4b21accf9da60dadde394f1b /tensorflow/examples/get_started
parent: 5a107a9a278e98f2fcb77c8ac6c224d40c06e8c2 (diff)
4 files changed, 174 insertions, 3 deletions
diff --git a/tensorflow/examples/get_started/regression/BUILD b/tensorflow/examples/get_started/regression/BUILD
index 334c8096c1..577b970c90 100644
--- a/tensorflow/examples/get_started/regression/BUILD
+++ b/tensorflow/examples/get_started/regression/BUILD
@@ -18,6 +18,7 @@ py_test(
     name = "test",
     size = "medium",
     srcs = [
+        "custom_regression.py",
         "dnn_regression.py",
         "imports85.py",
         "linear_regression.py",
diff --git a/tensorflow/examples/get_started/regression/custom_regression.py b/tensorflow/examples/get_started/regression/custom_regression.py
new file mode 100644
index 0000000000..2e34362c5c
--- /dev/null
+++ b/tensorflow/examples/get_started/regression/custom_regression.py
@@ -0,0 +1,163 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Regression using the DNNRegressor Estimator."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow as tf
+
+import imports85  # pylint: disable=g-bad-import-order
+
+STEPS = 1000
+PRICE_NORM_FACTOR = 1000
+
+
+def my_dnn_regression_fn(features, labels, mode, params):
+  """A model function implementing DNN regression for a custom Estimator."""
+
+  # Extract the input into a dense layer, according to the feature_columns.
+  top = tf.feature_column.input_layer(features, params["feature_columns"])
+
+  # Iterate over the "hidden_units" list of layer sizes, default is [20].
+  for units in params.get("hidden_units", [20]):
+    # Add a hidden layer, densely connected on top of the previous layer.
+    top = tf.layers.dense(inputs=top, units=units, activation=tf.nn.relu)
+
+  # Connect a linear output layer on top.
+  output_layer = tf.layers.dense(inputs=top, units=1)
+
+  # Reshape the output layer to a 1-dim Tensor to return predictions
+  predictions = tf.squeeze(output_layer, 1)
+
+  if mode == tf.estimator.ModeKeys.PREDICT:
+    # In `PREDICT` mode we only need to return predictions.
+    return tf.estimator.EstimatorSpec(
+        mode=mode, predictions={"price": predictions})
+
+  # Calculate loss using mean squared error
+  average_loss = tf.losses.mean_squared_error(labels, predictions)
+
+  # Pre-made estimators use the total_loss instead of the average,
+  # so report total_loss for compatibility.
+  batch_size = tf.shape(labels)[0]
+  total_loss = tf.to_float(batch_size) * average_loss
+
+  if mode == tf.estimator.ModeKeys.TRAIN:
+    optimizer = params.get("optimizer", tf.train.AdamOptimizer)
+    optimizer = optimizer(params.get("learning_rate", None))
+    train_op = optimizer.minimize(
+        loss=average_loss, global_step=tf.train.get_global_step())
+
+    return tf.estimator.EstimatorSpec(
+        mode=mode, loss=total_loss, train_op=train_op)
+
+  # In evaluation mode we will calculate evaluation metrics.
+  assert mode == tf.estimator.ModeKeys.EVAL
+
+  # Calculate root mean squared error
+  rmse = tf.metrics.root_mean_squared_error(labels, predictions)
+
+  # Add the rmse to the collection of evaluation metrics.
+  eval_metrics = {"rmse": rmse}
+
+  return tf.estimator.EstimatorSpec(
+      mode=mode,
+      # Report sum of error for compatibility with pre-made estimators
+      loss=total_loss,
+      eval_metric_ops=eval_metrics)
+
+
+def main(argv):
+  """Builds, trains, and evaluates the model."""
+  assert len(argv) == 1
+  (train, test) = imports85.dataset()
+
+  # Switch the labels to units of thousands for better convergence.
+  def normalize_price(features, labels):
+    return features, labels / PRICE_NORM_FACTOR
+
+  train = train.map(normalize_price)
+  test = test.map(normalize_price)
+
+  # Build the training input_fn.
+  def input_train():
+    return (
+        # Shuffling with a buffer larger than the data set ensures
+        # that the examples are well mixed.
+        train.shuffle(1000).batch(128)
+        # Repeat forever
+        .repeat().make_one_shot_iterator().get_next())
+
+  # Build the validation input_fn.
+  def input_test():
+    return (test.shuffle(1000).batch(128)
+            .make_one_shot_iterator().get_next())
+
+  # The first way assigns a unique weight to each category. To do this you must
+  # specify the category's vocabulary (values outside this specification will
+  # receive a weight of zero). Here we specify the vocabulary using a list of
+  # options. The vocabulary can also be specified with a vocabulary file (using
+  # `categorical_column_with_vocabulary_file`). For features covering a
+  # range of positive integers use `categorical_column_with_identity`.
+  body_style_vocab = ["hardtop", "wagon", "sedan", "hatchback", "convertible"]
+  body_style = tf.feature_column.categorical_column_with_vocabulary_list(
+      key="body-style", vocabulary_list=body_style_vocab)
+  make = tf.feature_column.categorical_column_with_hash_bucket(
+      key="make", hash_bucket_size=50)
+
+  feature_columns = [
+      tf.feature_column.numeric_column(key="curb-weight"),
+      tf.feature_column.numeric_column(key="highway-mpg"),
+      # Since this is a DNN model, convert categorical columns from sparse
+      # to dense.
+      # Wrap them in an `indicator_column` to create a
+      # one-hot vector from the input.
+      tf.feature_column.indicator_column(body_style),
+      # Or use an `embedding_column` to create a trainable vector for each
+      # index.
+      tf.feature_column.embedding_column(make, dimension=3),
+  ]
+
+  # Build a custom Estimator, using the model_fn.
+  # `params` is passed through to the `model_fn`.
+  model = tf.estimator.Estimator(
+      model_fn=my_dnn_regression_fn,
+      params={
+          "feature_columns": feature_columns,
+          "learning_rate": 0.001,
+          "optimizer": tf.train.AdamOptimizer,
+          "hidden_units": [20, 20]
+      })
+
+  # Train the model.
+  model.train(input_fn=input_train, steps=STEPS)
+
+  # Evaluate how the model performs on data it has not yet seen.
+  eval_result = model.evaluate(input_fn=input_test)
+
+  # Print the Root Mean Square Error (RMSE).
+  print("\n" + 80 * "*")
+  print("\nRMS error for the test set: ${:.0f}"
+        .format(PRICE_NORM_FACTOR * eval_result["rmse"]))
+
+  print()
+
+
+if __name__ == "__main__":
+  # The Estimator periodically generates "INFO" logs; make these logs visible.
+  tf.logging.set_verbosity(tf.logging.INFO)
+  tf.app.run(main=main)
diff --git a/tensorflow/examples/get_started/regression/imports85.py b/tensorflow/examples/get_started/regression/imports85.py
index c165f0175d..96a464920a 100644
--- a/tensorflow/examples/get_started/regression/imports85.py
+++ b/tensorflow/examples/get_started/regression/imports85.py
@@ -140,10 +140,10 @@ def dataset(y_name="price", train_fraction=0.7):
   train = (base_dataset
            # Take only the training-set lines.
            .filter(in_training_set)
-           # Cache data so you only read the file once.
-           .cache()
            # Decode each line into a (features_dict, label) pair.
-           .map(decode_line))
+           .map(decode_line)
+           # Cache data so you only decode the file once.
+           .cache())
 
   # Do the same for the test-set.
   test = (base_dataset.filter(in_test_set).cache().map(decode_line))
diff --git a/tensorflow/examples/get_started/regression/test.py b/tensorflow/examples/get_started/regression/test.py
index fa06dde9ae..652b44f543 100644
--- a/tensorflow/examples/get_started/regression/test.py
+++ b/tensorflow/examples/get_started/regression/test.py
@@ -34,6 +34,7 @@ import tensorflow.contrib.data as data
 import tensorflow.examples.get_started.regression.dnn_regression as dnn_regression
 import tensorflow.examples.get_started.regression.linear_regression as linear_regression
 import tensorflow.examples.get_started.regression.linear_regression_categorical as linear_regression_categorical
+import tensorflow.examples.get_started.regression.custom_regression as custom_regression
 
 from tensorflow.python.platform import googletest
 from tensorflow.python.platform import test
@@ -86,6 +87,12 @@ class RegressionTest(googletest.TestCase):
   def test_dnn_regression(self):
     dnn_regression.main([""])
 
+  @test.mock.patch.dict(data.__dict__, {"TextLineDataset": four_lines_dataset})
+  @test.mock.patch.dict(imports85.__dict__, {"_get_imports85": (lambda: None)})
+  @test.mock.patch.dict(custom_regression.__dict__, {"STEPS": 1})
+  def test_custom_regression(self):
+    custom_regression.main([""])
+
 
 if __name__ == "__main__":
   googletest.main()
author	Mark Daoust <markdaoust@google.com>	2017-10-06 18:34:17 -0700
committer	TensorFlower Gardener <gardener@tensorflow.org>	2017-10-06 18:42:34 -0700
commit	394e5601c13da603237063d436d87867727ecf68 (patch)
tree	db3cf0499dd5fb9c4b21accf9da60dadde394f1b /tensorflow/examples/get_started
parent	5a107a9a278e98f2fcb77c8ac6c224d40c06e8c2 (diff)