From b0f2eee339a041de4e7837b68a9ff4fc77ca7c4a Mon Sep 17 00:00:00 2001 From: Billy Lamberta Date: Fri, 22 Jun 2018 17:40:27 -0700 Subject: Rename programmers_guide/ directory to guide/. Update references in source files and docs in tensorflow and related projects. PiperOrigin-RevId: 201766994 --- README.md | 2 +- RELEASE.md | 6 +- tensorflow/contrib/autograph/README.md | 2 +- tensorflow/contrib/data/__init__.py | 2 +- tensorflow/contrib/eager/README.md | 2 +- .../python/examples/notebooks/3_datasets.ipynb | 6 +- tensorflow/contrib/eager/python/g3doc/guide.md | 4 +- tensorflow/contrib/lite/toco/README.md | 2 +- tensorflow/contrib/tpu/python/tpu/tpu_estimator.py | 2 +- tensorflow/core/protobuf/config.proto | 6 +- tensorflow/docs_src/api_guides/python/client.md | 2 +- .../docs_src/api_guides/python/input_dataset.md | 3 +- .../docs_src/api_guides/python/reading_data.md | 8 +- tensorflow/docs_src/deploy/distributed.md | 2 +- tensorflow/docs_src/extend/architecture.md | 5 +- tensorflow/docs_src/get_started/_index.yaml | 12 +- tensorflow/docs_src/get_started/next_steps.md | 4 +- tensorflow/docs_src/guide/checkpoints.md | 238 +++++ tensorflow/docs_src/guide/custom_estimators.md | 602 +++++++++++++ tensorflow/docs_src/guide/datasets.md | 823 +++++++++++++++++ .../docs_src/guide/datasets_for_estimators.md | 387 ++++++++ tensorflow/docs_src/guide/debugger.md | 804 +++++++++++++++++ tensorflow/docs_src/guide/eager.md | 849 +++++++++++++++++ tensorflow/docs_src/guide/embedding.md | 262 ++++++ tensorflow/docs_src/guide/estimators.md | 193 ++++ tensorflow/docs_src/guide/faq.md | 297 ++++++ tensorflow/docs_src/guide/feature_columns.md | 572 ++++++++++++ tensorflow/docs_src/guide/graph_viz.md | 316 +++++++ tensorflow/docs_src/guide/graphs.md | 558 ++++++++++++ tensorflow/docs_src/guide/index.md | 86 ++ tensorflow/docs_src/guide/keras.md | 623 +++++++++++++ tensorflow/docs_src/guide/leftnav_files | 40 + tensorflow/docs_src/guide/low_level_intro.md | 604 +++++++++++++ tensorflow/docs_src/guide/premade_estimators.md | 430 +++++++++ tensorflow/docs_src/guide/saved_model.md | 999 +++++++++++++++++++++ .../docs_src/guide/summaries_and_tensorboard.md | 225 +++++ .../docs_src/guide/tensorboard_histograms.md | 245 +++++ tensorflow/docs_src/guide/tensors.md | 330 +++++++ tensorflow/docs_src/guide/using_gpu.md | 215 +++++ tensorflow/docs_src/guide/using_tpu.md | 395 ++++++++ tensorflow/docs_src/guide/variables.md | 319 +++++++ tensorflow/docs_src/guide/version_compat.md | 319 +++++++ tensorflow/docs_src/install/install_go.md | 2 +- tensorflow/docs_src/install/install_java.md | 2 +- .../docs_src/programmers_guide/checkpoints.md | 240 ----- .../programmers_guide/custom_estimators.md | 602 ------------- tensorflow/docs_src/programmers_guide/datasets.md | 823 ----------------- .../programmers_guide/datasets_for_estimators.md | 387 -------- tensorflow/docs_src/programmers_guide/debugger.md | 804 ----------------- tensorflow/docs_src/programmers_guide/eager.md | 849 ----------------- tensorflow/docs_src/programmers_guide/embedding.md | 262 ------ .../docs_src/programmers_guide/estimators.md | 193 ---- tensorflow/docs_src/programmers_guide/faq.md | 297 ------ .../docs_src/programmers_guide/feature_columns.md | 572 ------------ tensorflow/docs_src/programmers_guide/graph_viz.md | 316 ------- tensorflow/docs_src/programmers_guide/graphs.md | 558 ------------ tensorflow/docs_src/programmers_guide/index.md | 86 -- tensorflow/docs_src/programmers_guide/keras.md | 623 ------------- .../docs_src/programmers_guide/leftnav_files | 
40 - .../docs_src/programmers_guide/low_level_intro.md | 604 ------------- .../programmers_guide/premade_estimators.md | 430 --------- .../docs_src/programmers_guide/saved_model.md | 999 --------------------- .../programmers_guide/summaries_and_tensorboard.md | 225 ----- .../programmers_guide/tensorboard_histograms.md | 245 ----- tensorflow/docs_src/programmers_guide/tensors.md | 330 ------- tensorflow/docs_src/programmers_guide/using_gpu.md | 215 ----- tensorflow/docs_src/programmers_guide/using_tpu.md | 395 -------- tensorflow/docs_src/programmers_guide/variables.md | 319 ------- .../docs_src/programmers_guide/version_compat.md | 319 ------- tensorflow/docs_src/tutorials/deep_cnn.md | 2 +- tensorflow/docs_src/tutorials/layers.md | 2 +- .../how_tos/reading_data/fully_connected_reader.py | 2 +- tensorflow/java/README.md | 5 +- .../src/main/java/org/tensorflow/package-info.java | 2 +- tensorflow/python/data/__init__.py | 2 +- tensorflow/python/data/ops/dataset_ops.py | 14 + tensorflow/python/debug/BUILD | 2 +- tensorflow/python/debug/README.md | 4 +- tensorflow/python/debug/examples/README.md | 4 +- tensorflow/python/estimator/keras.py | 2 +- tensorflow/python/ops/script_ops.py | 2 +- tensorflow/python/tools/saved_model_cli.py | 4 +- third_party/examples/eager/spinn/README.md | 2 +- 83 files changed, 10798 insertions(+), 10789 deletions(-) create mode 100644 tensorflow/docs_src/guide/checkpoints.md create mode 100644 tensorflow/docs_src/guide/custom_estimators.md create mode 100644 tensorflow/docs_src/guide/datasets.md create mode 100644 tensorflow/docs_src/guide/datasets_for_estimators.md create mode 100644 tensorflow/docs_src/guide/debugger.md create mode 100644 tensorflow/docs_src/guide/eager.md create mode 100644 tensorflow/docs_src/guide/embedding.md create mode 100644 tensorflow/docs_src/guide/estimators.md create mode 100644 tensorflow/docs_src/guide/faq.md create mode 100644 tensorflow/docs_src/guide/feature_columns.md create mode 100644 tensorflow/docs_src/guide/graph_viz.md create mode 100644 tensorflow/docs_src/guide/graphs.md create mode 100644 tensorflow/docs_src/guide/index.md create mode 100644 tensorflow/docs_src/guide/keras.md create mode 100644 tensorflow/docs_src/guide/leftnav_files create mode 100644 tensorflow/docs_src/guide/low_level_intro.md create mode 100644 tensorflow/docs_src/guide/premade_estimators.md create mode 100644 tensorflow/docs_src/guide/saved_model.md create mode 100644 tensorflow/docs_src/guide/summaries_and_tensorboard.md create mode 100644 tensorflow/docs_src/guide/tensorboard_histograms.md create mode 100644 tensorflow/docs_src/guide/tensors.md create mode 100644 tensorflow/docs_src/guide/using_gpu.md create mode 100644 tensorflow/docs_src/guide/using_tpu.md create mode 100644 tensorflow/docs_src/guide/variables.md create mode 100644 tensorflow/docs_src/guide/version_compat.md delete mode 100644 tensorflow/docs_src/programmers_guide/checkpoints.md delete mode 100644 tensorflow/docs_src/programmers_guide/custom_estimators.md delete mode 100644 tensorflow/docs_src/programmers_guide/datasets.md delete mode 100644 tensorflow/docs_src/programmers_guide/datasets_for_estimators.md delete mode 100644 tensorflow/docs_src/programmers_guide/debugger.md delete mode 100644 tensorflow/docs_src/programmers_guide/eager.md delete mode 100644 tensorflow/docs_src/programmers_guide/embedding.md delete mode 100644 tensorflow/docs_src/programmers_guide/estimators.md delete mode 100644 tensorflow/docs_src/programmers_guide/faq.md delete mode 100644 
tensorflow/docs_src/programmers_guide/feature_columns.md delete mode 100644 tensorflow/docs_src/programmers_guide/graph_viz.md delete mode 100644 tensorflow/docs_src/programmers_guide/graphs.md delete mode 100644 tensorflow/docs_src/programmers_guide/index.md delete mode 100644 tensorflow/docs_src/programmers_guide/keras.md delete mode 100644 tensorflow/docs_src/programmers_guide/leftnav_files delete mode 100644 tensorflow/docs_src/programmers_guide/low_level_intro.md delete mode 100644 tensorflow/docs_src/programmers_guide/premade_estimators.md delete mode 100644 tensorflow/docs_src/programmers_guide/saved_model.md delete mode 100644 tensorflow/docs_src/programmers_guide/summaries_and_tensorboard.md delete mode 100644 tensorflow/docs_src/programmers_guide/tensorboard_histograms.md delete mode 100644 tensorflow/docs_src/programmers_guide/tensors.md delete mode 100644 tensorflow/docs_src/programmers_guide/using_gpu.md delete mode 100644 tensorflow/docs_src/programmers_guide/using_tpu.md delete mode 100644 tensorflow/docs_src/programmers_guide/variables.md delete mode 100644 tensorflow/docs_src/programmers_guide/version_compat.md diff --git a/README.md b/README.md index 6fb4486d0d..4e4d139bd1 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ data flow graphs. The graph nodes represent mathematical operations, while the graph edges represent the multidimensional data arrays (tensors) that flow between them. This flexible architecture enables you to deploy computation to one or more CPUs or GPUs in a desktop, server, or mobile device without rewriting -code. TensorFlow also includes [TensorBoard](https://www.tensorflow.org/programmers_guide/summaries_and_tensorboard), a data visualization toolkit. +code. TensorFlow also includes [TensorBoard](https://www.tensorflow.org/guide/summaries_and_tensorboard), a data visualization toolkit. TensorFlow was originally developed by researchers and engineers working on the Google Brain team within Google's Machine Intelligence Research diff --git a/RELEASE.md b/RELEASE.md index 510eca5467..5fec61af7e 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -467,7 +467,7 @@ answered questions, and were part of inspiring discussions. ## Major Features And Improvements * `tf.keras` is now part of the core TensorFlow API. -* [`tf.data`](http://tensorflow.org/programmers_guide/datasets) is now part of +* [`tf.data`](http://tensorflow.org/guide/datasets) is now part of the core TensorFlow API. * The API is now subject to backwards compatibility guarantees. @@ -495,7 +495,7 @@ answered questions, and were part of inspiring discussions. * TensorFlow Debugger (tfdbg): * Add `eval` command to allow evaluation of arbitrary Python/numpy expressions in tfdbg command-line interface. See - [Debugging TensorFlow Programs](https://www.tensorflow.org/programmers_guide/debugger) + [Debugging TensorFlow Programs](https://www.tensorflow.org/guide/debugger) for more details. * Usability improvement: The frequently used tensor filter `has_inf_or_nan` is now added to `Session` wrappers and hooks by default. So there is no need @@ -782,7 +782,7 @@ answered questions, and were part of inspiring discussions. * Support client-provided ClusterSpec's and propagate them to all workers to enable the creation of dynamic TensorFlow clusters. * TensorFlow C library now available for Windows. * We released a new open-source version of TensorBoard. 
-* [`SavedModel CLI`](https://www.tensorflow.org/versions/master/programmers_guide/saved_model_cli) tool available to inspect and execute MetaGraph in SavedModel +* [`SavedModel CLI`](https://www.tensorflow.org/versions/master/guide/saved_model_cli) tool available to inspect and execute MetaGraph in SavedModel * Android releases of TensorFlow are now pushed to jcenter for easier integration into apps. See https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/android/README.md diff --git a/tensorflow/contrib/autograph/README.md b/tensorflow/contrib/autograph/README.md index 674859bed4..47b1d4a99a 100644 --- a/tensorflow/contrib/autograph/README.md +++ b/tensorflow/contrib/autograph/README.md @@ -4,7 +4,7 @@ IMPORTANT: AutoGraph is alpha software, and under active development. Expect rou AutoGraph is a Python to TensorFlow compiler. -With AutoGraph, you can write [Eager style](https://www.tensorflow.org/programmers_guide/eager) code in a concise manner, and run it as a TensorFlow graph. AutoGraph uses source code transformation and partial evaluation to generate Python code that builds an equivalent TensorFlow subgraph. The result is code that behaves like ops and can be freely combined with other TensorFlow ops. +With AutoGraph, you can write [Eager style](https://www.tensorflow.org/guide/eager) code in a concise manner, and run it as a TensorFlow graph. AutoGraph uses source code transformation and partial evaluation to generate Python code that builds an equivalent TensorFlow subgraph. The result is code that behaves like ops and can be freely combined with other TensorFlow ops. For example, this Python function: diff --git a/tensorflow/contrib/data/__init__.py b/tensorflow/contrib/data/__init__.py index 9c6a13333e..3510e7b1ad 100644 --- a/tensorflow/contrib/data/__init__.py +++ b/tensorflow/contrib/data/__init__.py @@ -20,7 +20,7 @@ be used in conjunction with the @{tf.data.Dataset} API. Note that the guarantees as `tf.data`, but we will provide deprecation advice in advance of removing existing functionality. -See the @{$datasets$Importing Data} Programmer's Guide for an overview. +See @{$guide/datasets$Importing Data} for an overview. 
@@Counter @@CheckpointInputPipelineHook diff --git a/tensorflow/contrib/eager/README.md b/tensorflow/contrib/eager/README.md index 4384431e7b..86d203452e 100644 --- a/tensorflow/contrib/eager/README.md +++ b/tensorflow/contrib/eager/README.md @@ -44,7 +44,7 @@ Installation instructions at https://www.tensorflow.org/install/ For an introduction to eager execution in TensorFlow, see: -- [User Guide](https://www.tensorflow.org/programmers_guide/eager) ([source](../../docs_src/programmers_guide/eager.md)) +- [User Guide](https://www.tensorflow.org/guide/eager) ([source](../../docs_src/guide/eager.md)) - Notebook: [Basic Usage](python/examples/notebooks/1_basics.ipynb) - Notebook: [Gradients](python/examples/notebooks/2_gradients.ipynb) - Notebook: [Importing Data](python/examples/notebooks/3_datasets.ipynb) diff --git a/tensorflow/contrib/eager/python/examples/notebooks/3_datasets.ipynb b/tensorflow/contrib/eager/python/examples/notebooks/3_datasets.ipynb index bfcc7feb07..d268cbcd91 100644 --- a/tensorflow/contrib/eager/python/examples/notebooks/3_datasets.ipynb +++ b/tensorflow/contrib/eager/python/examples/notebooks/3_datasets.ipynb @@ -9,7 +9,7 @@ "source": [ "# Eager Execution Tutorial: Importing Data\n", "\n", - "This notebook demonstrates the use of the [`tf.data.Dataset` API](https://www.tensorflow.org/programmers_guide/datasets) to build pipelines to feed data to your program. It covers:\n", + "This notebook demonstrates the use of the [`tf.data.Dataset` API](https://www.tensorflow.org/guide/datasets) to build pipelines to feed data to your program. It covers:\n", "\n", "* Creating a `Dataset`.\n", "* Iteration over a `Dataset` with eager execution enabled.\n", @@ -18,7 +18,7 @@ "\n", "If you're familiar with TensorFlow graphs, the API for constructing the `Dataset` object remains exactly the same when eager execution is enabled, but the process of iterating over elements of the dataset is slightly simpler.\n", "You can use Python iteration over the `tf.data.Dataset` object and do not need to explicitly create an `tf.data.Iterator` object.\n", - "As a result, the discussion on iterators in the [Programmer's Guide](https://www.tensorflow.org/programmers_guide/datasets) is not relevant when eager execution is enabled." + "As a result, the discussion on iterators in the [TensorFlow Guide](https://www.tensorflow.org/guide/datasets) is not relevant when eager execution is enabled." ] }, { @@ -63,7 +63,7 @@ "source": [ "# Step 1: Create a source `Dataset`\n", "\n", - "Create a _source_ dataset using one of the factory functions like [`Dataset.from_tensors`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#from_tensors), [`Dataset.from_tensor_slices`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#from_tensor_slices) or using objects that read from files like [`TextLineDataset`](https://www.tensorflow.org/api_docs/python/tf/data/TextLineDataset) or [`TFRecordDataset`](https://www.tensorflow.org/api_docs/python/tf/data/TFRecordDataset). See the [Programmer's Guide](https://www.google.com/url?sa=D\u0026q=https%3A%2F%2Fwww.tensorflow.org%2Fprogrammers_guide%2Fdatasets%23reading_input_data) for more information." 
+ "Create a _source_ dataset using one of the factory functions like [`Dataset.from_tensors`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#from_tensors), [`Dataset.from_tensor_slices`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#from_tensor_slices) or using objects that read from files like [`TextLineDataset`](https://www.tensorflow.org/api_docs/python/tf/data/TextLineDataset) or [`TFRecordDataset`](https://www.tensorflow.org/api_docs/python/tf/data/TFRecordDataset). See the [TensorFlow Guide](https://www.tensorflow.org/guide/datasets#reading_input_data) for more information." ] }, { diff --git a/tensorflow/contrib/eager/python/g3doc/guide.md b/tensorflow/contrib/eager/python/g3doc/guide.md index 2d2aba6908..23f33d0230 100644 --- a/tensorflow/contrib/eager/python/g3doc/guide.md +++ b/tensorflow/contrib/eager/python/g3doc/guide.md @@ -4,8 +4,8 @@ Eager execution is a feature that makes TensorFlow execute operations immediately: concrete values are returned, instead of creating a computational graph that is executed later. -A user guide is available: https://www.tensorflow.org/programmers_guide/eager -([source file](../../../../docs_src/programmers_guide/eager.md)) +A user guide is available: https://www.tensorflow.org/guide/eager +([source file](../../../../docs_src/guide/eager.md)) We welcome feedback through [GitHub issues](https://github.com/tensorflow/tensorflow/labels/comp:eager). diff --git a/tensorflow/contrib/lite/toco/README.md b/tensorflow/contrib/lite/toco/README.md index 522e260ad2..ee83c7a6e3 100644 --- a/tensorflow/contrib/lite/toco/README.md +++ b/tensorflow/contrib/lite/toco/README.md @@ -17,7 +17,7 @@ Usage information is given in these documents: Once an application developer has a trained TensorFlow model, TOCO will accept that model and generate a TensorFlow Lite [FlatBuffer](https://google.github.io/flatbuffers/) file. TOCO currently supports -[SavedModels](https://www.tensorflow.org/programmers_guide/saved_model#using_savedmodel_with_estimators) +[SavedModels](https://www.tensorflow.org/guide/saved_model#using_savedmodel_with_estimators) and frozen graphs (models generated via [freeze_graph.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/tools/freeze_graph.py)). The TensorFlow Lite FlatBuffer file can be shipped to client devices, generally diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py index 7c770912b4..c57acd0a2d 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py @@ -1103,7 +1103,7 @@ class _InputPipeline(object): err_msg = ('Input pipeline contains one or more QueueRunners. ' 'It could be slow and not scalable. Please consider ' 'converting your input pipeline to use `tf.data` instead (see ' - 'https://www.tensorflow.org/programmers_guide/datasets for ' + 'https://www.tensorflow.org/guide/datasets for ' 'instructions.') if _WRAP_INPUT_FN_INTO_WHILE_LOOP: raise RuntimeError(err_msg) diff --git a/tensorflow/core/protobuf/config.proto b/tensorflow/core/protobuf/config.proto index 9a48f43a63..d83215d5c2 100644 --- a/tensorflow/core/protobuf/config.proto +++ b/tensorflow/core/protobuf/config.proto @@ -147,7 +147,7 @@ message GPUOptions { // Everything inside experimental is subject to change and is not subject // to API stability guarantees in - // https://www.tensorflow.org/programmers_guide/version_compat. + // https://www.tensorflow.org/guide/version_compat. 
Experimental experimental = 9; }; @@ -381,7 +381,7 @@ message ConfigProto { // Everything inside Experimental is subject to change and is not subject // to API stability guarantees in - // https://www.tensorflow.org/programmers_guide/version_compat. + // https://www.tensorflow.org/guide/version_compat. message Experimental { // Task name for group resolution. string collective_group_leader = 1; @@ -426,7 +426,7 @@ message RunOptions { // Everything inside Experimental is subject to change and is not subject // to API stability guarantees in - // https://www.tensorflow.org/programmers_guide/version_compat. + // https://www.tensorflow.org/guide/version_compat. message Experimental { // If non-zero, declares that this graph is going to use collective // ops and must synchronize step_ids with any other graph with this diff --git a/tensorflow/docs_src/api_guides/python/client.md b/tensorflow/docs_src/api_guides/python/client.md index eef23696db..27fc8610bf 100644 --- a/tensorflow/docs_src/api_guides/python/client.md +++ b/tensorflow/docs_src/api_guides/python/client.md @@ -3,7 +3,7 @@ This library contains classes for launching graphs and executing operations. -@{$programmers_guide/low_level_intro$This guide} has examples of how a graph +@{$guide/low_level_intro$This guide} has examples of how a graph is launched in a @{tf.Session}. ## Session management diff --git a/tensorflow/docs_src/api_guides/python/input_dataset.md b/tensorflow/docs_src/api_guides/python/input_dataset.md index a6e2fc48e0..a6612d1bf7 100644 --- a/tensorflow/docs_src/api_guides/python/input_dataset.md +++ b/tensorflow/docs_src/api_guides/python/input_dataset.md @@ -2,8 +2,7 @@ [TOC] @{tf.data.Dataset} allows you to build complex input pipelines. See the -@{$datasets$programmer's guide} for an in-depth explanation of how to use this -API. +@{$guide/datasets} for an in-depth explanation of how to use this API. ## Reader classes diff --git a/tensorflow/docs_src/api_guides/python/reading_data.md b/tensorflow/docs_src/api_guides/python/reading_data.md index 5bbbfd3216..d7d0904ae2 100644 --- a/tensorflow/docs_src/api_guides/python/reading_data.md +++ b/tensorflow/docs_src/api_guides/python/reading_data.md @@ -16,8 +16,8 @@ There are four methods of getting data into a TensorFlow program: ## `tf.data` API -See the @{$datasets$programmer's guide} for an in-depth explanation of -@{tf.data.Dataset}. The `tf.data` API enables you to extract and preprocess data +See the @{$guide/datasets} for an in-depth explanation of @{tf.data.Dataset}. +The `tf.data` API enables you to extract and preprocess data from different input/file formats, and apply transformations such as batching, shuffling, and mapping functions over the dataset. This is an improved version of the old input methods---feeding and `QueueRunner`---which are described @@ -511,8 +511,8 @@ You can have the train and eval in the same graph in the same process, and share their trained variables or layers. See @{$variables$the shared variables tutorial}. To support the single-graph approach -@{$programmers_guide/datasets$`tf.data`} also supplies -@{$programmers_guide/datasets#creating_an_iterator$advanced iterator types} that +@{$guide/datasets$`tf.data`} also supplies +@{$guide/datasets#creating_an_iterator$advanced iterator types} that that allow the user to change the input pipeline without rebuilding the graph or session. 
diff --git a/tensorflow/docs_src/deploy/distributed.md b/tensorflow/docs_src/deploy/distributed.md index d7ed6b1deb..8e2c818e39 100644 --- a/tensorflow/docs_src/deploy/distributed.md +++ b/tensorflow/docs_src/deploy/distributed.md @@ -2,7 +2,7 @@ This document shows how to create a cluster of TensorFlow servers, and how to distribute a computation graph across that cluster. We assume that you are -familiar with the @{$programmers_guide/low_level_intro$basic concepts} of +familiar with the @{$guide/low_level_intro$basic concepts} of writing low level TensorFlow programs. ## Hello distributed TensorFlow! diff --git a/tensorflow/docs_src/extend/architecture.md b/tensorflow/docs_src/extend/architecture.md index c8f522a03a..84435a57f2 100644 --- a/tensorflow/docs_src/extend/architecture.md +++ b/tensorflow/docs_src/extend/architecture.md @@ -7,9 +7,8 @@ learning models and system-level optimizations. This document describes the system architecture that makes this combination of scale and flexibility possible. It assumes that you have basic familiarity with TensorFlow programming concepts such as the computation graph, operations, -and sessions. See @{$programmers_guide/low_level_intro$this document} -for an introduction to these topics. Some familiarity -with @{$distributed$distributed TensorFlow} +and sessions. See @{$guide/low_level_intro$this document} for an introduction to +these topics. Some familiarity with @{$distributed$distributed TensorFlow} will also be helpful. This document is for developers who want to extend TensorFlow in some way not diff --git a/tensorflow/docs_src/get_started/_index.yaml b/tensorflow/docs_src/get_started/_index.yaml index af255a482d..277fc852fb 100644 --- a/tensorflow/docs_src/get_started/_index.yaml +++ b/tensorflow/docs_src/get_started/_index.yaml @@ -74,7 +74,7 @@ landing_page: The high-level Keras API provides building blocks to create and train deep learning models. Start with these beginner-friendly notebook examples, then read the - TensorFlow Keras guide. + TensorFlow Keras guide.

  1. Basic classification
  2. @@ -85,7 +85,7 @@ landing_page:
- Read the Keras guide + Read the Keras guide
- classname: tfo-landing-row-item-code-block @@ -123,7 +123,7 @@ landing_page:

Eager execution provides an imperative, define-by-run interface for advanced operations. Write custom layers, forward passes, and training loops with auto‑differentiation. Start with - these notebooks, then read the eager execution guide. + these notebooks, then read the eager execution guide.

  1. @@ -165,7 +165,7 @@ landing_page:
- Read the eager execution guide + Read the eager execution guide
- custom_html: > @@ -177,7 +177,7 @@ landing_page:

Estimators can train large models on multiple machines in a production environment. Try the examples below and read the - Estimators guide. + Estimators guide.

  1. How to build a simple text classifier with TF-Hub
  2. @@ -186,7 +186,7 @@ landing_page:
- Read the Estimators guide + Read the Estimators guide
diff --git a/tensorflow/docs_src/get_started/next_steps.md b/tensorflow/docs_src/get_started/next_steps.md index 79c0ef3346..6318a39c6c 100644 --- a/tensorflow/docs_src/get_started/next_steps.md +++ b/tensorflow/docs_src/get_started/next_steps.md @@ -2,9 +2,9 @@ ## Learn more about TensorFlow -* The [TensorFlow Guide](/programmers_guide) includes usage guides for the +* The [TensorFlow Guide](/guide) includes usage guides for the high-level APIs, as well as advanced TensorFlow operations. -* [Premade Estimators](/programmers_guide/premade_estimators) are designed to +* [Premade Estimators](/guide/premade_estimators) are designed to get results out of the box. Use TensorFlow without building your own models. * [TensorFlow.js](https://js.tensorflow.org/) allows web developers to train and deploy ML models in the browser and using Node.js. diff --git a/tensorflow/docs_src/guide/checkpoints.md b/tensorflow/docs_src/guide/checkpoints.md new file mode 100644 index 0000000000..dfb2626b86 --- /dev/null +++ b/tensorflow/docs_src/guide/checkpoints.md @@ -0,0 +1,238 @@ +# Checkpoints + +This document examines how to save and restore TensorFlow models built with +Estimators. TensorFlow provides two model formats: + +* checkpoints, which is a format dependent on the code that created + the model. +* SavedModel, which is a format independent of the code that created + the model. + +This document focuses on checkpoints. For details on `SavedModel`, see the +@{$saved_model$Saving and Restoring} guide. + + +## Sample code + +This document relies on the same +[Iris classification example](https://github.com/tensorflow/models/blob/master/samples/core/get_started/premade_estimator.py) detailed in @{$premade_estimators$Getting Started with TensorFlow}. +To download and access the example, invoke the following two commands: + +```shell +git clone https://github.com/tensorflow/models/ +cd models/samples/core/get_started +``` + +Most of the code snippets in this document are minor variations +on `premade_estimator.py`. + + +## Saving partially-trained models + +Estimators automatically write the following to disk: + +* **checkpoints**, which are versions of the model created during training. +* **event files**, which contain information that + [TensorBoard](https://developers.google.com/machine-learning/glossary/#TensorBoard) + uses to create visualizations. + +To specify the top-level directory in which the Estimator stores its +information, assign a value to the optional `model_dir` argument of *any* +`Estimator`'s constructor. +Taking `DNNClassifier` as an example, +the following code sets the `model_dir` +argument to the `models/iris` directory: + +```python +classifier = tf.estimator.DNNClassifier( + feature_columns=my_feature_columns, + hidden_units=[10, 10], + n_classes=3, + model_dir='models/iris') +``` + +Suppose you call the Estimator's `train` method. For example: + + +```python +classifier.train( + input_fn=lambda:train_input_fn(train_x, train_y, batch_size=100), + steps=200) +``` + +As suggested by the following diagrams, the first call to `train` +adds checkpoints and other files to the `model_dir` directory: + +
+[Figure: The first call to train().]
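+
+One way to check the checkpoint state from Python, rather than the shell, is
+`tf.train.latest_checkpoint`. A minimal sketch, assuming the `models/iris`
+directory created above:
+
+```python
+import tensorflow as tf
+
+# Prefix of the most recent checkpoint, e.g. 'models/iris/model.ckpt-200',
+# or None if no checkpoint has been written yet.
+latest = tf.train.latest_checkpoint('models/iris')
+print(latest)
+
+# The CheckpointState proto also lists every retained checkpoint.
+ckpt_state = tf.train.get_checkpoint_state('models/iris')
+print(ckpt_state.all_model_checkpoint_paths)
+```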
+ + +To see the objects in the created `model_dir` directory on a +UNIX-based system, just call `ls` as follows: + +```none +$ ls -1 models/iris +checkpoint +events.out.tfevents.timestamp.hostname +graph.pbtxt +model.ckpt-1.data-00000-of-00001 +model.ckpt-1.index +model.ckpt-1.meta +model.ckpt-200.data-00000-of-00001 +model.ckpt-200.index +model.ckpt-200.meta +``` + +The preceding `ls` command shows that the Estimator created checkpoints +at steps 1 (the start of training) and 200 (the end of training). + + +### Default checkpoint directory + +If you don't specify `model_dir` in an Estimator's constructor, the Estimator +writes checkpoint files to a temporary directory chosen by Python's +[tempfile.mkdtemp](https://docs.python.org/3/library/tempfile.html#tempfile.mkdtemp) +function. For example, the following Estimator constructor does *not* specify +the `model_dir` argument: + +```python +classifier = tf.estimator.DNNClassifier( + feature_columns=my_feature_columns, + hidden_units=[10, 10], + n_classes=3) + +print(classifier.model_dir) +``` + +The `tempfile.mkdtemp` function picks a secure, temporary directory +appropriate for your operating system. For example, a typical temporary +directory on macOS might be something like the following: + +```None +/var/folders/0s/5q9kfzfj3gx2knj0vj8p68yc00dhcr/T/tmpYm1Rwa +``` + +### Checkpointing Frequency + +By default, the Estimator saves +[checkpoints](https://developers.google.com/machine-learning/glossary/#checkpoint) +in the `model_dir` according to the following schedule: + +* Writes a checkpoint every 10 minutes (600 seconds). +* Writes a checkpoint when the `train` method starts (first iteration) + and completes (final iteration). +* Retains only the 5 most recent checkpoints in the directory. + +You may alter the default schedule by taking the following steps: + +1. Create a @{tf.estimator.RunConfig$`RunConfig`} object that defines the + desired schedule. +2. When instantiating the Estimator, pass that `RunConfig` object to the + Estimator's `config` argument. + +For example, the following code changes the checkpointing schedule to every +20 minutes and retains the 10 most recent checkpoints: + +```python +my_checkpointing_config = tf.estimator.RunConfig( + save_checkpoints_secs = 20*60, # Save checkpoints every 20 minutes. + keep_checkpoint_max = 10, # Retain the 10 most recent checkpoints. +) + +classifier = tf.estimator.DNNClassifier( + feature_columns=my_feature_columns, + hidden_units=[10, 10], + n_classes=3, + model_dir='models/iris', + config=my_checkpointing_config) +``` + +## Restoring your model + +The first time you call an Estimator's `train` method, TensorFlow saves a +checkpoint to the `model_dir`. Each subsequent call to the Estimator's +`train`, `evaluate`, or `predict` method causes the following: + +1. The Estimator builds the model's + [graph](https://developers.google.com/machine-learning/glossary/#graph) + by running the `model_fn()`. (For details on the `model_fn()`, see + @{$custom_estimators$Creating Custom Estimators.}) +2. The Estimator initializes the weights of the new model from the data + stored in the most recent checkpoint. + +In other words, as the following illustration suggests, once checkpoints +exist, TensorFlow rebuilds the model each time you call `train()`, +`evaluate()`, or `predict()`. + +
+[Figure: Subsequent calls to train(), evaluate(), or predict().]
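+
+Because this restoration is automatic, a freshly constructed Estimator that
+points at the same `model_dir` picks up the trained weights. A minimal sketch,
+assuming the `eval_input_fn`, `test_x`, and `test_y` from the same Iris
+example:
+
+```python
+# Re-create the Estimator; no variables are initialized at this point.
+classifier = tf.estimator.DNNClassifier(
+    feature_columns=my_feature_columns,
+    hidden_units=[10, 10],
+    n_classes=3,
+    model_dir='models/iris')
+
+# evaluate() rebuilds the graph from the model_fn and loads the weights from
+# the most recent checkpoint (e.g. models/iris/model.ckpt-200) before running.
+eval_result = classifier.evaluate(
+    input_fn=lambda: eval_input_fn(test_x, test_y, batch_size=100))
+print(eval_result)
+```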
+ + +### Avoiding a bad restoration + +Restoring a model's state from a checkpoint only works if the model +and checkpoint are compatible. For example, suppose you trained a +`DNNClassifier` Estimator containing two hidden layers, +each having 10 nodes: + +```python +classifier = tf.estimator.DNNClassifier( + feature_columns=feature_columns, + hidden_units=[10, 10], + n_classes=3, + model_dir='models/iris') + +classifier.train( + input_fn=lambda:train_input_fn(train_x, train_y, batch_size=100), + steps=200) +``` + +After training (and, therefore, after creating checkpoints in `models/iris`), +imagine that you changed the number of neurons in each hidden layer from 10 to +20 and then attempted to retrain the model: + +``` python +classifier2 = tf.estimator.DNNClassifier( + feature_columns=my_feature_columns, + hidden_units=[20, 20], # Change the number of neurons in the model. + n_classes=3, + model_dir='models/iris') + +classifier.train( + input_fn=lambda:train_input_fn(train_x, train_y, batch_size=100), + steps=200) +``` + +Since the state in the checkpoint is incompatible with the model described +in `classifier2`, retraining fails with the following error: + +```None +... +InvalidArgumentError (see above for traceback): tensor_name = +dnn/hiddenlayer_1/bias/t_0/Adagrad; shape in shape_and_slice spec [10] +does not match the shape stored in checkpoint: [20] +``` + +To run experiments in which you train and compare slightly different +versions of a model, save a copy of the code that created each +`model_dir`, possibly by creating a separate git branch for each version. +This separation will keep your checkpoints recoverable. + +## Summary + +Checkpoints provide an easy automatic mechanism for saving and restoring +models created by Estimators. + +See the @{$saved_model$Saving and Restoring} guide for details about: + +* Saving and restoring models using low-level TensorFlow APIs. +* Exporting and importing models in the SavedModel format, which is a + language-neutral, recoverable, serialization format. diff --git a/tensorflow/docs_src/guide/custom_estimators.md b/tensorflow/docs_src/guide/custom_estimators.md new file mode 100644 index 0000000000..fb20b35c12 --- /dev/null +++ b/tensorflow/docs_src/guide/custom_estimators.md @@ -0,0 +1,602 @@ + +# Creating Custom Estimators + +This document introduces custom Estimators. In particular, this document +demonstrates how to create a custom @{tf.estimator.Estimator$Estimator} that +mimics the behavior of the pre-made Estimator +@{tf.estimator.DNNClassifier$`DNNClassifier`} in solving the Iris problem. See +the @{$premade_estimators$Pre-Made Estimators chapter} for details +on the Iris problem. + +To download and access the example code invoke the following two commands: + +```shell +git clone https://github.com/tensorflow/models/ +cd models/samples/core/get_started +``` + +In this document we will be looking at +[`custom_estimator.py`](https://github.com/tensorflow/models/blob/master/samples/core/get_started/custom_estimator.py). +You can run it with the following command: + +```bsh +python custom_estimator.py +``` + +If you are feeling impatient, feel free to compare and contrast +[`custom_estimator.py`](https://github.com/tensorflow/models/blob/master/samples/core/get_started/custom_estimator.py) +with +[`premade_estimator.py`](https://github.com/tensorflow/models/blob/master/samples/core/get_started/premade_estimator.py). +(which is in the same directory). + + + +## Pre-made vs. 
custom + +As the following figure shows, pre-made Estimators are subclasses of the +@{tf.estimator.Estimator} base class, while custom Estimators are an instance +of tf.estimator.Estimator: + +
+[Figure: Premade Estimators are sub-classes of `Estimator`; custom Estimators are usually (direct) instances of `Estimator`.]
+ +Pre-made Estimators are fully baked. Sometimes though, you need more control +over an Estimator's behavior. That's where custom Estimators come in. You can +create a custom Estimator to do just about anything. If you want hidden layers +connected in some unusual fashion, write a custom Estimator. If you want to +calculate a unique +[metric](https://developers.google.com/machine-learning/glossary/#metric) +for your model, write a custom Estimator. Basically, if you want an Estimator +optimized for your specific problem, write a custom Estimator. + +A model function (or `model_fn`) implements the ML algorithm. The +only difference between working with pre-made Estimators and custom Estimators +is: + +* With pre-made Estimators, someone already wrote the model function for you. +* With custom Estimators, you must write the model function. + +Your model function could implement a wide range of algorithms, defining all +sorts of hidden layers and metrics. Like input functions, all model functions +must accept a standard group of input parameters and return a standard group of +output values. Just as input functions can leverage the Dataset API, model +functions can leverage the Layers API and the Metrics API. + +Let's see how to solve the Iris problem with a custom Estimator. A quick +reminder--here's the organization of the Iris model that we're trying to mimic: + +
+[Figure: A diagram of the network architecture: inputs, two hidden layers, and outputs.]
+ +## Write an Input function + +Our custom Estimator implementation uses the same input function as our +@{$premade_estimators$pre-made Estimator implementation}, from +[`iris_data.py`](https://github.com/tensorflow/models/blob/master/samples/core/get_started/iris_data.py). +Namely: + +```python +def train_input_fn(features, labels, batch_size): + """An input function for training""" + # Convert the inputs to a Dataset. + dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels)) + + # Shuffle, repeat, and batch the examples. + dataset = dataset.shuffle(1000).repeat().batch(batch_size) + + # Return the read end of the pipeline. + return dataset.make_one_shot_iterator().get_next() +``` + +This input function builds an input pipeline that yields batches of +`(features, labels)` pairs, where `features` is a dictionary features. + +## Create feature columns + +As detailed in the @{$premade_estimators$Premade Estimators} and +@{$feature_columns$Feature Columns} chapters, you must define +your model's feature columns to specify how the model should use each feature. +Whether working with pre-made Estimators or custom Estimators, you define +feature columns in the same fashion. + +The following code creates a simple `numeric_column` for each input feature, +indicating that the value of the input feature should be used directly as an +input to the model: + +```python +# Feature columns describe how to use the input. +my_feature_columns = [] +for key in train_x.keys(): + my_feature_columns.append(tf.feature_column.numeric_column(key=key)) +``` + +## Write a model function + +The model function we'll use has the following call signature: + +```python +def my_model_fn( + features, # This is batch_features from input_fn + labels, # This is batch_labels from input_fn + mode, # An instance of tf.estimator.ModeKeys + params): # Additional configuration +``` + +The first two arguments are the batches of features and labels returned from +the input function; that is, `features` and `labels` are the handles to the +data your model will use. The `mode` argument indicates whether the caller is +requesting training, predicting, or evaluation. + +The caller may pass `params` to an Estimator's constructor. Any `params` passed +to the constructor are in turn passed on to the `model_fn`. In +[`custom_estimator.py`](https://github.com/tensorflow/models/blob/master/samples/core/get_started/custom_estimator.py) +the following lines create the estimator and set the params to configure the +model. This configuration step is similar to how we configured the @{tf.estimator.DNNClassifier} in +@{$premade_estimators}. + +```python +classifier = tf.estimator.Estimator( + model_fn=my_model, + params={ + 'feature_columns': my_feature_columns, + # Two hidden layers of 10 nodes each. + 'hidden_units': [10, 10], + # The model must choose between 3 classes. + 'n_classes': 3, + }) +``` + +To implement a typical model function, you must do the following: + +* [Define the model](#define_the_model). 
+* Specify additional calculations for each of + the [three different modes](#modes): + * [Predict](#predict) + * [Evaluate](#evaluate) + * [Train](#train) + +## Define the model + +The basic deep neural network model must define the following three sections: + +* An [input layer](https://developers.google.com/machine-learning/glossary/#input_layer) +* One or more [hidden layers](https://developers.google.com/machine-learning/glossary/#hidden_layer) +* An [output layer](https://developers.google.com/machine-learning/glossary/#output_layer) + +### Define the input layer + +The first line of the `model_fn` calls @{tf.feature_column.input_layer} to +convert the feature dictionary and `feature_columns` into input for your model, +as follows: + +```python + # Use `input_layer` to apply the feature columns. + net = tf.feature_column.input_layer(features, params['feature_columns']) +``` + +The preceding line applies the transformations defined by your feature columns, +creating the model's input layer. + +
+[Figure: A diagram of the input layer, in this case a 1:1 mapping from raw inputs to features.]
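+
+To make the mapping concrete, here is a small standalone sketch (not from
+`custom_estimator.py`) of what `input_layer` produces for the four numeric
+Iris features: a dense `[batch_size, 4]` tensor.
+
+```python
+# Toy feature dictionary with a batch of two examples; the column names
+# follow the Iris example in iris_data.py.
+features = {
+    'SepalLength': tf.constant([[5.1], [6.4]]),
+    'SepalWidth':  tf.constant([[3.5], [3.2]]),
+    'PetalLength': tf.constant([[1.4], [4.5]]),
+    'PetalWidth':  tf.constant([[0.2], [1.5]]),
+}
+columns = [tf.feature_column.numeric_column(key) for key in sorted(features)]
+
+# input_layer concatenates the columns (ordered by name) into one dense tensor.
+net = tf.feature_column.input_layer(features, columns)  # shape: (2, 4)
+```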
+ + +### Hidden Layers + +If you are creating a deep neural network, you must define one or more hidden +layers. The Layers API provides a rich set of functions to define all types of +hidden layers, including convolutional, pooling, and dropout layers. For Iris, +we're simply going to call @{tf.layers.dense} to create hidden layers, with +dimensions defined by `params['hidden_layers']`. In a `dense` layer each node +is connected to every node in the preceding layer. Here's the relevant code: + +``` python + # Build the hidden layers, sized according to the 'hidden_units' param. + for units in params['hidden_units']: + net = tf.layers.dense(net, units=units, activation=tf.nn.relu) +``` + +* The `units` parameter defines the number of output neurons in a given layer. +* The `activation` parameter defines the [activation function](https://developers.google.com/machine-learning/glossary/#activation_function) — + [Relu](https://developers.google.com/machine-learning/glossary/#ReLU) in this + case. + +The variable `net` here signifies the current top layer of the network. During +the first iteration, `net` signifies the input layer. On each loop iteration +`tf.layers.dense` creates a new layer, which takes the previous layer's output +as its input, using the variable `net`. + +After creating two hidden layers, our network looks as follows. For +simplicity, the figure does not show all the units in each layer. + +
+[Figure: The input layer with two hidden layers added.]
+ +Note that @{tf.layers.dense} provides many additional capabilities, including +the ability to set a multitude of regularization parameters. For the sake of +simplicity, though, we're going to simply accept the default values of the +other parameters. + +### Output Layer + +We'll define the output layer by calling @{tf.layers.dense} yet again, this +time without an activation function: + +```python + # Compute logits (1 per class). + logits = tf.layers.dense(net, params['n_classes'], activation=None) +``` + +Here, `net` signifies the final hidden layer. Therefore, the full set of layers +is now connected as follows: + +
+[Figure: A logit output layer connected to the top hidden layer.]
+ +When defining an output layer, the `units` parameter specifies the number of +outputs. So, by setting `units` to `params['n_classes']`, the model produces +one output value per class. Each element of the output vector will contain the +score, or "logit", calculated for the associated class of Iris: Setosa, +Versicolor, or Virginica, respectively. + +Later on, these logits will be transformed into probabilities by the +@{tf.nn.softmax} function. + +## Implement training, evaluation, and prediction {#modes} + +The final step in creating a model function is to write branching code that +implements prediction, evaluation, and training. + +The model function gets invoked whenever someone calls the Estimator's `train`, +`evaluate`, or `predict` methods. Recall that the signature for the model +function looks like this: + +``` python +def my_model_fn( + features, # This is batch_features from input_fn + labels, # This is batch_labels from input_fn + mode, # An instance of tf.estimator.ModeKeys, see below + params): # Additional configuration +``` + +Focus on that third argument, mode. As the following table shows, when someone +calls `train`, `evaluate`, or `predict`, the Estimator framework invokes your model +function with the mode parameter set as follows: + +| Estimator method | Estimator Mode | +|:---------------------------------|:------------------| +|@{tf.estimator.Estimator.train$`train()`} |@{tf.estimator.ModeKeys.TRAIN$`ModeKeys.TRAIN`} | +|@{tf.estimator.Estimator.evaluate$`evaluate()`} |@{tf.estimator.ModeKeys.EVAL$`ModeKeys.EVAL`} | +|@{tf.estimator.Estimator.predict$`predict()`}|@{tf.estimator.ModeKeys.PREDICT$`ModeKeys.PREDICT`} | + +For example, suppose you instantiate a custom Estimator to generate an object +named `classifier`. Then, you make the following call: + +``` python +classifier = tf.estimator.Estimator(...) +classifier.train(input_fn=lambda: my_input_fn(FILE_TRAIN, True, 500)) +``` +The Estimator framework then calls your model function with mode set to +`ModeKeys.TRAIN`. + +Your model function must provide code to handle all three of the mode values. +For each mode value, your code must return an instance of +`tf.estimator.EstimatorSpec`, which contains the information the caller +requires. Let's examine each mode. + +### Predict + +When the Estimator's `predict` method is called, the `model_fn` receives +`mode = ModeKeys.PREDICT`. In this case, the model function must return a +`tf.estimator.EstimatorSpec` containing the prediction. + +The model must have been trained prior to making a prediction. The trained model +is stored on disk in the `model_dir` directory established when you +instantiated the Estimator. + +The code to generate the prediction for this model looks as follows: + +```python +# Compute predictions. +predicted_classes = tf.argmax(logits, 1) +if mode == tf.estimator.ModeKeys.PREDICT: + predictions = { + 'class_ids': predicted_classes[:, tf.newaxis], + 'probabilities': tf.nn.softmax(logits), + 'logits': logits, + } + return tf.estimator.EstimatorSpec(mode, predictions=predictions) +``` +The prediction dictionary contains everything that your model returns when run +in prediction mode. + +
+[Figure: Additional outputs added to the output layer.]
+ +The `predictions` holds the following three key/value pairs: + +* `class_ids` holds the class id (0, 1, or 2) representing the model's + prediction of the most likely species for this example. +* `probabilities` holds the three probabilities (in this example, 0.02, 0.95, + and 0.03) +* `logit` holds the raw logit values (in this example, -1.3, 2.6, and -0.9) + +We return that dictionary to the caller via the `predictions` parameter of the +@{tf.estimator.EstimatorSpec}. The Estimator's +@{tf.estimator.Estimator.predict$`predict`} method will yield these +dictionaries. + +### Calculate the loss + +For both [training](#train) and [evaluation](#evaluate) we need to calculate the +model's loss. This is the +[objective](https://developers.google.com/machine-learning/glossary/#objective) +that will be optimized. + +We can calculate the loss by calling @{tf.losses.sparse_softmax_cross_entropy}. +The value returned by this function will be lowest, approximately 0, +probability of the correct class (at index `label`) is near 1.0. The loss value +returned is progressively larger as the probability of the correct class +decreases. + +This function returns the average over the whole batch. + +```python +# Compute loss. +loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits) +``` + +### Evaluate + +When the Estimator's `evaluate` method is called, the `model_fn` receives +`mode = ModeKeys.EVAL`. In this case, the model function must return a +`tf.estimator.EstimatorSpec` containing the model's loss and optionally one +or more metrics. + +Although returning metrics is optional, most custom Estimators do return at +least one metric. TensorFlow provides a Metrics module @{tf.metrics} to +calculate common metrics. For brevity's sake, we'll only return accuracy. The +@{tf.metrics.accuracy} function compares our predictions against the +true values, that is, against the labels provided by the input function. The +@{tf.metrics.accuracy} function requires the labels and predictions to have the +same shape. Here's the call to @{tf.metrics.accuracy}: + +``` python +# Compute evaluation metrics. +accuracy = tf.metrics.accuracy(labels=labels, + predictions=predicted_classes, + name='acc_op') +``` + +The @{tf.estimator.EstimatorSpec$`EstimatorSpec`} returned for evaluation +typically contains the following information: + +* `loss`, which is the model's loss +* `eval_metric_ops`, which is an optional dictionary of metrics. + +So, we'll create a dictionary containing our sole metric. If we had calculated +other metrics, we would have added them as additional key/value pairs to that +same dictionary. Then, we'll pass that dictionary in the `eval_metric_ops` +argument of `tf.estimator.EstimatorSpec`. Here's the code: + +```python +metrics = {'accuracy': accuracy} +tf.summary.scalar('accuracy', accuracy[1]) + +if mode == tf.estimator.ModeKeys.EVAL: + return tf.estimator.EstimatorSpec( + mode, loss=loss, eval_metric_ops=metrics) +``` + +The @{tf.summary.scalar} will make accuracy available to TensorBoard +in both `TRAIN` and `EVAL` modes. (More on this later). + +### Train + +When the Estimator's `train` method is called, the `model_fn` is called +with `mode = ModeKeys.TRAIN`. In this case, the model function must return an +`EstimatorSpec` that contains the loss and a training operation. + +Building the training operation will require an optimizer. We will use +@{tf.train.AdagradOptimizer} because we're mimicking the `DNNClassifier`, which +also uses `Adagrad` by default. 
The `tf.train` package provides many other +optimizers—feel free to experiment with them. + +Here is the code that builds the optimizer: + +``` python +optimizer = tf.train.AdagradOptimizer(learning_rate=0.1) +``` + +Next, we build the training operation using the optimizer's +@{tf.train.Optimizer.minimize$`minimize`} method on the loss we calculated +earlier. + +The `minimize` method also takes a `global_step` parameter. TensorFlow uses this +parameter to count the number of training steps that have been processed +(to know when to end a training run). Furthermore, the `global_step` is +essential for TensorBoard graphs to work correctly. Simply call +@{tf.train.get_global_step} and pass the result to the `global_step` +argument of `minimize`. + +Here's the code to train the model: + +``` python +train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step()) +``` + +The @{tf.estimator.EstimatorSpec$`EstimatorSpec`} returned for training +must have the following fields set: + +* `loss`, which contains the value of the loss function. +* `train_op`, which executes a training step. + +Here's our code to call `EstimatorSpec`: + +```python +return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op) +``` + +The model function is now complete. + +## The custom Estimator + +Instantiate the custom Estimator through the Estimator base class as follows: + +```python + # Build 2 hidden layer DNN with 10, 10 units respectively. + classifier = tf.estimator.Estimator( + model_fn=my_model, + params={ + 'feature_columns': my_feature_columns, + # Two hidden layers of 10 nodes each. + 'hidden_units': [10, 10], + # The model must choose between 3 classes. + 'n_classes': 3, + }) +``` +Here the `params` dictionary serves the same purpose as the key-word +arguments of `DNNClassifier`; that is, the `params` dictionary lets you +configure your Estimator without modifying the code in the `model_fn`. + +The rest of the code to train, evaluate, and generate predictions using our +Estimator is the same as in the +@{$premade_estimators$Premade Estimators} chapter. For +example, the following line will train the model: + +```python +# Train the Model. +classifier.train( + input_fn=lambda:iris_data.train_input_fn(train_x, train_y, args.batch_size), + steps=args.train_steps) +``` + +## TensorBoard + +You can view training results for your custom Estimator in TensorBoard. To see +this reporting, start TensorBoard from your command line as follows: + +```bsh +# Replace PATH with the actual path passed as model_dir +tensorboard --logdir=PATH +``` + +Then, open TensorBoard by browsing to: [http://localhost:6006](http://localhost:6006) + +All the pre-made Estimators automatically log a lot of information to +TensorBoard. With custom Estimators, however, TensorBoard only provides one +default log (a graph of the loss) plus the information you explicitly tell +TensorBoard to log. For the custom Estimator you just created, TensorBoard +generates the following: + +
+[Figures: TensorBoard 'scalar' graphs for accuracy, loss, and steps/second.]
+ + +In brief, here's what the three graphs tell you: + +* global_step/sec: A performance indicator showing how many batches (gradient + updates) we processed per second as the model trains. + +* loss: The loss reported. + +* accuracy: The accuracy is recorded by the following two lines: + + * `eval_metric_ops={'my_accuracy': accuracy}`, during evaluation. + * `tf.summary.scalar('accuracy', accuracy[1])`, during training. + +These tensorboard graphs are one of the main reasons it's important to pass a +`global_step` to your optimizer's `minimize` method. The model can't record +the x-coordinate for these graphs without it. + +Note the following in the `my_accuracy` and `loss` graphs: + +* The orange line represents training. +* The blue dot represents evaluation. + +During training, summaries (the orange line) are recorded periodically as +batches are processed, which is why it becomes a graph spanning x-axis range. + +By contrast, evaluation produces only a single point on the graph for each call +to `evaluate`. This point contains the average over the entire evaluation call. +This has no width on the graph as it is evaluated entirely from the model state +at a particular training step (from a single checkpoint). + +As suggested in the following figure, you may see and also selectively +disable/enable the reporting using the controls on the left side. + +
+[Figure: Check-boxes allowing the user to select which runs are shown.]
+ + +## Summary + +Although pre-made Estimators can be an effective way to quickly create new +models, you will often need the additional flexibility that custom Estimators +provide. Fortunately, pre-made and custom Estimators follow the same +programming model. The only practical difference is that you must write a model +function for custom Estimators; everything else is the same. + +For more details, be sure to check out: + +* The + [official TensorFlow implementation of MNIST](https://github.com/tensorflow/models/tree/master/official/mnist), + which uses a custom estimator. +* The TensorFlow + [official models repository](https://github.com/tensorflow/models/tree/master/official), + which contains more curated examples using custom estimators. +* This [TensorBoard video](https://youtu.be/eBbEDRsCmv4), which introduces + TensorBoard. +* The @{$low_level_intro$Low Level Introduction}, which demonstrates + how to experiment directly with TensorFlow's low level APIs, making debugging + easier. diff --git a/tensorflow/docs_src/guide/datasets.md b/tensorflow/docs_src/guide/datasets.md new file mode 100644 index 0000000000..8b69860a68 --- /dev/null +++ b/tensorflow/docs_src/guide/datasets.md @@ -0,0 +1,823 @@ +# Importing Data + +The @{tf.data} API enables you to build complex input pipelines from +simple, reusable pieces. For example, the pipeline for an image model might +aggregate data from files in a distributed file system, apply random +perturbations to each image, and merge randomly selected images into a batch +for training. The pipeline for a text model might involve extracting symbols +from raw text data, converting them to embedding identifiers with a lookup +table, and batching together sequences of different lengths. The `tf.data` API +makes it easy to deal with large amounts of data, different data formats, and +complicated transformations. + +The `tf.data` API introduces two new abstractions to TensorFlow: + +* A `tf.data.Dataset` represents a sequence of elements, in which + each element contains one or more `Tensor` objects. For example, in an image + pipeline, an element might be a single training example, with a pair of + tensors representing the image data and a label. There are two distinct + ways to create a dataset: + + * Creating a **source** (e.g. `Dataset.from_tensor_slices()`) constructs a + dataset from + one or more `tf.Tensor` objects. + + * Applying a **transformation** (e.g. `Dataset.batch()`) constructs a dataset + from one or more `tf.data.Dataset` objects. + +* A `tf.data.Iterator` provides the main way to extract elements from a + dataset. The operation returned by `Iterator.get_next()` yields the next + element of a `Dataset` when executed, and typically acts as the interface + between input pipeline code and your model. The simplest iterator is a + "one-shot iterator", which is associated with a particular `Dataset` and + iterates through it once. For more sophisticated uses, the + `Iterator.initializer` operation enables you to reinitialize and parameterize + an iterator with different datasets, so that you can, for example, iterate + over training and validation data multiple times in the same program. + +## Basic mechanics + +This section of the guide describes the fundamentals of creating different kinds +of `Dataset` and `Iterator` objects, and how to extract data from them. + +To start an input pipeline, you must define a *source*. 
For example, to +construct a `Dataset` from some tensors in memory, you can use +`tf.data.Dataset.from_tensors()` or +`tf.data.Dataset.from_tensor_slices()`. Alternatively, if your input +data are on disk in the recommended TFRecord format, you can construct a +`tf.data.TFRecordDataset`. + +Once you have a `Dataset` object, you can *transform* it into a new `Dataset` by +chaining method calls on the `tf.data.Dataset` object. For example, you +can apply per-element transformations such as `Dataset.map()` (to apply a +function to each element), and multi-element transformations such as +`Dataset.batch()`. See the documentation for @{tf.data.Dataset} +for a complete list of transformations. + +The most common way to consume values from a `Dataset` is to make an +**iterator** object that provides access to one element of the dataset at a time +(for example, by calling `Dataset.make_one_shot_iterator()`). A +`tf.data.Iterator` provides two operations: `Iterator.initializer`, +which enables you to (re)initialize the iterator's state; and +`Iterator.get_next()`, which returns `tf.Tensor` objects that correspond to the +symbolic next element. Depending on your use case, you might choose a different +type of iterator, and the options are outlined below. + +### Dataset structure + +A dataset comprises elements that each have the same structure. An element +contains one or more `tf.Tensor` objects, called *components*. Each component +has a `tf.DType` representing the type of elements in the tensor, and a +`tf.TensorShape` representing the (possibly partially specified) static shape of +each element. The `Dataset.output_types` and `Dataset.output_shapes` properties +allow you to inspect the inferred types and shapes of each component of a +dataset element. The *nested structure* of these properties map to the structure +of an element, which may be a single tensor, a tuple of tensors, or a nested +tuple of tensors. For example: + +```python +dataset1 = tf.data.Dataset.from_tensor_slices(tf.random_uniform([4, 10])) +print(dataset1.output_types) # ==> "tf.float32" +print(dataset1.output_shapes) # ==> "(10,)" + +dataset2 = tf.data.Dataset.from_tensor_slices( + (tf.random_uniform([4]), + tf.random_uniform([4, 100], maxval=100, dtype=tf.int32))) +print(dataset2.output_types) # ==> "(tf.float32, tf.int32)" +print(dataset2.output_shapes) # ==> "((), (100,))" + +dataset3 = tf.data.Dataset.zip((dataset1, dataset2)) +print(dataset3.output_types) # ==> (tf.float32, (tf.float32, tf.int32)) +print(dataset3.output_shapes) # ==> "(10, ((), (100,)))" +``` + +It is often convenient to give names to each component of an element, for +example if they represent different features of a training example. In addition +to tuples, you can use `collections.namedtuple` or a dictionary mapping strings +to tensors to represent a single element of a `Dataset`. + +```python +dataset = tf.data.Dataset.from_tensor_slices( + {"a": tf.random_uniform([4]), + "b": tf.random_uniform([4, 100], maxval=100, dtype=tf.int32)}) +print(dataset.output_types) # ==> "{'a': tf.float32, 'b': tf.int32}" +print(dataset.output_shapes) # ==> "{'a': (), 'b': (100,)}" +``` + +The `Dataset` transformations support datasets of any structure. When using the +`Dataset.map()`, `Dataset.flat_map()`, and `Dataset.filter()` transformations, +which apply a function to each element, the element structure determines the +arguments of the function: + +```python +dataset1 = dataset1.map(lambda x: ...) + +dataset2 = dataset2.flat_map(lambda x, y: ...) 
+ +# Note: Argument destructuring is not available in Python 3. +dataset3 = dataset3.filter(lambda x, (y, z): ...) +``` + +### Creating an iterator + +Once you have built a `Dataset` to represent your input data, the next step is to +create an `Iterator` to access elements from that dataset. The `tf.data` API +currently supports the following iterators, in increasing level of +sophistication: + +* **one-shot**, +* **initializable**, +* **reinitializable**, and +* **feedable**. + +A **one-shot** iterator is the simplest form of iterator, which only supports +iterating once through a dataset, with no need for explicit initialization. +One-shot iterators handle almost all of the cases that the existing queue-based +input pipelines support, but they do not support parameterization. Using the +example of `Dataset.range()`: + +```python +dataset = tf.data.Dataset.range(100) +iterator = dataset.make_one_shot_iterator() +next_element = iterator.get_next() + +for i in range(100): + value = sess.run(next_element) + assert i == value +``` + +Note: Currently, one-shot iterators are the only type that is easily usable +with an `Estimator`. + +An **initializable** iterator requires you to run an explicit +`iterator.initializer` operation before using it. In exchange for this +inconvenience, it enables you to *parameterize* the definition of the dataset, +using one or more `tf.placeholder()` tensors that can be fed when you +initialize the iterator. Continuing the `Dataset.range()` example: + +```python +max_value = tf.placeholder(tf.int64, shape=[]) +dataset = tf.data.Dataset.range(max_value) +iterator = dataset.make_initializable_iterator() +next_element = iterator.get_next() + +# Initialize an iterator over a dataset with 10 elements. +sess.run(iterator.initializer, feed_dict={max_value: 10}) +for i in range(10): + value = sess.run(next_element) + assert i == value + +# Initialize the same iterator over a dataset with 100 elements. +sess.run(iterator.initializer, feed_dict={max_value: 100}) +for i in range(100): + value = sess.run(next_element) + assert i == value +``` + +A **reinitializable** iterator can be initialized from multiple different +`Dataset` objects. For example, you might have a training input pipeline that +uses random perturbations to the input images to improve generalization, and +a validation input pipeline that evaluates predictions on unmodified data. These +pipelines will typically use different `Dataset` objects that have the same +structure (i.e. the same types and compatible shapes for each component). + +```python +# Define training and validation datasets with the same structure. +training_dataset = tf.data.Dataset.range(100).map( + lambda x: x + tf.random_uniform([], -10, 10, tf.int64)) +validation_dataset = tf.data.Dataset.range(50) + +# A reinitializable iterator is defined by its structure. We could use the +# `output_types` and `output_shapes` properties of either `training_dataset` +# or `validation_dataset` here, because they are compatible. +iterator = tf.data.Iterator.from_structure(training_dataset.output_types, + training_dataset.output_shapes) +next_element = iterator.get_next() + +training_init_op = iterator.make_initializer(training_dataset) +validation_init_op = iterator.make_initializer(validation_dataset) + +# Run 20 epochs in which the training dataset is traversed, followed by the +# validation dataset. +for _ in range(20): + # Initialize an iterator over the training dataset. 
+ sess.run(training_init_op) + for _ in range(100): + sess.run(next_element) + + # Initialize an iterator over the validation dataset. + sess.run(validation_init_op) + for _ in range(50): + sess.run(next_element) +``` + +A **feedable** iterator can be used together with @{tf.placeholder} to select +what `Iterator` to use in each call to @{tf.Session.run}, via the familiar +`feed_dict` mechanism. It offers the same functionality as a reinitializable +iterator, but it does not require you to initialize the iterator from the start +of a dataset when you switch between iterators. For example, using the same +training and validation example from above, you can use +@{tf.data.Iterator.from_string_handle} to define a feedable iterator +that allows you to switch between the two datasets: + +```python +# Define training and validation datasets with the same structure. +training_dataset = tf.data.Dataset.range(100).map( + lambda x: x + tf.random_uniform([], -10, 10, tf.int64)).repeat() +validation_dataset = tf.data.Dataset.range(50) + +# A feedable iterator is defined by a handle placeholder and its structure. We +# could use the `output_types` and `output_shapes` properties of either +# `training_dataset` or `validation_dataset` here, because they have +# identical structure. +handle = tf.placeholder(tf.string, shape=[]) +iterator = tf.data.Iterator.from_string_handle( + handle, training_dataset.output_types, training_dataset.output_shapes) +next_element = iterator.get_next() + +# You can use feedable iterators with a variety of different kinds of iterator +# (such as one-shot and initializable iterators). +training_iterator = training_dataset.make_one_shot_iterator() +validation_iterator = validation_dataset.make_initializable_iterator() + +# The `Iterator.string_handle()` method returns a tensor that can be evaluated +# and used to feed the `handle` placeholder. +training_handle = sess.run(training_iterator.string_handle()) +validation_handle = sess.run(validation_iterator.string_handle()) + +# Loop forever, alternating between training and validation. +while True: + # Run 200 steps using the training dataset. Note that the training dataset is + # infinite, and we resume from where we left off in the previous `while` loop + # iteration. + for _ in range(200): + sess.run(next_element, feed_dict={handle: training_handle}) + + # Run one pass over the validation dataset. + sess.run(validation_iterator.initializer) + for _ in range(50): + sess.run(next_element, feed_dict={handle: validation_handle}) +``` + +### Consuming values from an iterator + +The `Iterator.get_next()` method returns one or more `tf.Tensor` objects that +correspond to the symbolic next element of an iterator. Each time these tensors +are evaluated, they take the value of the next element in the underlying +dataset. (Note that, like other stateful objects in TensorFlow, calling +`Iterator.get_next()` does not immediately advance the iterator. Instead you +must use the returned `tf.Tensor` objects in a TensorFlow expression, and pass +the result of that expression to `tf.Session.run()` to get the next elements and +advance the iterator.) + +If the iterator reaches the end of the dataset, executing +the `Iterator.get_next()` operation will raise a `tf.errors.OutOfRangeError`. +After this point the iterator will be in an unusable state, and you must +initialize it again if you want to use it further. 
+
+```python
+dataset = tf.data.Dataset.range(5)
+iterator = dataset.make_initializable_iterator()
+next_element = iterator.get_next()
+
+# Typically `result` will be the output of a model, or an optimizer's
+# training operation.
+result = tf.add(next_element, next_element)
+
+sess.run(iterator.initializer)
+print(sess.run(result))  # ==> "0"
+print(sess.run(result))  # ==> "2"
+print(sess.run(result))  # ==> "4"
+print(sess.run(result))  # ==> "6"
+print(sess.run(result))  # ==> "8"
+try:
+  sess.run(result)
+except tf.errors.OutOfRangeError:
+  print("End of dataset")  # ==> "End of dataset"
+```
+
+A common pattern is to wrap the "training loop" in a `try`-`except` block:
+
+```python
+sess.run(iterator.initializer)
+while True:
+  try:
+    sess.run(result)
+  except tf.errors.OutOfRangeError:
+    break
+```
+
+If each element of the dataset has a nested structure, the return value of
+`Iterator.get_next()` will be one or more `tf.Tensor` objects in the same
+nested structure:
+
+```python
+dataset1 = tf.data.Dataset.from_tensor_slices(tf.random_uniform([4, 10]))
+dataset2 = tf.data.Dataset.from_tensor_slices((tf.random_uniform([4]), tf.random_uniform([4, 100])))
+dataset3 = tf.data.Dataset.zip((dataset1, dataset2))
+
+iterator = dataset3.make_initializable_iterator()
+
+sess.run(iterator.initializer)
+next1, (next2, next3) = iterator.get_next()
+```
+
+Note that `next1`, `next2`, and `next3` are tensors produced by the
+same op/node (created by `Iterator.get_next()`). Therefore, evaluating *any* of
+these tensors will advance the iterator for all components. A typical consumer
+of an iterator will include all components in a single expression.
+
+### Saving iterator state
+
+The @{tf.contrib.data.make_saveable_from_iterator} function creates a
+`SaveableObject` from an iterator, which can be used to save and
+restore the current state of the iterator (and, effectively, the whole input
+pipeline). A saveable object thus created can be added to the @{tf.train.Saver}
+variables list or the `tf.GraphKeys.SAVEABLE_OBJECTS` collection for saving and
+restoring in the same manner as a @{tf.Variable}. Refer to
+@{$saved_model$Saving and Restoring} for details on how to save and restore
+variables.
+
+```python
+# Create saveable object from iterator.
+saveable = tf.contrib.data.make_saveable_from_iterator(iterator)
+
+# Save the iterator state by adding it to the saveable objects collection.
+tf.add_to_collection(tf.GraphKeys.SAVEABLE_OBJECTS, saveable)
+saver = tf.train.Saver()
+
+with tf.Session() as sess:
+
+  if should_checkpoint:
+    saver.save(sess, path_to_checkpoint)
+
+# Restore the iterator state.
+with tf.Session() as sess:
+  saver.restore(sess, path_to_checkpoint)
+```
+
+## Reading input data
+
+### Consuming NumPy arrays
+
+If all of your input data fit in memory, the simplest way to create a `Dataset`
+from them is to convert them to `tf.Tensor` objects and use
+`Dataset.from_tensor_slices()`.
+
+```python
+# Load the training data into two NumPy arrays, for example using `np.load()`.
+with np.load("/var/data/training_data.npy") as data:
+  features = data["features"]
+  labels = data["labels"]
+
+# Assume that each row of `features` corresponds to the same row as `labels`.
+assert features.shape[0] == labels.shape[0]
+
+dataset = tf.data.Dataset.from_tensor_slices((features, labels))
+```
+
+Note that the above code snippet will embed the `features` and `labels` arrays
+in your TensorFlow graph as `tf.constant()` operations.
This works well for a +small dataset, but wastes memory---because the contents of the array will be +copied multiple times---and can run into the 2GB limit for the `tf.GraphDef` +protocol buffer. + +As an alternative, you can define the `Dataset` in terms of `tf.placeholder()` +tensors, and *feed* the NumPy arrays when you initialize an `Iterator` over the +dataset. + +```python +# Load the training data into two NumPy arrays, for example using `np.load()`. +with np.load("/var/data/training_data.npy") as data: + features = data["features"] + labels = data["labels"] + +# Assume that each row of `features` corresponds to the same row as `labels`. +assert features.shape[0] == labels.shape[0] + +features_placeholder = tf.placeholder(features.dtype, features.shape) +labels_placeholder = tf.placeholder(labels.dtype, labels.shape) + +dataset = tf.data.Dataset.from_tensor_slices((features_placeholder, labels_placeholder)) +# [Other transformations on `dataset`...] +dataset = ... +iterator = dataset.make_initializable_iterator() + +sess.run(iterator.initializer, feed_dict={features_placeholder: features, + labels_placeholder: labels}) +``` + +### Consuming TFRecord data + +The `tf.data` API supports a variety of file formats so that you can process +large datasets that do not fit in memory. For example, the TFRecord file format +is a simple record-oriented binary format that many TensorFlow applications use +for training data. The `tf.data.TFRecordDataset` class enables you to +stream over the contents of one or more TFRecord files as part of an input +pipeline. + +```python +# Creates a dataset that reads all of the examples from two files. +filenames = ["/var/data/file1.tfrecord", "/var/data/file2.tfrecord"] +dataset = tf.data.TFRecordDataset(filenames) +``` + +The `filenames` argument to the `TFRecordDataset` initializer can either be a +string, a list of strings, or a `tf.Tensor` of strings. Therefore if you have +two sets of files for training and validation purposes, you can use a +`tf.placeholder(tf.string)` to represent the filenames, and initialize an +iterator from the appropriate filenames: + +```python +filenames = tf.placeholder(tf.string, shape=[None]) +dataset = tf.data.TFRecordDataset(filenames) +dataset = dataset.map(...) # Parse the record into tensors. +dataset = dataset.repeat() # Repeat the input indefinitely. +dataset = dataset.batch(32) +iterator = dataset.make_initializable_iterator() + +# You can feed the initializer with the appropriate filenames for the current +# phase of execution, e.g. training vs. validation. + +# Initialize `iterator` with training data. +training_filenames = ["/var/data/file1.tfrecord", "/var/data/file2.tfrecord"] +sess.run(iterator.initializer, feed_dict={filenames: training_filenames}) + +# Initialize `iterator` with validation data. +validation_filenames = ["/var/data/validation1.tfrecord", ...] +sess.run(iterator.initializer, feed_dict={filenames: validation_filenames}) +``` + +### Consuming text data + +Many datasets are distributed as one or more text files. The +`tf.data.TextLineDataset` provides an easy way to extract lines from +one or more text files. Given one or more filenames, a `TextLineDataset` will +produce one string-valued element per line of those files. Like a +`TFRecordDataset`, `TextLineDataset` accepts `filenames` as a `tf.Tensor`, so +you can parameterize it by passing a `tf.placeholder(tf.string)`. 
+ +```python +filenames = ["/var/data/file1.txt", "/var/data/file2.txt"] +dataset = tf.data.TextLineDataset(filenames) +``` + +By default, a `TextLineDataset` yields *every* line of each file, which may +not be desirable, for example if the file starts with a header line, or contains +comments. These lines can be removed using the `Dataset.skip()` and +`Dataset.filter()` transformations. To apply these transformations to each +file separately, we use `Dataset.flat_map()` to create a nested `Dataset` for +each file. + +```python +filenames = ["/var/data/file1.txt", "/var/data/file2.txt"] + +dataset = tf.data.Dataset.from_tensor_slices(filenames) + +# Use `Dataset.flat_map()` to transform each file as a separate nested dataset, +# and then concatenate their contents sequentially into a single "flat" dataset. +# * Skip the first line (header row). +# * Filter out lines beginning with "#" (comments). +dataset = dataset.flat_map( + lambda filename: ( + tf.data.TextLineDataset(filename) + .skip(1) + .filter(lambda line: tf.not_equal(tf.substr(line, 0, 1), "#")))) +``` + +### Consuming CSV data + +The CSV file format is a popular format for storing tabular data in plain text. +The @{tf.contrib.data.CsvDataset} class provides a way to extract records from +one or more CSV files that comply with [RFC 4180](https://tools.ietf.org/html/rfc4180). +Given one or more filenames and a list of defaults, a `CsvDataset` will produce +a tuple of elements whose types correspond to the types of the defaults +provided, per CSV record. Like `TFRecordDataset` and `TextLineDataset`, +`CsvDataset` accepts `filenames` as a `tf.Tensor`, so you can parameterize it +by passing a `tf.placeholder(tf.string)`. + +``` +# Creates a dataset that reads all of the records from two CSV files, each with +# eight float columns +filenames = ["/var/data/file1.csv", "/var/data/file2.csv"] +record_defaults = [tf.float32] * 8 # Eight required float columns +dataset = tf.contrib.data.CsvDataset(filenames, record_defaults) +``` + +If some columns are empty, you can provide defaults instead of types. + +``` +# Creates a dataset that reads all of the records from two CSV files, each with +# four float columns which may have missing values +record_defaults = [[0.0]] * 8 +dataset = tf.contrib.data.CsvDataset(filenames, record_defaults) +``` + +By default, a `CsvDataset` yields *every* column of *every* line of the file, +which may not be desirable, for example if the file starts with a header line +that should be ignored, or if some columns are not required in the input. +These lines and fields can be removed with the `header` and `select_cols` +arguments respectively. + +``` +# Creates a dataset that reads all of the records from two CSV files with +# headers, extracting float data from columns 2 and 4. +record_defaults = [[0.0]] * 2 # Only provide defaults for the selected columns +dataset = tf.contrib.data.CsvDataset(filenames, record_defaults, header=True, select_cols=[2,4]) +``` + + +## Preprocessing data with `Dataset.map()` + +The `Dataset.map(f)` transformation produces a new dataset by applying a given +function `f` to each element of the input dataset. It is based on +the +[`map()` function](https://en.wikipedia.org/wiki/Map_(higher-order_function)) +that is commonly applied to lists (and other structures) in functional +programming languages. The function `f` takes the `tf.Tensor` objects that +represent a single element in the input, and returns the `tf.Tensor` objects +that will represent a single element in the new dataset. 
Its implementation uses +standard TensorFlow operations to transform one element into another. + +This section covers common examples of how to use `Dataset.map()`. + +### Parsing `tf.Example` protocol buffer messages + +Many input pipelines extract `tf.train.Example` protocol buffer messages from a +TFRecord-format file (written, for example, using +`tf.python_io.TFRecordWriter`). Each `tf.train.Example` record contains one or +more "features", and the input pipeline typically converts these features into +tensors. + +```python +# Transforms a scalar string `example_proto` into a pair of a scalar string and +# a scalar integer, representing an image and its label, respectively. +def _parse_function(example_proto): + features = {"image": tf.FixedLenFeature((), tf.string, default_value=""), + "label": tf.FixedLenFeature((), tf.int32, default_value=0)} + parsed_features = tf.parse_single_example(example_proto, features) + return parsed_features["image"], parsed_features["label"] + +# Creates a dataset that reads all of the examples from two files, and extracts +# the image and label features. +filenames = ["/var/data/file1.tfrecord", "/var/data/file2.tfrecord"] +dataset = tf.data.TFRecordDataset(filenames) +dataset = dataset.map(_parse_function) +``` + +### Decoding image data and resizing it + +When training a neural network on real-world image data, it is often necessary +to convert images of different sizes to a common size, so that they may be +batched into a fixed size. + +```python +# Reads an image from a file, decodes it into a dense tensor, and resizes it +# to a fixed shape. +def _parse_function(filename, label): + image_string = tf.read_file(filename) + image_decoded = tf.image.decode_jpeg(image_string) + image_resized = tf.image.resize_images(image_decoded, [28, 28]) + return image_resized, label + +# A vector of filenames. +filenames = tf.constant(["/var/data/image1.jpg", "/var/data/image2.jpg", ...]) + +# `labels[i]` is the label for the image in `filenames[i]. +labels = tf.constant([0, 37, ...]) + +dataset = tf.data.Dataset.from_tensor_slices((filenames, labels)) +dataset = dataset.map(_parse_function) +``` + +### Applying arbitrary Python logic with `tf.py_func()` + +For performance reasons, we encourage you to use TensorFlow operations for +preprocessing your data whenever possible. However, it is sometimes useful to +call upon external Python libraries when parsing your input data. To do so, +invoke, the `tf.py_func()` operation in a `Dataset.map()` transformation. + +```python +import cv2 + +# Use a custom OpenCV function to read the image, instead of the standard +# TensorFlow `tf.read_file()` operation. +def _read_py_function(filename, label): + image_decoded = cv2.imread(filename.decode(), cv2.IMREAD_GRAYSCALE) + return image_decoded, label + +# Use standard TensorFlow operations to resize the image to a fixed shape. +def _resize_function(image_decoded, label): + image_decoded.set_shape([None, None, None]) + image_resized = tf.image.resize_images(image_decoded, [28, 28]) + return image_resized, label + +filenames = ["/var/data/image1.jpg", "/var/data/image2.jpg", ...] +labels = [0, 37, 29, 1, ...] 
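+# `tf.py_func` in the `map` call below wraps `_read_py_function` so that it
+# runs as ordinary Python at graph-execution time; its output types must be
+# listed explicitly (here `[tf.uint8, label.dtype]`).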
+ +dataset = tf.data.Dataset.from_tensor_slices((filenames, labels)) +dataset = dataset.map( + lambda filename, label: tuple(tf.py_func( + _read_py_function, [filename, label], [tf.uint8, label.dtype]))) +dataset = dataset.map(_resize_function) +``` + + + +## Batching dataset elements + +### Simple batching + +The simplest form of batching stacks `n` consecutive elements of a dataset into +a single element. The `Dataset.batch()` transformation does exactly this, with +the same constraints as the `tf.stack()` operator, applied to each component +of the elements: i.e. for each component *i*, all elements must have a tensor +of the exact same shape. + +```python +inc_dataset = tf.data.Dataset.range(100) +dec_dataset = tf.data.Dataset.range(0, -100, -1) +dataset = tf.data.Dataset.zip((inc_dataset, dec_dataset)) +batched_dataset = dataset.batch(4) + +iterator = batched_dataset.make_one_shot_iterator() +next_element = iterator.get_next() + +print(sess.run(next_element)) # ==> ([0, 1, 2, 3], [ 0, -1, -2, -3]) +print(sess.run(next_element)) # ==> ([4, 5, 6, 7], [-4, -5, -6, -7]) +print(sess.run(next_element)) # ==> ([8, 9, 10, 11], [-8, -9, -10, -11]) +``` + +### Batching tensors with padding + +The above recipe works for tensors that all have the same size. However, many +models (e.g. sequence models) work with input data that can have varying size +(e.g. sequences of different lengths). To handle this case, the +`Dataset.padded_batch()` transformation enables you to batch tensors of +different shape by specifying one or more dimensions in which they may be +padded. + +```python +dataset = tf.data.Dataset.range(100) +dataset = dataset.map(lambda x: tf.fill([tf.cast(x, tf.int32)], x)) +dataset = dataset.padded_batch(4, padded_shapes=[None]) + +iterator = dataset.make_one_shot_iterator() +next_element = iterator.get_next() + +print(sess.run(next_element)) # ==> [[0, 0, 0], [1, 0, 0], [2, 2, 0], [3, 3, 3]] +print(sess.run(next_element)) # ==> [[4, 4, 4, 4, 0, 0, 0], + # [5, 5, 5, 5, 5, 0, 0], + # [6, 6, 6, 6, 6, 6, 0], + # [7, 7, 7, 7, 7, 7, 7]] +``` + +The `Dataset.padded_batch()` transformation allows you to set different padding +for each dimension of each component, and it may be variable-length (signified +by `None` in the example above) or constant-length. It is also possible to +override the padding value, which defaults to 0. + + + +## Training workflows + +### Processing multiple epochs + +The `tf.data` API offers two main ways to process multiple epochs of the same +data. + +The simplest way to iterate over a dataset in multiple epochs is to use the +`Dataset.repeat()` transformation. For example, to create a dataset that repeats +its input for 10 epochs: + +```python +filenames = ["/var/data/file1.tfrecord", "/var/data/file2.tfrecord"] +dataset = tf.data.TFRecordDataset(filenames) +dataset = dataset.map(...) +dataset = dataset.repeat(10) +dataset = dataset.batch(32) +``` + +Applying the `Dataset.repeat()` transformation with no arguments will repeat +the input indefinitely. The `Dataset.repeat()` transformation concatenates its +arguments without signaling the end of one epoch and the beginning of the next +epoch. + +If you want to receive a signal at the end of each epoch, you can write a +training loop that catches the `tf.errors.OutOfRangeError` at the end of a +dataset. At that point you might collect some statistics (e.g. the validation +error) for the epoch. 
+ +```python +filenames = ["/var/data/file1.tfrecord", "/var/data/file2.tfrecord"] +dataset = tf.data.TFRecordDataset(filenames) +dataset = dataset.map(...) +dataset = dataset.batch(32) +iterator = dataset.make_initializable_iterator() +next_element = iterator.get_next() + +# Compute for 100 epochs. +for _ in range(100): + sess.run(iterator.initializer) + while True: + try: + sess.run(next_element) + except tf.errors.OutOfRangeError: + break + + # [Perform end-of-epoch calculations here.] +``` + +### Randomly shuffling input data + +The `Dataset.shuffle()` transformation randomly shuffles the input dataset +using a similar algorithm to `tf.RandomShuffleQueue`: it maintains a fixed-size +buffer and chooses the next element uniformly at random from that buffer. + +```python +filenames = ["/var/data/file1.tfrecord", "/var/data/file2.tfrecord"] +dataset = tf.data.TFRecordDataset(filenames) +dataset = dataset.map(...) +dataset = dataset.shuffle(buffer_size=10000) +dataset = dataset.batch(32) +dataset = dataset.repeat() +``` + +### Using high-level APIs + +The @{tf.train.MonitoredTrainingSession} API simplifies many aspects of running +TensorFlow in a distributed setting. `MonitoredTrainingSession` uses the +@{tf.errors.OutOfRangeError} to signal that training has completed, so to use it +with the `tf.data` API, we recommend using +`Dataset.make_one_shot_iterator()`. For example: + +```python +filenames = ["/var/data/file1.tfrecord", "/var/data/file2.tfrecord"] +dataset = tf.data.TFRecordDataset(filenames) +dataset = dataset.map(...) +dataset = dataset.shuffle(buffer_size=10000) +dataset = dataset.batch(32) +dataset = dataset.repeat(num_epochs) +iterator = dataset.make_one_shot_iterator() + +next_example, next_label = iterator.get_next() +loss = model_function(next_example, next_label) + +training_op = tf.train.AdagradOptimizer(...).minimize(loss) + +with tf.train.MonitoredTrainingSession(...) as sess: + while not sess.should_stop(): + sess.run(training_op) +``` + +To use a `Dataset` in the `input_fn` of a @{tf.estimator.Estimator}, we also +recommend using `Dataset.make_one_shot_iterator()`. For example: + +```python +def dataset_input_fn(): + filenames = ["/var/data/file1.tfrecord", "/var/data/file2.tfrecord"] + dataset = tf.data.TFRecordDataset(filenames) + + # Use `tf.parse_single_example()` to extract data from a `tf.Example` + # protocol buffer, and perform any additional per-record preprocessing. + def parser(record): + keys_to_features = { + "image_data": tf.FixedLenFeature((), tf.string, default_value=""), + "date_time": tf.FixedLenFeature((), tf.int64, default_value=""), + "label": tf.FixedLenFeature((), tf.int64, + default_value=tf.zeros([], dtype=tf.int64)), + } + parsed = tf.parse_single_example(record, keys_to_features) + + # Perform additional preprocessing on the parsed data. + image = tf.image.decode_jpeg(parsed["image_data"]) + image = tf.reshape(image, [299, 299, 1]) + label = tf.cast(parsed["label"], tf.int32) + + return {"image_data": image, "date_time": parsed["date_time"]}, label + + # Use `Dataset.map()` to build a pair of a feature dictionary and a label + # tensor for each example. + dataset = dataset.map(parser) + dataset = dataset.shuffle(buffer_size=10000) + dataset = dataset.batch(32) + dataset = dataset.repeat(num_epochs) + iterator = dataset.make_one_shot_iterator() + + # `features` is a dictionary in which each value is a batch of values for + # that feature; `labels` is a batch of labels. 
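+  # Note: the `Estimator` calls `dataset_input_fn` inside its own graph, so
+  # this one-shot iterator is rebuilt on every `train` or `evaluate` call.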
+ features, labels = iterator.get_next() + return features, labels +``` diff --git a/tensorflow/docs_src/guide/datasets_for_estimators.md b/tensorflow/docs_src/guide/datasets_for_estimators.md new file mode 100644 index 0000000000..b04af78cd8 --- /dev/null +++ b/tensorflow/docs_src/guide/datasets_for_estimators.md @@ -0,0 +1,387 @@ +# Datasets for Estimators + +The @{tf.data} module contains a collection of classes that allows you to +easily load data, manipulate it, and pipe it into your model. This document +introduces the API by walking through two simple examples: + +* Reading in-memory data from numpy arrays. +* Reading lines from a csv file. + + + +## Basic input + +Taking slices from an array is the simplest way to get started with `tf.data`. + +The @{$premade_estimators$Premade Estimators} chapter describes +the following `train_input_fn`, from +[`iris_data.py`](https://github.com/tensorflow/models/blob/master/samples/core/get_started/iris_data.py), +to pipe the data into the Estimator: + +``` python +def train_input_fn(features, labels, batch_size): + """An input function for training""" + # Convert the inputs to a Dataset. + dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels)) + + # Shuffle, repeat, and batch the examples. + dataset = dataset.shuffle(1000).repeat().batch(batch_size) + + # Return the dataset. + return dataset +``` + +Let's look at this more closely. + +### Arguments + +This function expects three arguments. Arguments expecting an "array" can +accept nearly anything that can be converted to an array with `numpy.array`. +One exception is +[`tuple`](https://docs.python.org/3/tutorial/datastructures.html#tuples-and-sequences) +which, as we will see, has special meaning for `Datasets`. + +* `features`: A `{'feature_name':array}` dictionary (or + [`DataFrame`](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.html)) + containing the raw input features. +* `labels` : An array containing the + [label](https://developers.google.com/machine-learning/glossary/#label) + for each example. +* `batch_size` : An integer indicating the desired batch size. + +In [`premade_estimator.py`](https://github.com/tensorflow/models/blob/master/samples/core/get_started/premade_estimator.py) +we retrieved the Iris data using the `iris_data.load_data()` function. +You can run it, and unpack the results as follows: + +``` python +import iris_data + +# Fetch the data +train, test = iris_data.load_data() +features, labels = train +``` + +Then we passed this data to the input function, with a line similar to this: + +``` python +batch_size=100 +iris_data.train_input_fn(features, labels, batch_size) +``` + +Let's walk through the `train_input_fn()`. + +### Slices + +The function starts by using the @{tf.data.Dataset.from_tensor_slices} function +to create a @{tf.data.Dataset} representing slices of the array. The array is +sliced across the first dimension. For example, an array containing the +@{$tutorials/layers$mnist training data} has a shape of `(60000, 28, 28)`. +Passing this to `from_tensor_slices` returns a `Dataset` object containing +60000 slices, each one a 28x28 image. + +The code that returns this `Dataset` is as follows: + +``` python +train, test = tf.keras.datasets.mnist.load_data() +mnist_x, mnist_y = train + +mnist_ds = tf.data.Dataset.from_tensor_slices(mnist_x) +print(mnist_ds) +``` + +This will print the following line, showing the +@{$guide/tensors#shapes$shapes} and +@{$guide/tensors#data_types$types} of the items in +the dataset. 
Note that a `Dataset` does not know how many items it contains. + +``` None + +``` + +The `Dataset` above represents a simple collection of arrays, but datasets are +much more powerful than this. A `Dataset` can transparently handle any nested +combination of dictionaries or tuples (or +[`namedtuple`](https://docs.python.org/2/library/collections.html#collections.namedtuple) +). + +For example after converting the iris `features` +to a standard python dictionary, you can then convert the dictionary of arrays +to a `Dataset` of dictionaries as follows: + +``` python +dataset = tf.data.Dataset.from_tensor_slices(dict(features)) +print(dataset) +``` +``` None + +``` + +Here we see that when a `Dataset` contains structured elements, the `shapes` +and `types` of the `Dataset` take on the same structure. This dataset contains +dictionaries of @{$guide/tensors#rank$scalars}, all of type +`tf.float64`. + +The first line of the iris `train_input_fn` uses the same functionality, but +adds another level of structure. It creates a dataset containing +`(features_dict, label)` pairs. + +The following code shows that the label is a scalar with type `int64`: + +``` python +# Convert the inputs to a Dataset. +dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels)) +print(dataset) +``` +``` + +``` + +### Manipulation + +Currently the `Dataset` would iterate over the data once, in a fixed order, and +only produce a single element at a time. It needs further processing before it +can be used for training. Fortunately, the `tf.data.Dataset` class provides +methods to better prepare the data for training. The next line of the input +function takes advantage of several of these methods: + +``` python +# Shuffle, repeat, and batch the examples. +dataset = dataset.shuffle(1000).repeat().batch(batch_size) +``` + +The @{tf.data.Dataset.shuffle$`shuffle`} method uses a fixed-size buffer to +shuffle the items as they pass through. In this case the `buffer_size` is +greater than the number of examples in the `Dataset`, ensuring that the data is +completely shuffled (The Iris data set only contains 150 examples). + +The @{tf.data.Dataset.repeat$`repeat`} method restarts the `Dataset` when +it reaches the end. To limit the number of epochs, set the `count` argument. + +The @{tf.data.Dataset.batch$`batch`} method collects a number of examples and +stacks them, to create batches. This adds a dimension to their shape. The new +dimension is added as the first dimension. The following code uses +the `batch` method on the MNIST `Dataset`, from earlier. This results in a +`Dataset` containing 3D arrays representing stacks of `(28,28)` images: + +``` python +print(mnist_ds.batch(100)) +``` + +``` none + +``` +Note that the dataset has an unknown batch size because the last batch will +have fewer elements. + +In `train_input_fn`, after batching the `Dataset` contains 1D vectors of +elements where each scalar was previously: + +```python +print(dataset) +``` +``` + +``` + + +### Return + +At this point the `Dataset` contains `(features_dict, labels)` pairs. +This is the format expected by the `train` and `evaluate` methods, so the +`input_fn` returns the dataset. + +The `labels` can/should be omitted when using the `predict` method. + + + + +## Reading a CSV File + +The most common real-world use case for the `Dataset` class is to stream data +from files on disk. The @{tf.data} module includes a variety of +file readers. Let's see how parsing the Iris dataset from the csv file looks +using a `Dataset`. 
+ +The following call to the `iris_data.maybe_download` function downloads the +data if necessary, and returns the pathnames of the resulting files: + +``` python +import iris_data +train_path, test_path = iris_data.maybe_download() +``` + +The [`iris_data.csv_input_fn`](https://github.com/tensorflow/models/blob/master/samples/core/get_started/iris_data.py) +function contains an alternative implementation that parses the csv files using +a `Dataset`. + +Let's look at how to build an Estimator-compatible input function that reads +from the local files. + +### Build the `Dataset` + +We start by building a @{tf.data.TextLineDataset$`TextLineDataset`} object to +read the file one line at a time. Then, we call the +@{tf.data.Dataset.skip$`skip`} method to skip over the first line of the file, which contains a header, not an example: + +``` python +ds = tf.data.TextLineDataset(train_path).skip(1) +``` + +### Build a csv line parser + +We will start by building a function to parse a single line. + +The following `iris_data.parse_line` function accomplishes this task using the +@{tf.decode_csv} function, and some simple python code: + +We must parse each of the lines in the dataset in order to generate the +necessary `(features, label)` pairs. The following `_parse_line` function +calls @{tf.decode_csv} to parse a single line into its features +and the label. Since Estimators require that features be represented as a +dictionary, we rely on Python's built-in `dict` and `zip` functions to build +that dictionary. The feature names are the keys of that dictionary. +We then call the dictionary's `pop` method to remove the label field from +the features dictionary: + +``` python +# Metadata describing the text columns +COLUMNS = ['SepalLength', 'SepalWidth', + 'PetalLength', 'PetalWidth', + 'label'] +FIELD_DEFAULTS = [[0.0], [0.0], [0.0], [0.0], [0]] +def _parse_line(line): + # Decode the line into its fields + fields = tf.decode_csv(line, FIELD_DEFAULTS) + + # Pack the result into a dictionary + features = dict(zip(COLUMNS,fields)) + + # Separate the label from the features + label = features.pop('label') + + return features, label +``` + +### Parse the lines + +Datasets have many methods for manipulating the data while it is being piped +to a model. The most heavily-used method is @{tf.data.Dataset.map$`map`}, which +applies a transformation to each element of the `Dataset`. + +The `map` method takes a `map_func` argument that describes how each item in the +`Dataset` should be transformed. + +
+
+[Figure: The @{tf.data.Dataset.map$`map`} method applies the `map_func` to
+transform each item in the `Dataset`.]
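+
+As a toy illustration (separate from the Iris walkthrough below, and assuming
+nothing beyond `tf.data` itself), the `map_func` can be any function of an
+element's components; for example, squaring every element of a numeric dataset:
+
+```python
+# Illustrative only: square each element of a simple numeric Dataset.
+ds_numbers = tf.data.Dataset.range(5)          # yields 0, 1, 2, 3, 4
+ds_squared = ds_numbers.map(lambda x: x * x)   # yields 0, 1, 4, 9, 16
+```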
+ +So to parse the lines as they are streamed out of the csv file, we pass our +`_parse_line` function to the `map` method: + +``` python +ds = ds.map(_parse_line) +print(ds) +``` +``` None + +``` + +Now instead of simple scalar strings, the dataset contains `(features, label)` +pairs. + +the remainder of the `iris_data.csv_input_fn` function is identical +to `iris_data.train_input_fn` which was covered in the in the +[Basic input](#basic_input) section. + +### Try it out + +This function can be used as a replacement for +`iris_data.train_input_fn`. It can be used to feed an estimator as follows: + +``` python +train_path, test_path = iris_data.maybe_download() + +# All the inputs are numeric +feature_columns = [ + tf.feature_column.numeric_column(name) + for name in iris_data.CSV_COLUMN_NAMES[:-1]] + +# Build the estimator +est = tf.estimator.LinearClassifier(feature_columns, + n_classes=3) +# Train the estimator +batch_size = 100 +est.train( + steps=1000, + input_fn=lambda : iris_data.csv_input_fn(train_path, batch_size)) +``` + +Estimators expect an `input_fn` to take no arguments. To work around this +restriction, we use `lambda` to capture the arguments and provide the expected +interface. + +## Summary + +The `tf.data` module provides a collection of classes and functions for easily +reading data from a variety of sources. Furthermore, `tf.data` has simple +powerful methods for applying a wide variety of standard and custom +transformations. + +Now you have the basic idea of how to efficiently load data into an +Estimator. Consider the following documents next: + + +* @{$custom_estimators}, which demonstrates how to build your own + custom `Estimator` model. +* The @{$low_level_intro#datasets$Low Level Introduction}, which demonstrates + how to experiment directly with `tf.data.Datasets` using TensorFlow's low + level APIs. +* @{$guide/datasets} which goes into great detail about additional + functionality of `Datasets`. + diff --git a/tensorflow/docs_src/guide/debugger.md b/tensorflow/docs_src/guide/debugger.md new file mode 100644 index 0000000000..6bd941886d --- /dev/null +++ b/tensorflow/docs_src/guide/debugger.md @@ -0,0 +1,804 @@ +# TensorFlow Debugger + + + +[TOC] + +`tfdbg` is a specialized debugger for TensorFlow. It lets you view the internal +structure and states of running TensorFlow graphs during training and inference, +which is difficult to debug with general-purpose debuggers such as Python's `pdb` +due to TensorFlow's computation-graph paradigm. + +This guide focuses on the command-line interface (CLI) of `tfdbg`. For guide on +how to use the graphical user interface (GUI) of tfdbg, i.e., the +**TensorBoard Debugger Plugin**, please visit +[its README](https://github.com/tensorflow/tensorboard/blob/master/tensorboard/plugins/debugger/README.md). + +Note: The TensorFlow debugger uses a +[curses](https://en.wikipedia.org/wiki/Curses_\(programming_library\))-based text +user interface. On Mac OS X, the `ncurses` library is required and can be +installed with `brew install homebrew/dupes/ncurses`. On Windows, curses isn't as +well supported, so a [readline](https://en.wikipedia.org/wiki/GNU_Readline)-based +interface can be used with tfdbg by installing `pyreadline` with `pip`. If you +use Anaconda3, you can install it with a command such as +`"C:\Program Files\Anaconda3\Scripts\pip.exe" install pyreadline`. 
Unofficial +Windows curses packages can be downloaded +[here](https://www.lfd.uci.edu/~gohlke/pythonlibs/#curses), then subsequently +installed using `pip install .whl`, however curses on Windows may +not work as reliably as curses on Linux or Mac. + +This tutorial demonstrates how to use the **tfdbg** CLI to debug the appearance +of [`nan`s](https://en.wikipedia.org/wiki/NaN) +and [`inf`s](https://en.wikipedia.org/wiki/Infinity), a frequently-encountered +type of bug in TensorFlow model development. +The following example is for users who use the low-level +[`Session`](https://www.tensorflow.org/api_docs/python/tf/Session) API of +TensorFlow. A later section of this document describes how to use **tfdbg** +with a higher-level API, namely `Estimator`s. +To *observe* such an issue, run the following command without the debugger (the +source code can be found +[here](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/debug/examples/debug_mnist.py)): + +```none +python -m tensorflow.python.debug.examples.debug_mnist +``` + +This code trains a simple neural network for MNIST digit image recognition. +Notice that the accuracy increases slightly after the first training step, but +then gets stuck at a low (near-chance) level: + +```none +Accuracy at step 0: 0.1113 +Accuracy at step 1: 0.3183 +Accuracy at step 2: 0.098 +Accuracy at step 3: 0.098 +Accuracy at step 4: 0.098 +``` + +Wondering what might have gone wrong, you suspect that certain nodes in the +training graph generated bad numeric values such as `inf`s and `nan`s, because +this is a common cause of this type of training failure. +Let's use tfdbg to debug this issue and pinpoint the exact graph node where this +numeric problem first surfaced. + +## Wrapping TensorFlow Sessions with tfdbg + +To add support for tfdbg in our example, all that is needed is to add the +following lines of code and wrap the Session object with a debugger wrapper. +This code is already added in +[debug_mnist.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/debug/examples/debug_mnist.py), +so you can activate tfdbg CLI with the `--debug` flag at the command line. + +```python +# Let your BUILD target depend on "//tensorflow/python/debug:debug_py" +# (You don't need to worry about the BUILD dependency if you are using a pip +# install of open-source TensorFlow.) +from tensorflow.python import debug as tf_debug + +sess = tf_debug.LocalCLIDebugWrapperSession(sess) +``` + +This wrapper has the same interface as Session, so enabling debugging requires +no other changes to the code. The wrapper provides additional features, +including: + +* Bringing up a CLI before and after `Session.run()` calls, to let you +control the execution and inspect the graph's internal state. +* Allowing you to register special `filters` for tensor values, to facilitate +the diagnosis of issues. + +In this example, we have already registered a tensor filter called +@{tfdbg.has_inf_or_nan}, +which simply determines if there are any `nan` or `inf` values in any +intermediate tensors (tensors that are neither inputs or outputs of the +`Session.run()` call, but are in the path leading from the inputs to the +outputs). This filter is for `nan`s and `inf`s is a common enough use case that +we ship it with the +@{$python/tfdbg#Classes_for_debug_dump_data_and_directories$`debug_data`} +module. + +Note: You can also write your own custom filters. See +the @{tfdbg.DebugDumpDir.find$API documentation} +of `DebugDumpDir.find()` for additional information. 
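+
+For example, the sketch below (the filter name and threshold are illustrative
+only, not part of the example code) registers a custom filter that flags any
+tensor containing very large magnitudes; like `has_inf_or_nan`, it can then be
+used with the `run -f` command described later in this guide.
+
+```python
+import numpy as np
+
+# Illustrative custom tensor filter: flag tensors with any |value| > 1e8.
+def has_large_values(datum, tensor):
+  return (isinstance(tensor, np.ndarray) and
+          np.issubdtype(tensor.dtype, np.number) and
+          np.any(np.abs(tensor) > 1e8))
+
+sess.add_tensor_filter('has_large_values', has_large_values)
+```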
+ +## Debugging Model Training with tfdbg + + +Let's try training the model again, but with the `--debug` flag added this time: + +```none +python -m tensorflow.python.debug.examples.debug_mnist --debug +``` + +The debug wrapper session will prompt you when it is about to execute the first +`Session.run()` call, with information regarding the fetched tensor and feed +dictionaries displayed on the screen. + +![tfdbg run-start UI](https://www.tensorflow.org/images/tfdbg_screenshot_run_start.png) + +This is what we refer to as the *run-start CLI*. It lists the feeds and fetches +to the current `Session.run` call, before executing anything. + +If the screen size is too small to display the content of the message in its +entirety, you can resize it. + +Use the **PageUp** / **PageDown** / **Home** / **End** keys to navigate the +screen output. On most keyboards lacking those keys **Fn + Up** / +**Fn + Down** / **Fn + Right** / **Fn + Left** will work. + +Enter the `run` command (or just `r`) at the command prompt: + +``` +tfdbg> run +``` + +The `run` command causes tfdbg to execute until the end of the next +`Session.run()` call, which calculates the model's accuracy using a test data +set. tfdbg augments the runtime Graph to dump all intermediate tensors. +After the run ends, tfdbg displays all the dumped tensors values in the +*run-end CLI*. For example: + +![tfdbg run-end UI: accuracy](https://www.tensorflow.org/images/tfdbg_screenshot_run_end_accuracy.png) + +This list of tensors can also be obtained by running the command `lt` after you +executed `run`. + +### tfdbg CLI Frequently-Used Commands + +Try the following commands at the `tfdbg>` prompt (referencing the code at +`tensorflow/python/debug/examples/debug_mnist.py`): + +| Command | Syntax or Option | Explanation | Example | +|:-------------------|:---------------- |:------------ |:------------------------- | +| **`lt`** | | **List dumped tensors.** | `lt` | +| | `-n ` | List dumped tensors with names matching given regular-expression pattern. | `lt -n Softmax.*` | +| | `-t ` | List dumped tensors with op types matching given regular-expression pattern. | `lt -t MatMul` | +| | `-f ` | List only the tensors that pass a registered tensor filter. | `lt -f has_inf_or_nan` | +| | `-f -fenn ` | List only the tensors that pass a registered tensor filter, excluding nodes with names matching the regular expression. | `lt -f has_inf_or_nan` `-fenn .*Sqrt.*` | +| | `-s ` | Sort the output by given `sort_key`, whose possible values are `timestamp` (default), `dump_size`, `op_type` and `tensor_name`. | `lt -s dump_size` | +| | `-r` | Sort in reverse order. | `lt -r -s dump_size` | +| **`pt`** | | **Print value of a dumped tensor.** | | +| | `pt ` | Print tensor value. | `pt hidden/Relu:0` | +| | `pt [slicing]` | Print a subarray of tensor, using [numpy](http://www.numpy.org/)-style array slicing. | `pt hidden/Relu:0[0:50,:]` | +| | `-a` | Print the entirety of a large tensor, without using ellipses. (May take a long time for large tensors.) | `pt -a hidden/Relu:0[0:50,:]` | +| | `-r ` | Highlight elements falling into specified numerical range. Multiple ranges can be used in conjunction. | `pt hidden/Relu:0 -a -r [[-inf,-1],[1,inf]]` | +| | `-n ` | Print dump corresponding to specified 0-based dump number. Required for tensors with multiple dumps. | `pt -n 0 hidden/Relu:0` | +| | `-s` | Include a summary of the numeric values of the tensor (applicable only to non-empty tensors with Boolean and numeric types such as `int*` and `float*`.) 
| `pt -s hidden/Relu:0[0:50,:]` | +| | `-w` | Write the value of the tensor (possibly sliced) to a Numpy file using [`numpy.save()`](https://docs.scipy.org/doc/numpy-1.13.0/reference/generated/numpy.save.html) | `pt -s hidden/Relu:0 -w /tmp/relu.npy` | +| **`@[coordinates]`** | | Navigate to specified element in `pt` output. | `@[10,0]` or `@10,0` | +| **`/regex`** | | [less](https://linux.die.net/man/1/less)-style search for given regular expression. | `/inf` | +| **`/`** | | Scroll to the next line with matches to the searched regex (if any). | `/` | +| **`pf`** | | **Print a value in the feed_dict to `Session.run`.** | | +| | `pf ` | Print the value of the feed. Also note that the `pf` command has the `-a`, `-r` and `-s` flags (not listed below), which have the same syntax and semantics as the identically-named flags of `pt`. | `pf input_xs:0` | +| **eval** | | **Evaluate arbitrary Python and numpy expression.** | | +| | `eval ` | Evaluate a Python / numpy expression, with numpy available as `np` and debug tensor names enclosed in backticks. | ``eval "np.matmul((`output/Identity:0` / `Softmax:0`).T, `Softmax:0`)"`` | +| | `-a` | Print a large-sized evaluation result in its entirety, i.e., without using ellipses. | ``eval -a 'np.sum(`Softmax:0`, axis=1)'`` | +| | `-w` | Write the result of the evaluation to a Numpy file using [`numpy.save()`](https://docs.scipy.org/doc/numpy-1.13.0/reference/generated/numpy.save.html) | ``eval -a 'np.sum(`Softmax:0`, axis=1)' -w /tmp/softmax_sum.npy`` | +| **`ni`** | | **Display node information.** | | +| | `-a` | Include node attributes in the output. | `ni -a hidden/Relu` | +| | `-d` | List the debug dumps available from the node. | `ni -d hidden/Relu` | +| | `-t` | Display the Python stack trace of the node's creation. | `ni -t hidden/Relu` | +| **`li`** | | **List inputs to node** | | +| | `-r` | List the inputs to node, recursively (the input tree.) | `li -r hidden/Relu:0` | +| | `-d ` | Limit recursion depth under the `-r` mode. | `li -r -d 3 hidden/Relu:0` | +| | `-c` | Include control inputs. | `li -c -r hidden/Relu:0` | +| | `-t` | Show op types of input nodes. | `li -t -r hidden/Relu:0` | +| **`lo`** | | **List output recipients of node** | | +| | `-r` | List the output recipients of node, recursively (the output tree.) | `lo -r hidden/Relu:0` | +| | `-d ` | Limit recursion depth under the `-r` mode. | `lo -r -d 3 hidden/Relu:0` | +| | `-c` | Include recipients via control edges. | `lo -c -r hidden/Relu:0` | +| | `-t` | Show op types of recipient nodes. | `lo -t -r hidden/Relu:0` | +| **`ls`** | | **List Python source files involved in node creation.** | | +| | `-p ` | Limit output to source files matching given regular-expression path pattern. | `ls -p .*debug_mnist.*` | +| | `-n` | Limit output to node names matching given regular-expression pattern. | `ls -n Softmax.*` | +| **`ps`** | | **Print Python source file.** | | +| | `ps ` | Print given Python source file source.py, with the lines annotated with the nodes created at each of them (if any). | `ps /path/to/source.py` | +| | `-t` | Perform annotation with respect to Tensors, instead of the default, nodes. | `ps -t /path/to/source.py` | +| | `-b ` | Annotate source.py beginning at given line. | `ps -b 30 /path/to/source.py` | +| | `-m ` | Limit the number of elements in the annotation for each line. 
| `ps -m 100 /path/to/source.py` | +| **`run`** | | **Proceed to the next Session.run()** | `run` | +| | `-n` | Execute through the next `Session.run` without debugging, and drop to CLI right before the run after that. | `run -n` | +| | `-t ` | Execute `Session.run` `T - 1` times without debugging, followed by a run with debugging. Then drop to CLI right after the debugged run. | `run -t 10` | +| | `-f ` | Continue executing `Session.run` until any intermediate tensor triggers the specified Tensor filter (causes the filter to return `True`). | `run -f has_inf_or_nan` | +| | `-f -fenn ` | Continue executing `Session.run` until any intermediate tensor whose node names doesn't match the regular expression triggers the specified Tensor filter (causes the filter to return `True`). | `run -f has_inf_or_nan -fenn .*Sqrt.*` | +| | `--node_name_filter ` | Execute the next `Session.run`, watching only nodes with names matching the given regular-expression pattern. | `run --node_name_filter Softmax.*` | +| | `--op_type_filter ` | Execute the next `Session.run`, watching only nodes with op types matching the given regular-expression pattern. | `run --op_type_filter Variable.*` | +| | `--tensor_dtype_filter ` | Execute the next `Session.run`, dumping only Tensors with data types (`dtype`s) matching the given regular-expression pattern. | `run --tensor_dtype_filter int.*` | +| | `-p` | Execute the next `Session.run` call in profiling mode. | `run -p` | +| **`ri`** | | **Display information about the run the current run, including fetches and feeds.** | `ri` | +| **`config`** | | **Set or show persistent TFDBG UI configuration.** | | +| | `set` | Set the value of a config item: {`graph_recursion_depth`, `mouse_mode`}. | `config set graph_recursion_depth 3` | +| | `show` | Show current persistent UI configuration. | `config show` | +| **`help`** | | **Print general help information** | `help` | +| | `help ` | Print help for given command. | `help lt` | + +Note that each time you enter a command, a new screen output +will appear. This is somewhat analogous to web pages in a browser. You can +navigate between these screens by clicking the `<--` and +`-->` text arrows near the top-left corner of the CLI. + +### Other Features of the tfdbg CLI + +In addition to the commands listed above, the tfdbg CLI provides the following +additional features: + +* To navigate through previous tfdbg commands, type in a few characters + followed by the Up or Down arrow keys. tfdbg will show you the history of + commands that started with those characters. +* To navigate through the history of screen outputs, do either of the + following: + * Use the `prev` and `next` commands. + * Click underlined `<--` and `-->` links near the top left corner of the + screen. +* Tab completion of commands and some command arguments. +* To redirect the screen output to a file instead of the screen, end the + command with bash-style redirection. For example, the following command + redirects the output of the pt command to the `/tmp/xent_value_slices.txt` + file: + + ```none + tfdbg> pt cross_entropy/Log:0[:, 0:10] > /tmp/xent_value_slices.txt + ``` + +### Finding `nan`s and `inf`s + +In this first `Session.run()` call, there happen to be no problematic numerical +values. You can move on to the next run by using the command `run` or its +shorthand `r`. + +> TIP: If you enter `run` or `r` repeatedly, you will be able to move through +> the `Session.run()` calls in a sequential manner. 
+> +> You can also use the `-t` flag to move ahead a number of `Session.run()` calls +> at a time, for example: +> +> ``` +> tfdbg> run -t 10 +> ``` + +Instead of entering `run` repeatedly and manually searching for `nan`s and +`inf`s in the run-end UI after every `Session.run()` call (for example, by using +the `pt` command shown in the table above) , you can use the following +command to let the debugger repeatedly execute `Session.run()` calls without +stopping at the run-start or run-end prompt, until the first `nan` or `inf` +value shows up in the graph. This is analogous to *conditional breakpoints* in +some procedural-language debuggers: + +```none +tfdbg> run -f has_inf_or_nan +``` + +> NOTE: The preceding command works properly because a tensor filter called +> `has_inf_or_nan` has been registered for you when the wrapped session is +> created. This filter detects `nan`s and `inf`s (as explained previously). +> If you have registered any other filters, you can +> use "run -f" to have tfdbg run until any tensor triggers that filter (cause +> the filter to return True). +> +> ``` python +> def my_filter_callable(datum, tensor): +> # A filter that detects zero-valued scalars. +> return len(tensor.shape) == 0 and tensor == 0.0 +> +> sess.add_tensor_filter('my_filter', my_filter_callable) +> ``` +> +> Then at the tfdbg run-start prompt run until your filter is triggered: +> +> ``` +> tfdbg> run -f my_filter +> ``` + +See [this API document](https://www.tensorflow.org/api_docs/python/tfdbg/DebugDumpDir#find) +for more information on the expected signature and return value of the predicate +`Callable` used with `add_tensor_filter()`. + +![tfdbg run-end UI: infs and nans](https://www.tensorflow.org/images/tfdbg_screenshot_run_end_inf_nan.png) + +As the screen display indicates on the first line, the `has_inf_or_nan` filter is first triggered +during the fourth `Session.run()` call: an +[Adam optimizer](https://www.tensorflow.org/api_docs/python/tf/train/AdamOptimizer) +forward-backward training pass on the graph. In this run, 36 (out of the total +95) intermediate tensors contain `nan` or `inf` values. These tensors are listed +in chronological order, with their timestamps displayed on the left. At the top +of the list, you can see the first tensor in which the bad numerical values +first surfaced: `cross_entropy/Log:0`. + +To view the value of the tensor, click the underlined tensor name +`cross_entropy/Log:0` or enter the equivalent command: + +```none +tfdbg> pt cross_entropy/Log:0 +``` + +Scroll down a little and you will notice some scattered `inf` values. If the +instances of `inf` and `nan` are difficult to spot by eye, you can use the +following command to perform a regex search and highlight the output: + +```none +tfdbg> /inf +``` + +Or, alternatively: + +```none +tfdbg> /(inf|nan) +``` + +You can also use the `-s` or `--numeric_summary` command to get a quick summary +of the types of numeric values in the tensor: + +``` none +tfdbg> pt -s cross_entropy/Log:0 +``` + +From the summary, you can see that several of the 1000 elements of the +`cross_entropy/Log:0` tensor are `-inf`s (negative infinities). + +Why did these infinities appear? 
To further debug, display more information +about the node `cross_entropy/Log` by clicking the underlined `node_info` menu +item on the top or entering the equivalent node_info (`ni`) command: + +```none +tfdbg> ni cross_entropy/Log +``` + +![tfdbg run-end UI: infs and nans](https://www.tensorflow.org/images/tfdbg_screenshot_run_end_node_info.png) + +You can see that this node has the op type `Log` +and that its input is the node `Softmax`. Run the following command to +take a closer look at the input tensor: + +```none +tfdbg> pt Softmax:0 +``` + +Examine the values in the input tensor, searching for zeros: + +```none +tfdbg> /0\.000 +``` + +Indeed, there are zeros. Now it is clear that the origin of the bad numerical +values is the node `cross_entropy/Log` taking logs of zeros. To find out the +culprit line in the Python source code, use the `-t` flag of the `ni` command +to show the traceback of the node's construction: + +```none +tfdbg> ni -t cross_entropy/Log +``` + +If you click "node_info" at the top of the screen, tfdbg automatically shows the +traceback of the node's construction. + +From the traceback, you can see that the op is constructed at the following +line: +[`debug_mnist.py`](https://www.tensorflow.org/code/tensorflow/python/debug/examples/debug_mnist.py): + +```python +diff = y_ * tf.log(y) +``` + +**tfdbg** has a feature that makes it easy to trace Tensors and ops back to +lines in Python source files. It can annotate lines of a Python file with +the ops or Tensors created by them. To use this feature, +simply click the underlined line numbers in the stack trace output of the +`ni -t ` commands, or use the `ps` (or `print_source`) command such as: +`ps /path/to/source.py`. For example, the following screenshot shows the output +of a `ps` command. + +![tfdbg run-end UI: annotated Python source file](https://www.tensorflow.org/images/tfdbg_screenshot_run_end_annotated_source.png) + +### Fixing the problem + +To fix the problem, edit `debug_mnist.py`, changing the original line: + +```python +diff = -(y_ * tf.log(y)) +``` + +to the built-in, numerically-stable implementation of softmax cross-entropy: + +```python +diff = tf.losses.softmax_cross_entropy(labels=y_, logits=logits) +``` + +Rerun with the `--debug` flag as follows: + +```none +python -m tensorflow.python.debug.examples.debug_mnist --debug +``` + +At the `tfdbg>` prompt, enter the following command: + +```none +run -f has_inf_or_nan` +``` + +Confirm that no tensors are flagged as containing `nan` or `inf` values, and +accuracy now continues to rise rather than getting stuck. Success! + +## Debugging TensorFlow Estimators + +This section explains how to debug TensorFlow programs that use the `Estimator` +APIs. Part of the convenience provided by these APIs is that +they manage `Session`s internally. This makes the `LocalCLIDebugWrapperSession` +described in the preceding sections inapplicable. Fortunately, you can still +debug them by using special `hook`s provided by `tfdbg`. + +`tfdbg` can debug the +@{tf.estimator.Estimator.train$`train()`}, +@{tf.estimator.Estimator.evaluate$`evaluate()`} and +@{tf.estimator.Estimator.predict$`predict()`} +methods of tf-learn `Estimator`s. To debug `Estimator.train()`, +create a `LocalCLIDebugHook` and supply it in the `hooks` argument. For example: + +```python +# First, let your BUILD target depend on "//tensorflow/python/debug:debug_py" +# (You don't need to worry about the BUILD dependency if you are using a pip +# install of open-source TensorFlow.) 
+from tensorflow.python import debug as tf_debug + +# Create a LocalCLIDebugHook and use it as a monitor when calling fit(). +hooks = [tf_debug.LocalCLIDebugHook()] + +# To debug `train`: +classifier.train(input_fn, + steps=1000, + hooks=hooks) +``` + +Similarly, to debug `Estimator.evaluate()` and `Estimator.predict()`, assign +hooks to the `hooks` parameter, as in the following example: + +```python +# To debug `evaluate`: +accuracy_score = classifier.evaluate(eval_input_fn, + hooks=hooks)["accuracy"] + +# To debug `predict`: +predict_results = classifier.predict(predict_input_fn, hooks=hooks) +``` + +[debug_tflearn_iris.py](https://www.tensorflow.org/code/tensorflow/python/debug/examples/debug_tflearn_iris.py), +based on [tf-learn's iris tutorial](https://www.tensorflow.org/versions/r1.8/get_started/tflearn), +contains a full example of how to use the tfdbg with `Estimator`s. +To run this example, do: + +```none +python -m tensorflow.python.debug.examples.debug_tflearn_iris --debug +``` + +The `LocalCLIDebugHook` also allows you to configure a `watch_fn` that can be +used to flexibly specify what `Tensor`s to watch on different `Session.run()` +calls, as a function of the `fetches` and `feed_dict` and other states. See +@{tfdbg.DumpingDebugWrapperSession.__init__$this API doc} +for more details. + +## Debugging Keras Models with TFDBG + +To use TFDBG with [Keras](https://keras.io/), let the Keras backend use +a TFDBG-wrapped Session object. For example, to use the CLI wrapper: + +``` python +import tensorflow as tf +from keras import backend as keras_backend +from tensorflow.python import debug as tf_debug + +keras_backend.set_session(tf_debug.LocalCLIDebugWrapperSession(tf.Session())) + +# Define your keras model, called "model". +model.fit(...) # This will break into the TFDBG CLI. +``` + +## Debugging tf-slim with TFDBG + +TFDBG supports debugging of training and evaluation with +[tf-slim](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/slim). +As detailed below, training and evaluation require slightly different debugging +workflows. + +### Debugging training in tf-slim +To debug the training process, provide `LocalCLIDebugWrapperSession` to the +`session_wrapper` argument of `slim.learning.train()`. For example: + +``` python +import tensorflow as tf +from tensorflow.python import debug as tf_debug + +# ... Code that creates the graph and the train_op ... +tf.contrib.slim.learning.train( + train_op, + logdir, + number_of_steps=10, + session_wrapper=tf_debug.LocalCLIDebugWrapperSession) +``` + +### Debugging evaluation in tf-slim +To debug the evaluation process, provide `LocalCLIDebugHook` to the +`hooks` argument of `slim.evaluation.evaluate_once()`. For example: + +``` python +import tensorflow as tf +from tensorflow.python import debug as tf_debug + +# ... Code that creates the graph and the eval and final ops ... +tf.contrib.slim.evaluation.evaluate_once( + '', + checkpoint_path, + logdir, + eval_op=my_eval_op, + final_op=my_value_op, + hooks=[tf_debug.LocalCLIDebugHook()]) +``` + +## Offline Debugging of Remotely-Running Sessions + +Often, your model is running on a remote machine or a process that you don't +have terminal access to. To perform model debugging in such cases, you can use +the `offline_analyzer` binary of `tfdbg` (described below). It operates on +dumped data directories. This can be done to both the lower-level `Session` API +and the higher-level `Estimator` API. 
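+
+As a rough illustration of the `watch_fn` mentioned above (and accepted by the
+dumping wrappers described below), such a callable receives the `fetches` and
+`feed_dict` of each `Session.run()` call and returns a `tf_debug.WatchOptions`
+object describing what to dump; see the linked API documentation for the exact
+signature. The field values in this sketch are placeholders:
+
+```python
+from tensorflow.python import debug as tf_debug
+
+def my_watch_fn(fetches, feeds):
+  # Watch only nodes whose names match a pattern, using the default
+  # identity debug op. The pattern here is only an illustration.
+  return tf_debug.WatchOptions(
+      debug_ops=["DebugIdentity"],
+      node_name_regex_whitelist=r".*hidden.*")
+```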
+ +### Debugging Remote tf.Sessions + +If you interact directly with the `tf.Session` API in `python`, you can +configure the `RunOptions` proto that you call your `Session.run()` method +with, by using the method @{tfdbg.watch_graph}. +This will cause the intermediate tensors and runtime graphs to be dumped to a +shared storage location of your choice when the `Session.run()` call occurs +(at the cost of slower performance). For example: + +```python +from tensorflow.python import debug as tf_debug + +# ... Code where your session and graph are set up... + +run_options = tf.RunOptions() +tf_debug.watch_graph( + run_options, + session.graph, + debug_urls=["file:///shared/storage/location/tfdbg_dumps_1"]) +# Be sure to specify different directories for different run() calls. + +session.run(fetches, feed_dict=feeds, options=run_options) +``` + +Later, in an environment that you have terminal access to (for example, a local +computer that can access the shared storage location specified in the code +above), you can load and inspect the data in the dump directory on the shared +storage by using the `offline_analyzer` binary of `tfdbg`. For example: + +```none +python -m tensorflow.python.debug.cli.offline_analyzer \ + --dump_dir=/shared/storage/location/tfdbg_dumps_1 +``` + +The `Session` wrapper `DumpingDebugWrapperSession` offers an easier and more +flexible way to generate file-system dumps that can be analyzed offline. +To use it, simply wrap your session in a `tf_debug.DumpingDebugWrapperSession`. +For example: + +```python +# Let your BUILD target depend on "//tensorflow/python/debug:debug_py +# (You don't need to worry about the BUILD dependency if you are using a pip +# install of open-source TensorFlow.) +from tensorflow.python import debug as tf_debug + +sess = tf_debug.DumpingDebugWrapperSession( + sess, "/shared/storage/location/tfdbg_dumps_1/", watch_fn=my_watch_fn) +``` + +The `watch_fn` argument accepts a `Callable` that allows you to configure what +`tensor`s to watch on different `Session.run()` calls, as a function of the +`fetches` and `feed_dict` to the `run()` call and other states. + +### C++ and other languages + +If your model code is written in C++ or other languages, you can also +modify the `debug_options` field of `RunOptions` to generate debug dumps that +can be inspected offline. See +[the proto definition](https://www.tensorflow.org/code/tensorflow/core/protobuf/debug.proto) +for more details. + +### Debugging Remotely-Running Estimators + +If your remote TensorFlow server runs `Estimator`s, +you can use the non-interactive `DumpingDebugHook`. For example: + +```python +# Let your BUILD target depend on "//tensorflow/python/debug:debug_py +# (You don't need to worry about the BUILD dependency if you are using a pip +# install of open-source TensorFlow.) +from tensorflow.python import debug as tf_debug + +hooks = [tf_debug.DumpingDebugHook("/shared/storage/location/tfdbg_dumps_1")] +``` + +Then this `hook` can be used in the same way as the `LocalCLIDebugHook` examples +described earlier in this document. +As the training, evalution or prediction happens with `Estimator`, +tfdbg creates directories having the following name pattern: +`/shared/storage/location/tfdbg_dumps_1/run__`. +Each directory corresponds to a `Session.run()` call that underlies +the `fit()` or `evaluate()` call. You can load these directories and inspect +them in a command-line interface in an offline manner using the +`offline_analyzer` offered by tfdbg. 
For example: + +```bash +python -m tensorflow.python.debug.cli.offline_analyzer \ + --dump_dir="/shared/storage/location/tfdbg_dumps_1/run__" +``` + +## Frequently Asked Questions + +**Q**: _Do the timestamps on the left side of the `lt` output reflect actual + performance in a non-debugging session?_ + +**A**: No. The debugger inserts additional special-purpose debug nodes to the + graph to record the values of intermediate tensors. These nodes + slow down the graph execution. If you are interested in profiling your + model, check out + + 1. The profiling mode of tfdbg: `tfdbg> run -p`. + 2. [tfprof](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/core/profiler) + and other profiling tools for TensorFlow. + +**Q**: _How do I link tfdbg against my `Session` in Bazel? Why do I see an + error such as "ImportError: cannot import name debug"?_ + +**A**: In your BUILD rule, declare dependencies: + `"//tensorflow:tensorflow_py"` and `"//tensorflow/python/debug:debug_py"`. + The first is the dependency that you include to use TensorFlow even + without debugger support; the second enables the debugger. + Then, In your Python file, add: + +```python +from tensorflow.python import debug as tf_debug + +# Then wrap your TensorFlow Session with the local-CLI wrapper. +sess = tf_debug.LocalCLIDebugWrapperSession(sess) +``` + +**Q**: _Does tfdbg help debug runtime errors such as shape mismatches?_ + +**A**: Yes. tfdbg intercepts errors generated by ops during runtime and presents + the errors with some debug instructions to the user in the CLI. + See examples: + +```none +# Debugging shape mismatch during matrix multiplication. +python -m tensorflow.python.debug.examples.debug_errors \ + --error shape_mismatch --debug + +# Debugging uninitialized variable. +python -m tensorflow.python.debug.examples.debug_errors \ + --error uninitialized_variable --debug +``` + +**Q**: _How can I let my tfdbg-wrapped Sessions or Hooks run the debug mode +only from the main thread?_ + +**A**: +This is a common use case, in which the `Session` object is used from multiple +threads concurrently. Typically, the child threads take care of background tasks +such as running enqueue operations. Often, you want to debug only the main +thread (or less frequently, only one of the child threads). You can use the +`thread_name_filter` keyword argument of `LocalCLIDebugWrapperSession` to +achieve this type of thread-selective debugging. For example, to debug from the +main thread only, construct a wrapped `Session` as follows: + +```python +sess = tf_debug.LocalCLIDebugWrapperSession(sess, thread_name_filter="MainThread$") +``` + +The above example relies on the fact that main threads in Python have the +default name `MainThread`. + +**Q**: _The model I am debugging is very large. The data dumped by tfdbg +fills up the free space of my disk. What can I do?_ + +**A**: +You might encounter this problem in any of the following situations: + +* models with many intermediate tensors +* very large intermediate tensors +* many @{tf.while_loop} iterations + +There are three possible workarounds or solutions: + +* The constructors of `LocalCLIDebugWrapperSession` and `LocalCLIDebugHook` + provide a keyword argument, `dump_root`, to specify the path + to which tfdbg dumps the debug data. You can use it to let tfdbg dump the + debug data on a disk with larger free space. 
For example:
+
+```python
+# For LocalCLIDebugWrapperSession
+sess = tf_debug.LocalCLIDebugWrapperSession(dump_root="/with/lots/of/space")
+
+# For LocalCLIDebugHook
+hooks = [tf_debug.LocalCLIDebugHook(dump_root="/with/lots/of/space")]
+```
+  Make sure that the directory pointed to by `dump_root` is empty or
+  nonexistent. `tfdbg` cleans up the dump directories before exiting.
+
+* Reduce the batch size used during the runs.
+* Use the filtering options of tfdbg's `run` command to watch only specific
+  nodes in the graph. For example:
+
+  ```
+  tfdbg> run --node_name_filter .*hidden.*
+  tfdbg> run --op_type_filter Variable.*
+  tfdbg> run --tensor_dtype_filter int.*
+  ```
+
+  The first command above watches only nodes whose names match the
+  regular-expression pattern `.*hidden.*`. The second command watches only
+  operations whose op types match the pattern `Variable.*`. The third one
+  watches only the tensors whose dtypes match the pattern `int.*` (e.g.,
+  `int32`).
+
+
+**Q**: _Why can't I select text in the tfdbg CLI?_
+
+**A**: This is because the tfdbg CLI enables mouse events in the terminal by
+    default. This [mouse-mask](https://linux.die.net/man/3/mousemask) mode
+    overrides default terminal interactions, including text selection. You
+    can re-enable text selection by using the command `mouse off` or
+    `m off`.
+
+**Q**: _Why does the tfdbg CLI show no dumped tensors when I debug code like the following?_
+
+``` python
+a = tf.ones([10], name="a")
+b = tf.add(a, a, name="b")
+sess = tf.Session()
+sess = tf_debug.LocalCLIDebugWrapperSession(sess)
+sess.run(b)
+```
+
+**A**: You see no dumped data because every node in the
+    executed TensorFlow graph is constant-folded by the TensorFlow runtime.
+    In this example, `a` is a constant tensor; therefore, the fetched
+    tensor `b` is effectively also a constant tensor. TensorFlow's graph
+    optimization folds the graph that contains `a` and `b` into a single
+    node to speed up future runs of the graph, which is why `tfdbg` does
+    not generate any intermediate tensor dumps. However, if `a` were a
+    @{tf.Variable}, as in the following example:
+
+``` python
+import numpy as np
+
+a = tf.Variable(np.ones([10]), name="a")
+b = tf.add(a, a, name="b")
+sess = tf.Session()
+sess.run(tf.global_variables_initializer())
+sess = tf_debug.LocalCLIDebugWrapperSession(sess)
+sess.run(b)
+```
+
+the constant-folding would not occur and `tfdbg` should show the intermediate
+tensor dumps.
+
+
+**Q**: _I am debugging a model that generates unwanted infinities or NaNs. But
+    there are some nodes in my model that are known to generate infinities
+    or NaNs in their output tensors even under completely normal conditions.
+    How can I skip those nodes during my `run -f has_inf_or_nan` actions?_
+
+**A**: Use the `--filter_exclude_node_names` (`-fenn` for short) flag. For
+    example, if you know you have a node whose name matches the regular
+    expression `.*Sqrt.*` and that generates infinities or NaNs regardless
+    of whether the model is behaving correctly, you can exclude those nodes
+    from the infinity/NaN-finding runs with the command
+    `run -f has_inf_or_nan -fenn .*Sqrt.*`.
+
+
+**Q**: _Is there a GUI for tfdbg?_
+
+**A**: Yes, the **TensorBoard Debugger Plugin** is the GUI of tfdbg.
+    It offers features such as inspection of the computation graph,
+    real-time visualization of tensor values, continuation to tensor
+    breakpoints and conditional breakpoints, and tying tensors to their
+    graph-construction source code, all in the browser environment.
+ To get started, please visit + [its README](https://github.com/tensorflow/tensorboard/blob/master/tensorboard/plugins/debugger/README.md). diff --git a/tensorflow/docs_src/guide/eager.md b/tensorflow/docs_src/guide/eager.md new file mode 100644 index 0000000000..00d02b4455 --- /dev/null +++ b/tensorflow/docs_src/guide/eager.md @@ -0,0 +1,849 @@ +# Eager Execution + +TensorFlow's eager execution is an imperative programming environment that +evaluates operations immediately, without building graphs: operations return +concrete values instead of constructing a computational graph to run later. This +makes it easy to get started with TensorFlow and debug models, and it +reduces boilerplate as well. To follow along with this guide, run the code +samples below in an interactive `python` interpreter. + +Eager execution is a flexible machine learning platform for research and +experimentation, providing: + +* *An intuitive interface*—Structure your code naturally and use Python data + structures. Quickly iterate on small models and small data. +* *Easier debugging*—Call ops directly to inspect running models and test + changes. Use standard Python debugging tools for immediate error reporting. +* *Natural control flow*—Use Python control flow instead of graph control + flow, simplifying the specification of dynamic models. + +Eager execution supports most TensorFlow operations and GPU acceleration. For a +collection of examples running in eager execution, see: +[tensorflow/contrib/eager/python/examples](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/eager/python/examples). + +Note: Some models may experience increased overhead with eager execution +enabled. Performance improvements are ongoing, but please +[file a bug](https://github.com/tensorflow/tensorflow/issues) if you find a +problem and share your benchmarks. + +## Setup and basic usage + +Upgrade to the latest version of TensorFlow: + +``` +$ pip install --upgrade tensorflow +``` + +To start eager execution, add `tf.enable_eager_execution()` to the beginning of +the program or console session. Do not add this operation to other modules that +the program calls. + +```py +from __future__ import absolute_import, division, print_function + +import tensorflow as tf + +tf.enable_eager_execution() +``` + +Now you can run TensorFlow operations and the results will return immediately: + +```py +tf.executing_eagerly() # => True + +x = [[2.]] +m = tf.matmul(x, x) +print("hello, {}".format(m)) # => "hello, [[4.]]" +``` + +Enabling eager execution changes how TensorFlow operations behave—now they +immediately evaluate and return their values to Python. `tf.Tensor` objects +reference concrete values instead of symbolic handles to nodes in a computational +graph. Since there isn't a computational graph to build and run later in a +session, it's easy to inspect results using `print()` or a debugger. Evaluating, +printing, and checking tensor values does not break the flow for computing +gradients. + +Eager execution works nicely with [NumPy](http://www.numpy.org/). NumPy +operations accept `tf.Tensor` arguments. TensorFlow +[math operations](https://www.tensorflow.org/api_guides/python/math_ops) convert +Python objects and NumPy arrays to `tf.Tensor` objects. The +`tf.Tensor.numpy` method returns the object's value as a NumPy `ndarray`. 
+ +```py +a = tf.constant([[1, 2], + [3, 4]]) +print(a) +# => tf.Tensor([[1 2] +# [3 4]], shape=(2, 2), dtype=int32) + +# Broadcasting support +b = tf.add(a, 1) +print(b) +# => tf.Tensor([[2 3] +# [4 5]], shape=(2, 2), dtype=int32) + +# Operator overloading is supported +print(a * b) +# => tf.Tensor([[ 2 6] +# [12 20]], shape=(2, 2), dtype=int32) + +# Use NumPy values +import numpy as np + +c = np.multiply(a, b) +print(c) +# => [[ 2 6] +# [12 20]] + +# Obtain numpy value from a tensor: +print(a.numpy()) +# => [[1 2] +# [3 4]] +``` + +The `tf.contrib.eager` module contains symbols available to both eager and graph execution +environments and is useful for writing code to [work with graphs](#work_with_graphs): + +```py +tfe = tf.contrib.eager +``` + +## Dynamic control flow + +A major benefit of eager execution is that all the functionality of the host +language is available while your model is executing. So, for example, +it is easy to write [fizzbuzz](https://en.wikipedia.org/wiki/Fizz_buzz): + +```py +def fizzbuzz(max_num): + counter = tf.constant(0) + max_num = tf.convert_to_tensor(max_num) + for num in range(max_num.numpy()): + num = tf.constant(num) + if int(num % 3) == 0 and int(num % 5) == 0: + print('FizzBuzz') + elif int(num % 3) == 0: + print('Fizz') + elif int(num % 5) == 0: + print('Buzz') + else: + print(num) + counter += 1 + return counter +``` + +This has conditionals that depend on tensor values and it prints these values +at runtime. + +## Build a model + +Many machine learning models are represented by composing layers. When +using TensorFlow with eager execution you can either write your own layers or +use a layer provided in the `tf.keras.layers` package. + +While you can use any Python object to represent a layer, +TensorFlow has `tf.keras.layers.Layer` as a convenient base class. Inherit from +it to implement your own layer: + +```py +class MySimpleLayer(tf.keras.layers.Layer): + def __init__(self, output_units): + self.output_units = output_units + + def build(self, input): + # The build method gets called the first time your layer is used. + # Creating variables on build() allows you to make their shape depend + # on the input shape and hence remove the need for the user to specify + # full shapes. It is possible to create variables during __init__() if + # you already know their full shapes. + self.kernel = self.add_variable( + "kernel", [input.shape[-1], self.output_units]) + + def call(self, input): + # Override call() instead of __call__ so we can perform some bookkeeping. + return tf.matmul(input, self.kernel) +``` + +Use `tf.keras.layers.Dense` layer instead of `MySimpleLayer` above as it has +a superset of its functionality (it can also add a bias). + +When composing layers into models you can use `tf.keras.Sequential` to represent +models which are a linear stack of layers. It is easy to use for basic models: + +```py +model = tf.keras.Sequential([ + tf.keras.layers.Dense(10, input_shape=(784,)), # must declare input shape + tf.keras.layers.Dense(10) +]) +``` + +Alternatively, organize models in classes by inheriting from `tf.keras.Model`. +This is a container for layers that is a layer itself, allowing `tf.keras.Model` +objects to contain other `tf.keras.Model` objects. 
+ +```py +class MNISTModel(tf.keras.Model): + def __init__(self): + super(MNISTModel, self).__init__() + self.dense1 = tf.keras.layers.Dense(units=10) + self.dense2 = tf.keras.layers.Dense(units=10) + + def call(self, input): + """Run the model.""" + result = self.dense1(input) + result = self.dense2(result) + result = self.dense2(result) # reuse variables from dense2 layer + return result + +model = MNISTModel() +``` + +It's not required to set an input shape for the `tf.keras.Model` class since +the parameters are set the first time input is passed to the layer. + +`tf.keras.layers` classes create and contain their own model variables that +are tied to the lifetime of their layer objects. To share layer variables, share +their objects. + + +## Eager training + +### Computing gradients + +[Automatic differentiation](https://en.wikipedia.org/wiki/Automatic_differentiation) +is useful for implementing machine learning algorithms such as +[backpropagation](https://en.wikipedia.org/wiki/Backpropagation) for training +neural networks. During eager execution, use `tf.GradientTape` to trace +operations for computing gradients later. + +`tf.GradientTape` is an opt-in feature to provide maximal performance when +not tracing. Since different operations can occur during each call, all +forward-pass operations get recorded to a "tape". To compute the gradient, play +the tape backwards and then discard. A particular `tf.GradientTape` can only +compute one gradient; subsequent calls throw a runtime error. + +```py +w = tfe.Variable([[1.0]]) +with tf.GradientTape() as tape: + loss = w * w + +grad = tape.gradient(loss, w) +print(grad) # => tf.Tensor([[ 2.]], shape=(1, 1), dtype=float32) +``` + +Here's an example of `tf.GradientTape` that records forward-pass operations +to train a simple model: + +```py +# A toy dataset of points around 3 * x + 2 +NUM_EXAMPLES = 1000 +training_inputs = tf.random_normal([NUM_EXAMPLES]) +noise = tf.random_normal([NUM_EXAMPLES]) +training_outputs = training_inputs * 3 + 2 + noise + +def prediction(input, weight, bias): + return input * weight + bias + +# A loss function using mean-squared error +def loss(weights, biases): + error = prediction(training_inputs, weights, biases) - training_outputs + return tf.reduce_mean(tf.square(error)) + +# Return the derivative of loss with respect to weight and bias +def grad(weights, biases): + with tf.GradientTape() as tape: + loss_value = loss(weights, biases) + return tape.gradient(loss_value, [weights, biases]) + +train_steps = 200 +learning_rate = 0.01 +# Start with arbitrary values for W and B on the same batch of data +W = tfe.Variable(5.) +B = tfe.Variable(10.) + +print("Initial loss: {:.3f}".format(loss(W, B))) + +for i in range(train_steps): + dW, dB = grad(W, B) + W.assign_sub(dW * learning_rate) + B.assign_sub(dB * learning_rate) + if i % 20 == 0: + print("Loss at step {:03d}: {:.3f}".format(i, loss(W, B))) + +print("Final loss: {:.3f}".format(loss(W, B))) +print("W = {}, B = {}".format(W.numpy(), B.numpy())) +``` + +Output (exact numbers may vary): + +``` +Initial loss: 71.204 +Loss at step 000: 68.333 +Loss at step 020: 30.222 +Loss at step 040: 13.691 +Loss at step 060: 6.508 +Loss at step 080: 3.382 +Loss at step 100: 2.018 +Loss at step 120: 1.422 +Loss at step 140: 1.161 +Loss at step 160: 1.046 +Loss at step 180: 0.996 +Final loss: 0.974 +W = 3.01582956314, B = 2.1191945076 +``` + +Replay the `tf.GradientTape` to compute the gradients and apply them in a +training loop. 
This is demonstrated in an excerpt from the +[mnist_eager.py](https://github.com/tensorflow/models/blob/master/official/mnist/mnist_eager.py) +example: + +```py +dataset = tf.data.Dataset.from_tensor_slices((data.train.images, + data.train.labels)) +... +for (batch, (images, labels)) in enumerate(dataset): + ... + with tf.GradientTape() as tape: + logits = model(images, training=True) + loss_value = loss(logits, labels) + ... + grads = tape.gradient(loss_value, model.variables) + optimizer.apply_gradients(zip(grads, model.variables), + global_step=tf.train.get_or_create_global_step()) +``` + + +The following example creates a multi-layer model that classifies the standard +[MNIST handwritten digits](https://www.tensorflow.org/tutorials/layers). It +demonstrates the optimizer and layer APIs to build trainable graphs in an eager +execution environment. + +### Train a model + +Even without training, call the model and inspect the output in eager execution: + +```py +# Create a tensor representing a blank image +batch = tf.zeros([1, 1, 784]) +print(batch.shape) # => (1, 1, 784) + +result = model(batch) +# => tf.Tensor([[[ 0. 0., ..., 0.]]], shape=(1, 1, 10), dtype=float32) +``` + +This example uses the +[dataset.py module](https://github.com/tensorflow/models/blob/master/official/mnist/dataset.py) +from the +[TensorFlow MNIST example](https://github.com/tensorflow/models/tree/master/official/mnist); +download this file to your local directory. Run the following to download the +MNIST data files to your working directory and prepare a `tf.data.Dataset` +for training: + +```py +import dataset # download dataset.py file +dataset_train = dataset.train('./datasets').shuffle(60000).repeat(4).batch(32) +``` + +To train a model, define a loss function to optimize and then calculate +gradients. Use an optimizer to update the variables: + +```py +def loss(model, x, y): + prediction = model(x) + return tf.losses.sparse_softmax_cross_entropy(labels=y, logits=prediction) + +def grad(model, inputs, targets): + with tf.GradientTape() as tape: + loss_value = loss(model, inputs, targets) + return tape.gradient(loss_value, model.variables) + +optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001) + +x, y = iter(dataset_train).next() +print("Initial loss: {:.3f}".format(loss(model, x, y))) + +# Training loop +for (i, (x, y)) in enumerate(dataset_train): + # Calculate derivatives of the input function with respect to its parameters. + grads = grad(model, x, y) + # Apply the gradient to the model + optimizer.apply_gradients(zip(grads, model.variables), + global_step=tf.train.get_or_create_global_step()) + if i % 200 == 0: + print("Loss at step {:04d}: {:.3f}".format(i, loss(model, x, y))) + +print("Final loss: {:.3f}".format(loss(model, x, y))) +``` + +Output (exact numbers may vary): + +``` +Initial loss: 2.674 +Loss at step 0000: 2.593 +Loss at step 0200: 2.143 +Loss at step 0400: 2.009 +Loss at step 0600: 2.103 +Loss at step 0800: 1.621 +Loss at step 1000: 1.695 +... +Loss at step 6600: 0.602 +Loss at step 6800: 0.557 +Loss at step 7000: 0.499 +Loss at step 7200: 0.744 +Loss at step 7400: 0.681 +Final loss: 0.670 +``` + +And for faster training, move the computation to a GPU: + +```py +with tf.device("/gpu:0"): + for (i, (x, y)) in enumerate(dataset_train): + # minimize() is equivalent to the grad() and apply_gradients() calls. 
+ optimizer.minimize(lambda: loss(model, x, y), + global_step=tf.train.get_or_create_global_step()) +``` + +### Variables and optimizers + +`tfe.Variable` objects store mutable `tf.Tensor` values accessed during +training to make automatic differentiation easier. The parameters of a model can +be encapsulated in classes as variables. + +Better encapsulate model parameters by using `tfe.Variable` with +`tf.GradientTape`. For example, the automatic differentiation example above +can be rewritten: + +```py +class Model(tf.keras.Model): + def __init__(self): + super(Model, self).__init__() + self.W = tfe.Variable(5., name='weight') + self.B = tfe.Variable(10., name='bias') + def predict(self, inputs): + return inputs * self.W + self.B + +# A toy dataset of points around 3 * x + 2 +NUM_EXAMPLES = 2000 +training_inputs = tf.random_normal([NUM_EXAMPLES]) +noise = tf.random_normal([NUM_EXAMPLES]) +training_outputs = training_inputs * 3 + 2 + noise + +# The loss function to be optimized +def loss(model, inputs, targets): + error = model.predict(inputs) - targets + return tf.reduce_mean(tf.square(error)) + +def grad(model, inputs, targets): + with tf.GradientTape() as tape: + loss_value = loss(model, inputs, targets) + return tape.gradient(loss_value, [model.W, model.B]) + +# Define: +# 1. A model. +# 2. Derivatives of a loss function with respect to model parameters. +# 3. A strategy for updating the variables based on the derivatives. +model = Model() +optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01) + +print("Initial loss: {:.3f}".format(loss(model, training_inputs, training_outputs))) + +# Training loop +for i in range(300): + grads = grad(model, training_inputs, training_outputs) + optimizer.apply_gradients(zip(grads, [model.W, model.B]), + global_step=tf.train.get_or_create_global_step()) + if i % 20 == 0: + print("Loss at step {:03d}: {:.3f}".format(i, loss(model, training_inputs, training_outputs))) + +print("Final loss: {:.3f}".format(loss(model, training_inputs, training_outputs))) +print("W = {}, B = {}".format(model.W.numpy(), model.B.numpy())) +``` + +Output (exact numbers may vary): + +``` +Initial loss: 69.066 +Loss at step 000: 66.368 +Loss at step 020: 30.107 +Loss at step 040: 13.959 +Loss at step 060: 6.769 +Loss at step 080: 3.567 +Loss at step 100: 2.141 +Loss at step 120: 1.506 +Loss at step 140: 1.223 +Loss at step 160: 1.097 +Loss at step 180: 1.041 +Loss at step 200: 1.016 +Loss at step 220: 1.005 +Loss at step 240: 1.000 +Loss at step 260: 0.998 +Loss at step 280: 0.997 +Final loss: 0.996 +W = 2.99431324005, B = 2.02129220963 +``` + +## Use objects for state during eager execution + +With graph execution, program state (such as the variables) is stored in global +collections and their lifetime is managed by the `tf.Session` object. In +contrast, during eager execution the lifetime of state objects is determined by +the lifetime of their corresponding Python object. + +### Variables are objects + +During eager execution, variables persist until the last reference to the object +is removed, and is then deleted. + +```py +with tf.device("gpu:0"): + v = tfe.Variable(tf.random_normal([1000, 1000])) + v = None # v no longer takes up GPU memory +``` + +### Object-based saving + +`tfe.Checkpoint` can save and restore `tfe.Variable`s to and from +checkpoints: + +```py +x = tfe.Variable(10.) + +checkpoint = tfe.Checkpoint(x=x) # save as "x" + +x.assign(2.) # Assign a new value to the variables and save. +save_path = checkpoint.save('./ckpt/') + +x.assign(11.) 
# Change the variable after saving. + +# Restore values from the checkpoint +checkpoint.restore(save_path) + +print(x) # => 2.0 +``` + +To save and load models, `tfe.Checkpoint` stores the internal state of objects, +without requiring hidden variables. To record the state of a `model`, +an `optimizer`, and a global step, pass them to a `tfe.Checkpoint`: + +```py +model = MyModel() +optimizer = tf.train.AdamOptimizer(learning_rate=0.001) +checkpoint_dir = ‘/path/to/model_dir’ +checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt") +root = tfe.Checkpoint(optimizer=optimizer, + model=model, + optimizer_step=tf.train.get_or_create_global_step()) + +root.save(file_prefix=checkpoint_prefix) +# or +root.restore(tf.train.latest_checkpoint(checkpoint_dir)) +``` + +### Object-oriented metrics + +`tfe.metrics` are stored as objects. Update a metric by passing the new data to +the callable, and retrieve the result using the `tfe.metrics.result` method, +for example: + +```py +m = tfe.metrics.Mean("loss") +m(0) +m(5) +m.result() # => 2.5 +m([8, 9]) +m.result() # => 5.5 +``` + +#### Summaries and TensorBoard + +@{$summaries_and_tensorboard$TensorBoard} is a visualization tool for +understanding, debugging and optimizing the model training process. It uses +summary events that are written while executing the program. + +`tf.contrib.summary` is compatible with both eager and graph execution +environments. Summary operations, such as `tf.contrib.summary.scalar`, are +inserted during model construction. For example, to record summaries once every +100 global steps: + +```py +writer = tf.contrib.summary.create_file_writer(logdir) +global_step=tf.train.get_or_create_global_step() # return global step var + +writer.set_as_default() + +for _ in range(iterations): + global_step.assign_add(1) + # Must include a record_summaries method + with tf.contrib.summary.record_summaries_every_n_global_steps(100): + # your model code goes here + tf.contrib.summary.scalar('loss', loss) + ... +``` + +## Advanced automatic differentiation topics + +### Dynamic models + +`tf.GradientTape` can also be used in dynamic models. This example for a +[backtracking line search](https://wikipedia.org/wiki/Backtracking_line_search) +algorithm looks like normal NumPy code, except there are gradients and is +differentiable, despite the complex control flow: + +```py +def line_search_step(fn, init_x, rate=1.0): + with tf.GradientTape() as tape: + # Variables are automatically recorded, but manually watch a tensor + tape.watch(init_x) + value = fn(init_x) + grad = tape.gradient(value, init_x) + grad_norm = tf.reduce_sum(grad * grad) + init_value = value + while value > init_value - rate * grad_norm: + x = init_x - rate * grad + value = fn(x) + rate /= 2.0 + return x, value +``` + +### Additional functions to compute gradients + +`tf.GradientTape` is a powerful interface for computing gradients, but there +is another [Autograd](https://github.com/HIPS/autograd)-style API available for +automatic differentiation. These functions are useful if writing math code with +only tensors and gradient functions, and without `tfe.Variables`: + +* `tfe.gradients_function` —Returns a function that computes the derivatives + of its input function parameter with respect to its arguments. The input + function parameter must return a scalar value. When the returned function is + invoked, it returns a list of `tf.Tensor` objects: one element for each + argument of the input function. 
Since anything of interest must be passed as a + function parameter, this becomes unwieldy if there's a dependency on many + trainable parameters. +* `tfe.value_and_gradients_function` —Similar to + `tfe.gradients_function`, but when the returned function is invoked, it + returns the value from the input function in addition to the list of + derivatives of the input function with respect to its arguments. + +In the following example, `tfe.gradients_function` takes the `square` +function as an argument and returns a function that computes the partial +derivatives of `square` with respect to its inputs. To calculate the derivative +of `square` at `3`, `grad(3.0)` returns `6`. + +```py +def square(x): + return tf.multiply(x, x) + +grad = tfe.gradients_function(square) + +square(3.) # => 9.0 +grad(3.) # => [6.0] + +# The second-order derivative of square: +gradgrad = tfe.gradients_function(lambda x: grad(x)[0]) +gradgrad(3.) # => [2.0] + +# The third-order derivative is None: +gradgradgrad = tfe.gradients_function(lambda x: gradgrad(x)[0]) +gradgradgrad(3.) # => [None] + + +# With flow control: +def abs(x): + return x if x > 0. else -x + +grad = tfe.gradients_function(abs) + +grad(3.) # => [1.0] +grad(-3.) # => [-1.0] +``` + +### Custom gradients + +Custom gradients are an easy way to override gradients in eager and graph +execution. Within the forward function, define the gradient with respect to the +inputs, outputs, or intermediate results. For example, here's an easy way to clip +the norm of the gradients in the backward pass: + +```py +@tf.custom_gradient +def clip_gradient_by_norm(x, norm): + y = tf.identity(x) + def grad_fn(dresult): + return [tf.clip_by_norm(dresult, norm), None] + return y, grad_fn +``` + +Custom gradients are commonly used to provide a numerically stable gradient for a +sequence of operations: + +```py +def log1pexp(x): + return tf.log(1 + tf.exp(x)) +grad_log1pexp = tfe.gradients_function(log1pexp) + +# The gradient computation works fine at x = 0. +grad_log1pexp(0.) # => [0.5] + +# However, x = 100 fails because of numerical instability. +grad_log1pexp(100.) # => [nan] +``` + +Here, the `log1pexp` function can be analytically simplified with a custom +gradient. The implementation below reuses the value for `tf.exp(x)` that is +computed during the forward pass—making it more efficient by eliminating +redundant calculations: + +```py +@tf.custom_gradient +def log1pexp(x): + e = tf.exp(x) + def grad(dy): + return dy * (1 - 1 / (1 + e)) + return tf.log(1 + e), grad + +grad_log1pexp = tfe.gradients_function(log1pexp) + +# As before, the gradient computation works fine at x = 0. +grad_log1pexp(0.) # => [0.5] + +# And the gradient computation also works at x = 100. +grad_log1pexp(100.) # => [1.0] +``` + +## Performance + +Computation is automatically offloaded to GPUs during eager execution. If you +want control over where a computation runs you can enclose it in a +`tf.device('/gpu:0')` block (or the CPU equivalent): + +```py +import time + +def measure(x, steps): + # TensorFlow initializes a GPU the first time it's used, exclude from timing. 
+ tf.matmul(x, x) + start = time.time() + for i in range(steps): + x = tf.matmul(x, x) + _ = x.numpy() # Make sure to execute op and not just enqueue it + end = time.time() + return end - start + +shape = (1000, 1000) +steps = 200 +print("Time to multiply a {} matrix by itself {} times:".format(shape, steps)) + +# Run on CPU: +with tf.device("/cpu:0"): + print("CPU: {} secs".format(measure(tf.random_normal(shape), steps))) + +# Run on GPU, if available: +if tfe.num_gpus() > 0: + with tf.device("/gpu:0"): + print("GPU: {} secs".format(measure(tf.random_normal(shape), steps))) +else: + print("GPU: not found") +``` + +Output (exact numbers depend on hardware): + +``` +Time to multiply a (1000, 1000) matrix by itself 200 times: +CPU: 4.614904403686523 secs +GPU: 0.5581181049346924 secs +``` + +A `tf.Tensor` object can be copied to a different device to execute its +operations: + +```py +x = tf.random_normal([10, 10]) + +x_gpu0 = x.gpu() +x_cpu = x.cpu() + +_ = tf.matmul(x_cpu, x_cpu) # Runs on CPU +_ = tf.matmul(x_gpu0, x_gpu0) # Runs on GPU:0 + +if tfe.num_gpus() > 1: + x_gpu1 = x.gpu(1) + _ = tf.matmul(x_gpu1, x_gpu1) # Runs on GPU:1 +``` + +### Benchmarks + +For compute-heavy models, such as +[ResNet50](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/eager/python/examples/resnet50) +training on a GPU, eager execution performance is comparable to graph execution. +But this gap grows larger for models with less computation and there is work to +be done for optimizing hot code paths for models with lots of small operations. + + +## Work with graphs + +While eager execution makes development and debugging more interactive, +TensorFlow graph execution has advantages for distributed training, performance +optimizations, and production deployment. However, writing graph code can feel +different than writing regular Python code and more difficult to debug. + +For building and training graph-constructed models, the Python program first +builds a graph representing the computation, then invokes `Session.run` to send +the graph for execution on the C++-based runtime. This provides: + +* Automatic differentiation using static autodiff. +* Simple deployment to a platform independent server. +* Graph-based optimizations (common subexpression elimination, constant-folding, etc.). +* Compilation and kernel fusion. +* Automatic distribution and replication (placing nodes on the distributed system). + +Deploying code written for eager execution is more difficult: either generate a +graph from the model, or run the Python runtime and code directly on the server. + +### Write compatible code + +The same code written for eager execution will also build a graph during graph +execution. Do this by simply running the same code in a new Python session where +eager execution is not enabled. + +Most TensorFlow operations work during eager execution, but there are some things +to keep in mind: + +* Use `tf.data` for input processing instead of queues. It's faster and easier. +* Use object-oriented layer APIs—like `tf.keras.layers` and + `tf.keras.Model`—since they have explicit storage for variables. +* Most model code works the same during eager and graph execution, but there are + exceptions. (For example, dynamic models using Python control flow to change the + computation based on inputs.) +* Once eager execution is enabled with `tf.enable_eager_execution`, it + cannot be turned off. Start a new Python session to return to graph execution. 
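+
+For example, the following minimal sketch (the layer sizes and batch shape are
+placeholders) uses only constructs that behave the same way under both modes;
+the only difference is how results are fetched:
+
+```py
+import tensorflow as tf
+
+# tf.enable_eager_execution()  # Uncomment to run this same code eagerly.
+
+def build_model():
+  # Object-oriented layers work identically in eager and graph execution.
+  return tf.keras.Sequential([
+      tf.keras.layers.Dense(10, activation=tf.nn.relu, input_shape=(784,)),
+      tf.keras.layers.Dense(10)
+  ])
+
+model = build_model()
+batch = tf.zeros([1, 784])
+logits = model(batch)
+
+if tf.executing_eagerly():
+  print(logits.numpy())        # Concrete values are available immediately.
+else:
+  with tf.Session() as sess:
+    sess.run(tf.global_variables_initializer())
+    print(sess.run(logits))    # Graph execution requires a Session.run() call.
+```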
+ +It's best to write code for both eager execution *and* graph execution. This +gives you eager's interactive experimentation and debuggability with the +distributed performance benefits of graph execution. + +Write, debug, and iterate in eager execution, then import the model graph for +production deployment. Use `tfe.Checkpoint` to save and restore model +variables, this allows movement between eager and graph execution environments. +See the examples in: +[tensorflow/contrib/eager/python/examples](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/eager/python/examples). + +### Use eager execution in a graph environment + +Selectively enable eager execution in a TensorFlow graph environment using +`tfe.py_func`. This is used when `tf.enable_eager_execution()` has *not* +been called. + +```py +def my_py_func(x): + x = tf.matmul(x, x) # You can use tf ops + print(x) # but it's eager! + return x + +with tf.Session() as sess: + x = tf.placeholder(dtype=tf.float32) + # Call eager function in graph! + pf = tfe.py_func(my_py_func, [x], tf.float32) + sess.run(pf, feed_dict={x: [[2.0]]}) # [[4.0]] +``` diff --git a/tensorflow/docs_src/guide/embedding.md b/tensorflow/docs_src/guide/embedding.md new file mode 100644 index 0000000000..8a98367dfb --- /dev/null +++ b/tensorflow/docs_src/guide/embedding.md @@ -0,0 +1,262 @@ +# Embeddings + +This document introduces the concept of embeddings, gives a simple example of +how to train an embedding in TensorFlow, and explains how to view embeddings +with the TensorBoard Embedding Projector +([live example](http://projector.tensorflow.org)). The first two parts target +newcomers to machine learning or TensorFlow, and the Embedding Projector how-to +is for users at all levels. + +An alternative tutorial on these concepts is available in the +[Embeddings section of Machine Learning Crash Course](https://developers.google.com/machine-learning/crash-course/embeddings/video-lecture). + +[TOC] + +An **embedding** is a mapping from discrete objects, such as words, to vectors +of real numbers. For example, a 300-dimensional embedding for English words +could include: + +``` +blue: (0.01359, 0.00075997, 0.24608, ..., -0.2524, 1.0048, 0.06259) +blues: (0.01396, 0.11887, -0.48963, ..., 0.033483, -0.10007, 0.1158) +orange: (-0.24776, -0.12359, 0.20986, ..., 0.079717, 0.23865, -0.014213) +oranges: (-0.35609, 0.21854, 0.080944, ..., -0.35413, 0.38511, -0.070976) +``` + +The individual dimensions in these vectors typically have no inherent meaning. +Instead, it's the overall patterns of location and distance between vectors +that machine learning takes advantage of. + +Embeddings are important for input to machine learning. Classifiers, and neural +networks more generally, work on vectors of real numbers. They train best on +dense vectors, where all values contribute to define an object. However, many +important inputs to machine learning, such as words of text, do not have a +natural vector representation. Embedding functions are the standard and +effective way to transform such discrete input objects into useful +continuous vectors. + +Embeddings are also valuable as outputs of machine learning. Because embeddings +map objects to vectors, applications can use similarity in vector space (for +instance, Euclidean distance or the angle between vectors) as a robust and +flexible measure of object similarity. One common use is to find nearest +neighbors. 
Using the same word embeddings as above, for instance, here are the +three nearest neighbors for each word and the corresponding angles: + +``` +blue: (red, 47.6°), (yellow, 51.9°), (purple, 52.4°) +blues: (jazz, 53.3°), (folk, 59.1°), (bluegrass, 60.6°) +orange: (yellow, 53.5°), (colored, 58.0°), (bright, 59.9°) +oranges: (apples, 45.3°), (lemons, 48.3°), (mangoes, 50.4°) +``` + +This would tell an application that apples and oranges are in some way more +similar (45.3° apart) than lemons and oranges (48.3° apart). + +## Embeddings in TensorFlow + +To create word embeddings in TensorFlow, we first split the text into words +and then assign an integer to every word in the vocabulary. Let us assume that +this has already been done, and that `word_ids` is a vector of these integers. +For example, the sentence “I have a cat.” could be split into +`[“I”, “have”, “a”, “cat”, “.”]` and then the corresponding `word_ids` tensor +would have shape `[5]` and consist of 5 integers. To map these word ids +to vectors, we need to create the embedding variable and use the +`tf.nn.embedding_lookup` function as follows: + +``` +word_embeddings = tf.get_variable(“word_embeddings”, + [vocabulary_size, embedding_size]) +embedded_word_ids = tf.nn.embedding_lookup(word_embeddings, word_ids) +``` + +After this, the tensor `embedded_word_ids` will have shape `[5, embedding_size]` +in our example and contain the embeddings (dense vectors) for each of the 5 +words. At the end of training, `word_embeddings` will contain the embeddings +for all words in the vocabulary. + +Embeddings can be trained in many network types, and with various loss +functions and data sets. For example, one could use a recurrent neural network +to predict the next word from the previous one given a large corpus of +sentences, or one could train two networks to do multi-lingual translation. +These methods are described in the @{$word2vec$Vector Representations of Words} +tutorial. + +## Visualizing Embeddings + +TensorBoard includes the **Embedding Projector**, a tool that lets you +interactively visualize embeddings. This tool can read embeddings from your +model and render them in two or three dimensions. + +The Embedding Projector has three panels: + +- *Data panel* on the top left, where you can choose the run, the embedding + variable and data columns to color and label points by. +- *Projections panel* on the bottom left, where you can choose the type of + projection. +- *Inspector panel* on the right side, where you can search for particular + points and see a list of nearest neighbors. + +### Projections +The Embedding Projector provides three ways to reduce the dimensionality of a +data set. + +- *[t-SNE](https://en.wikipedia.org/wiki/T-distributed_stochastic_neighbor_embedding)*: + a nonlinear nondeterministic algorithm (T-distributed stochastic neighbor + embedding) that tries to preserve local neighborhoods in the data, often at + the expense of distorting global structure. You can choose whether to compute + two- or three-dimensional projections. + +- *[PCA](https://en.wikipedia.org/wiki/Principal_component_analysis)*: + a linear deterministic algorithm (principal component analysis) that tries to + capture as much of the data variability in as few dimensions as possible. PCA + tends to highlight large-scale structure in the data, but can distort local + neighborhoods. The Embedding Projector computes the top 10 principal + components, from which you can choose two or three to view. 
+ +- *Custom*: a linear projection onto horizontal and vertical axes that you + specify using labels in the data. You define the horizontal axis, for + instance, by giving text patterns for "Left" and "Right". The Embedding + Projector finds all points whose label matches the "Left" pattern and + computes the centroid of that set; similarly for "Right". The line passing + through these two centroids defines the horizontal axis. The vertical axis is + likewise computed from the centroids for points matching the "Up" and "Down" + text patterns. + +Further useful articles are +[How to Use t-SNE Effectively](https://distill.pub/2016/misread-tsne/) and +[Principal Component Analysis Explained Visually](http://setosa.io/ev/principal-component-analysis/). + +### Exploration + +You can explore visually by zooming, rotating, and panning using natural +click-and-drag gestures. Hovering your mouse over a point will show any +[metadata](#metadata) for that point. You can also inspect nearest-neighbor +subsets. Clicking on a point causes the right pane to list the nearest +neighbors, along with distances to the current point. The nearest-neighbor +points are also highlighted in the projection. + +It is sometimes useful to restrict the view to a subset of points and perform +projections only on those points. To do so, you can select points in multiple +ways: + +- After clicking on a point, its nearest neighbors are also selected. +- After a search, the points matching the query are selected. +- Enabling selection, clicking on a point and dragging defines a selection + sphere. + +Then click the "Isolate *nnn* points" button at the top of the Inspector pane +on the right hand side. The following image shows 101 points selected and ready +for the user to click "Isolate 101 points": + +![Selection of nearest neighbors](https://www.tensorflow.org/images/embedding-nearest-points.png "Selection of nearest neighbors") + +*Selection of the nearest neighbors of “important” in a word embedding dataset.* + +Advanced tip: filtering with custom projection can be powerful. Below, we +filtered the 100 nearest neighbors of “politics” and projected them onto the +“worst” - “best” vector as an x axis. The y axis is random. As a result, one +finds on the right side “ideas”, “science”, “perspective”, “journalism” but on +the left “crisis”, “violence” and “conflict”. + + + + + + + + + + +
+ Custom controls panel + + Custom projection +
+ Custom projection controls. + + Custom projection of neighbors of "politics" onto "best" - "worst" vector. +
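+
+The custom projection described above boils down to a centroid computation. The
+following is a rough numpy sketch, not the Projector's actual implementation;
+the embedding matrix and the label masks are illustrative placeholders:
+
+```python
+import numpy as np
+
+def custom_axis(embeddings, left_mask, right_mask):
+    """Project points onto the axis through the "Left" and "Right" centroids."""
+    left_centroid = embeddings[left_mask].mean(axis=0)
+    right_centroid = embeddings[right_mask].mean(axis=0)
+    axis = right_centroid - left_centroid      # line through the two centroids
+    axis /= np.linalg.norm(axis)               # normalize to unit length
+    return embeddings @ axis                   # horizontal coordinate per point
+```
+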
+ +To share your findings, you can use the bookmark panel in the bottom right +corner and save the current state (including computed coordinates of any +projection) as a small file. The Projector can then be pointed to a set of one +or more of these files, producing the panel below. Other users can then walk +through a sequence of bookmarks. + +Bookmark panel + +### Metadata + +If you are working with an embedding, you'll probably want to attach +labels/images to the data points. You can do this by generating a metadata file +containing the labels for each point and clicking "Load data" in the data panel +of the Embedding Projector. + +The metadata can be either labels or images, which are +stored in a separate file. For labels, the format should +be a [TSV file](https://en.wikipedia.org/wiki/Tab-separated_values) +(tab characters shown in red) whose first line contains column headers +(shown in bold) and subsequent lines contain the metadata values. For example: + + +Word\tFrequency
+ Airplane\t345
+ Car\t241
+ ... +
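+
+A metadata file in this format can be generated with a few lines of Python. The
+following is a minimal sketch; the labels, counts, and output path are
+illustrative placeholders:
+
+```python
+import csv
+
+# One label per embedding row, listed in the same order as the rows of the
+# embedding variable.
+words = ["Airplane", "Car", "Boat"]
+counts = [345, 241, 129]
+
+with open("metadata.tsv", "w") as f:
+    writer = csv.writer(f, delimiter="\t")
+    writer.writerow(["Word", "Frequency"])  # header row with column names
+    for word, count in zip(words, counts):
+        writer.writerow([word, count])
+```
+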
+ +The order of lines in the metadata file is assumed to match the order of +vectors in the embedding variable, except for the header. Consequently, the +(i+1)-th line in the metadata file corresponds to the i-th row of the embedding +variable. If the TSV metadata file has only a single column, then we don’t +expect a header row, and assume each row is the label of the embedding. We +include this exception because it matches the commonly-used "vocab file" +format. + +To use images as metadata, you must produce a single +[sprite image](https://www.google.com/webhp#q=what+is+a+sprite+image), +consisting of small thumbnails, one for each vector in the embedding. The +sprite should store thumbnails in row-first order: the first data point placed +in the top left and the last data point in the bottom right, though the last +row doesn't have to be filled, as shown below. + + + + + + + + + + + + + + + + + +
012
345
67
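+
+One way to assemble such a sprite is to paste thumbnails onto a single canvas in
+row-first order. Here is a rough sketch using the Pillow library; the thumbnail
+size, file names, and output path are illustrative:
+
+```python
+import math
+from PIL import Image
+
+def make_sprite(thumbnail_paths, thumb_size=(28, 28), out_path="sprite.png"):
+    n = len(thumbnail_paths)
+    grid = int(math.ceil(math.sqrt(n)))  # thumbnails per row and per column
+    sprite = Image.new("RGB", (grid * thumb_size[0], grid * thumb_size[1]))
+    for i, path in enumerate(thumbnail_paths):
+        row, col = divmod(i, grid)       # row-first: first data point top left
+        thumb = Image.open(path).resize(thumb_size)
+        sprite.paste(thumb, (col * thumb_size[0], row * thumb_size[1]))
+    sprite.save(out_path)
+```
+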
+ +Follow [this link](https://www.tensorflow.org/images/embedding-mnist.mp4) +to see a fun example of thumbnail images in the Embedding Projector. + + +## Mini-FAQ + +**Is "embedding" an action or a thing?** +Both. People talk about embedding words in a vector space (action) and about +producing word embeddings (things). Common to both is the notion of embedding +as a mapping from discrete objects to vectors. Creating or applying that +mapping is an action, but the mapping itself is a thing. + +**Are embeddings high-dimensional or low-dimensional?** +It depends. A 300-dimensional vector space of words and phrases, for instance, +is often called low-dimensional (and dense) when compared to the millions of +words and phrases it can contain. But mathematically it is high-dimensional, +displaying many properties that are dramatically different from what our human +intuition has learned about 2- and 3-dimensional spaces. + +**Is an embedding the same as an embedding layer?** +No. An *embedding layer* is a part of neural network, but an *embedding* is a more +general concept. diff --git a/tensorflow/docs_src/guide/estimators.md b/tensorflow/docs_src/guide/estimators.md new file mode 100644 index 0000000000..78b30c3040 --- /dev/null +++ b/tensorflow/docs_src/guide/estimators.md @@ -0,0 +1,193 @@ +# Estimators + +This document introduces @{tf.estimator$**Estimators**}--a high-level TensorFlow +API that greatly simplifies machine learning programming. Estimators encapsulate +the following actions: + +* training +* evaluation +* prediction +* export for serving + +You may either use the pre-made Estimators we provide or write your +own custom Estimators. All Estimators--whether pre-made or custom--are +classes based on the @{tf.estimator.Estimator} class. + +Note: TensorFlow also includes a deprecated `Estimator` class at +@{tf.contrib.learn.Estimator}, which you should not use. + + +## Advantages of Estimators + +Estimators provide the following benefits: + +* You can run Estimator-based models on a local host or on a + distributed multi-server environment without changing your model. + Furthermore, you can run Estimator-based models on CPUs, GPUs, + or TPUs without recoding your model. +* Estimators simplify sharing implementations between model developers. +* You can develop a state of the art model with high-level intuitive code. + In short, it is generally much easier to create models with Estimators + than with the low-level TensorFlow APIs. +* Estimators are themselves built on @{tf.layers}, which + simplifies customization. +* Estimators build the graph for you. +* Estimators provide a safe distributed training loop that controls how and + when to: + * build the graph + * initialize variables + * start queues + * handle exceptions + * create checkpoint files and recover from failures + * save summaries for TensorBoard + +When writing an application with Estimators, you must separate the data input +pipeline from the model. This separation simplifies experiments with +different data sets. + + +## Pre-made Estimators + +Pre-made Estimators enable you to work at a much higher conceptual level +than the base TensorFlow APIs. You no longer have to worry about creating +the computational graph or sessions since Estimators handle all +the "plumbing" for you. That is, pre-made Estimators create and manage +@{tf.Graph$`Graph`} and @{tf.Session$`Session`} objects for you. Furthermore, +pre-made Estimators let you experiment with different model architectures by +making only minimal code changes. 
@{tf.estimator.DNNClassifier$`DNNClassifier`}, +for example, is a pre-made Estimator class that trains classification models +based on dense, feed-forward neural networks. + + +### Structure of a pre-made Estimators program + +A TensorFlow program relying on a pre-made Estimator typically consists +of the following four steps: + +1. **Write one or more dataset importing functions.** For example, you might + create one function to import the training set and another function to + import the test set. Each dataset importing function must return two + objects: + + * a dictionary in which the keys are feature names and the + values are Tensors (or SparseTensors) containing the corresponding + feature data + * a Tensor containing one or more labels + + For example, the following code illustrates the basic skeleton for + an input function: + + def input_fn(dataset): + ... # manipulate dataset, extracting the feature dict and the label + return feature_dict, label + + (See @{$guide/datasets} for full details.) + +2. **Define the feature columns.** Each @{tf.feature_column} + identifies a feature name, its type, and any input pre-processing. + For example, the following snippet creates three feature + columns that hold integer or floating-point data. The first two + feature columns simply identify the feature's name and type. The + third feature column also specifies a lambda the program will invoke + to scale the raw data: + + # Define three numeric feature columns. + population = tf.feature_column.numeric_column('population') + crime_rate = tf.feature_column.numeric_column('crime_rate') + median_education = tf.feature_column.numeric_column('median_education', + normalizer_fn=lambda x: x - global_education_mean) + +3. **Instantiate the relevant pre-made Estimator.** For example, here's + a sample instantiation of a pre-made Estimator named `LinearClassifier`: + + # Instantiate an estimator, passing the feature columns. + estimator = tf.estimator.LinearClassifier( + feature_columns=[population, crime_rate, median_education], + ) + +4. **Call a training, evaluation, or inference method.** + For example, all Estimators provide a `train` method, which trains a model. + + # my_training_set is the function created in Step 1 + estimator.train(input_fn=my_training_set, steps=2000) + + +### Benefits of pre-made Estimators + +Pre-made Estimators encode best practices, providing the following benefits: + +* Best practices for determining where different parts of the computational + graph should run, implementing strategies on a single machine or on a + cluster. +* Best practices for event (summary) writing and universally useful + summaries. + +If you don't use pre-made Estimators, you must implement the preceding +features yourself. + + +## Custom Estimators + +The heart of every Estimator--whether pre-made or custom--is its +**model function**, which is a method that builds graphs for training, +evaluation, and prediction. When you are using a pre-made Estimator, +someone else has already implemented the model function. When relying +on a custom Estimator, you must write the model function yourself. A +@{$custom_estimators$companion document} +explains how to write the model function. + + +## Recommended workflow + +We recommend the following workflow: + +1. Assuming a suitable pre-made Estimator exists, use it to build your + first model and use its results to establish a baseline. +2. 
Use this pre-made Estimator to build and test your overall pipeline,
+   including the integrity and reliability of your data.
+3. If suitable alternative pre-made Estimators are available, run
+   experiments to determine which pre-made Estimator produces the
+   best results.
+4. Possibly, further improve your model by building your own custom Estimator.
+
+
+## Creating Estimators from Keras models
+
+You can convert existing Keras models to Estimators. Doing so enables your Keras
+model to access Estimator's strengths, such as distributed training. Call
+@{tf.keras.estimator.model_to_estimator} as in the
+following sample:
+
+```python
+# Instantiate a Keras inception v3 model.
+keras_inception_v3 = tf.keras.applications.inception_v3.InceptionV3(weights=None)
+# Compile the model with the optimizer, loss, and metrics you'd like to train with.
+keras_inception_v3.compile(optimizer=tf.keras.optimizers.SGD(lr=0.0001, momentum=0.9),
+                           loss='categorical_crossentropy',
+                           metrics=['accuracy'])
+# Create an Estimator from the compiled Keras model. Note the initial model
+# state of the Keras model is preserved in the created Estimator.
+est_inception_v3 = tf.keras.estimator.model_to_estimator(keras_model=keras_inception_v3)
+
+# Treat the derived Estimator as you would any other Estimator.
+# First, recover the input name(s) of the Keras model, so we can use them as the
+# feature column name(s) of the Estimator input function:
+keras_inception_v3.input_names  # print out: ['input_1']
+# Once we have the input name(s), we can create the input function, for example,
+# for input(s) in the format of numpy ndarray:
+train_input_fn = tf.estimator.inputs.numpy_input_fn(
+    x={"input_1": train_data},
+    y=train_labels,
+    num_epochs=1,
+    shuffle=False)
+# To train, we call the Estimator's train function:
+est_inception_v3.train(input_fn=train_input_fn, steps=2000)
+```
+
+Note that the names of the feature columns and labels of a Keras Estimator come
+from the corresponding compiled Keras model. For example, the input key names for
+`train_input_fn` above can be obtained from `keras_inception_v3.input_names`,
+and similarly, the predicted output names can be obtained from
+`keras_inception_v3.output_names`.
+
+For more details, please refer to the documentation for
+@{tf.keras.estimator.model_to_estimator}.
diff --git a/tensorflow/docs_src/guide/faq.md b/tensorflow/docs_src/guide/faq.md
new file mode 100644
index 0000000000..b6291a9ffa
--- /dev/null
+++ b/tensorflow/docs_src/guide/faq.md
@@ -0,0 +1,297 @@
+# Frequently Asked Questions
+
+This document provides answers to some of the frequently asked questions about
+TensorFlow. If you have a question that is not covered here, you might find an
+answer on one of the TensorFlow @{$about$community resources}.
+
+[TOC]
+
+## Features and Compatibility
+
+#### Can I run distributed training on multiple computers?
+
+Yes! TensorFlow gained
+@{$distributed$support for distributed computation} in
+version 0.8. TensorFlow now supports multiple devices (CPUs and GPUs) in one or
+more computers.
+
+#### Does TensorFlow work with Python 3?
+
+As of the 0.6.0 release timeframe (early December 2015), we do support Python
+3.3+.
+
+## Building a TensorFlow graph
+
+See also the
+@{$python/framework$API documentation on building graphs}.
+
+#### Why does `c = tf.matmul(a, b)` not execute the matrix multiplication immediately?
+
+In the TensorFlow Python API, `a`, `b`, and `c` are
+@{tf.Tensor} objects.
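+
+A minimal sketch of this behavior (the exact tensor name printed may differ):
+
+```python
+import tensorflow as tf
+
+a = tf.constant([[1.0, 2.0]])
+b = tf.constant([[3.0], [4.0]])
+c = tf.matmul(a, b)
+print(c)  # e.g. Tensor("MatMul:0", shape=(1, 1), dtype=float32) -- no multiplication yet
+
+with tf.Session() as sess:
+    print(sess.run(c))  # [[11.]] -- the computation happens here
+```
+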
A `Tensor` object is +a symbolic handle to the result of an operation, but does not actually hold the +values of the operation's output. Instead, TensorFlow encourages users to build +up complicated expressions (such as entire neural networks and its gradients) as +a dataflow graph. You then offload the computation of the entire dataflow graph +(or a subgraph of it) to a TensorFlow +@{tf.Session}, which is able to execute the +whole computation much more efficiently than executing the operations +one-by-one. + +#### How are devices named? + +The supported device names are `"/device:CPU:0"` (or `"/cpu:0"`) for the CPU +device, and `"/device:GPU:i"` (or `"/gpu:i"`) for the *i*th GPU device. + +#### How do I place operations on a particular device? + +To place a group of operations on a device, create them within a +@{tf.device$`with tf.device(name):`} context. See +the how-to documentation on +@{$using_gpu$using GPUs with TensorFlow} for details of how +TensorFlow assigns operations to devices, and the +@{$deep_cnn$CIFAR-10 tutorial} for an example model that +uses multiple GPUs. + + +## Running a TensorFlow computation + +See also the +@{$python/client$API documentation on running graphs}. + +#### What's the deal with feeding and placeholders? + +Feeding is a mechanism in the TensorFlow Session API that allows you to +substitute different values for one or more tensors at run time. The `feed_dict` +argument to @{tf.Session.run} is a +dictionary that maps @{tf.Tensor} objects to +numpy arrays (and some other types), which will be used as the values of those +tensors in the execution of a step. + +#### What is the difference between `Session.run()` and `Tensor.eval()`? + +If `t` is a @{tf.Tensor} object, +@{tf.Tensor.eval} is shorthand for +@{tf.Session.run}, where `sess` is the +current @{tf.get_default_session}. The +two following snippets of code are equivalent: + +```python +# Using `Session.run()`. +sess = tf.Session() +c = tf.constant(5.0) +print(sess.run(c)) + +# Using `Tensor.eval()`. +c = tf.constant(5.0) +with tf.Session(): + print(c.eval()) +``` + +In the second example, the session acts as a +[context manager](https://docs.python.org/2.7/reference/compound_stmts.html#with), +which has the effect of installing it as the default session for the lifetime of +the `with` block. The context manager approach can lead to more concise code for +simple use cases (like unit tests); if your code deals with multiple graphs and +sessions, it may be more straightforward to make explicit calls to +`Session.run()`. + +#### Do Sessions have a lifetime? What about intermediate tensors? + +Sessions can own resources, such as +@{tf.Variable}, +@{tf.QueueBase}, and +@{tf.ReaderBase}. These resources can sometimes use +a significant amount of memory, and can be released when the session is closed by calling +@{tf.Session.close}. + +The intermediate tensors that are created as part of a call to +@{$python/client$`Session.run()`} will be freed at or before the +end of the call. + +#### Does the runtime parallelize parts of graph execution? + +The TensorFlow runtime parallelizes graph execution across many different +dimensions: + +* The individual ops have parallel implementations, using multiple cores in a + CPU, or multiple threads in a GPU. +* Independent nodes in a TensorFlow graph can run in parallel on multiple + devices, which makes it possible to speed up + @{$deep_cnn$CIFAR-10 training using multiple GPUs}. +* The Session API allows multiple concurrent steps (i.e. 
calls to + @{tf.Session.run} in parallel). This + enables the runtime to get higher throughput, if a single step does not use + all of the resources in your computer. + +#### Which client languages are supported in TensorFlow? + +TensorFlow is designed to support multiple client languages. +Currently, the best-supported client language is [Python](../api_docs/python/index.md). Experimental interfaces for +executing and constructing graphs are also available for +[C++](../api_docs/cc/index.md), [Java](../api_docs/java/reference/org/tensorflow/package-summary.html) and [Go](https://godoc.org/github.com/tensorflow/tensorflow/tensorflow/go). + +TensorFlow also has a +[C-based client API](https://www.tensorflow.org/code/tensorflow/c/c_api.h) +to help build support for more client languages. We invite contributions of new +language bindings. + +Bindings for various other languages (such as [C#](https://github.com/migueldeicaza/TensorFlowSharp), [Julia](https://github.com/malmaud/TensorFlow.jl), [Ruby](https://github.com/somaticio/tensorflow.rb) and [Scala](https://github.com/eaplatanios/tensorflow_scala)) created and supported by the open source community build on top of the C API supported by the TensorFlow maintainers. + +#### Does TensorFlow make use of all the devices (GPUs and CPUs) available on my machine? + +TensorFlow supports multiple GPUs and CPUs. See the how-to documentation on +@{$using_gpu$using GPUs with TensorFlow} for details of how +TensorFlow assigns operations to devices, and the +@{$deep_cnn$CIFAR-10 tutorial} for an example model that +uses multiple GPUs. + +Note that TensorFlow only uses GPU devices with a compute capability greater +than 3.5. + +#### Why does `Session.run()` hang when using a reader or a queue? + +The @{tf.ReaderBase} and +@{tf.QueueBase} classes provide special operations that +can *block* until input (or free space in a bounded queue) becomes +available. These operations allow you to build sophisticated +@{$reading_data$input pipelines}, at the cost of making the +TensorFlow computation somewhat more complicated. See the how-to documentation +for +@{$reading_data#creating_threads_to_prefetch_using_queuerunner_objects$using `QueueRunner` objects to drive queues and readers} +for more information on how to use them. + +## Variables + +See also the how-to documentation on @{$variables$variables} and +@{$python/state_ops$the API documentation for variables}. + +#### What is the lifetime of a variable? + +A variable is created when you first run the +@{tf.Variable.initializer} +operation for that variable in a session. It is destroyed when that +@{tf.Session.close}. + +#### How do variables behave when they are concurrently accessed? + +Variables allow concurrent read and write operations. The value read from a +variable may change if it is concurrently updated. By default, concurrent +assignment operations to a variable are allowed to run with no mutual exclusion. +To acquire a lock when assigning to a variable, pass `use_locking=True` to +@{tf.Variable.assign}. + +## Tensor shapes + +See also the +@{tf.TensorShape}. + +#### How can I determine the shape of a tensor in Python? + +In TensorFlow, a tensor has both a static (inferred) shape and a dynamic (true) +shape. The static shape can be read using the +@{tf.Tensor.get_shape} +method: this shape is inferred from the operations that were used to create the +tensor, and may be +@{tf.TensorShape$partially complete}. 
If the static +shape is not fully defined, the dynamic shape of a `Tensor` `t` can be +determined by evaluating @{tf.shape$`tf.shape(t)`}. + +#### What is the difference between `x.set_shape()` and `x = tf.reshape(x)`? + +The @{tf.Tensor.set_shape} method updates +the static shape of a `Tensor` object, and it is typically used to provide +additional shape information when this cannot be inferred directly. It does not +change the dynamic shape of the tensor. + +The @{tf.reshape} operation creates +a new tensor with a different dynamic shape. + +#### How do I build a graph that works with variable batch sizes? + +It is often useful to build a graph that works with variable batch sizes +so that the same code can be used for (mini-)batch training, and +single-instance inference. The resulting graph can be +@{tf.Graph.as_graph_def$saved as a protocol buffer} +and +@{tf.import_graph_def$imported into another program}. + +When building a variable-size graph, the most important thing to remember is not +to encode the batch size as a Python constant, but instead to use a symbolic +`Tensor` to represent it. The following tips may be useful: + +* Use [`batch_size = tf.shape(input)[0]`](../api_docs/python/array_ops.md#shape) + to extract the batch dimension from a `Tensor` called `input`, and store it in + a `Tensor` called `batch_size`. + +* Use @{tf.reduce_mean} instead + of `tf.reduce_sum(...) / batch_size`. + + +## TensorBoard + +#### How can I visualize a TensorFlow graph? + +See the @{$graph_viz$graph visualization tutorial}. + +#### What is the simplest way to send data to TensorBoard? + +Add summary ops to your TensorFlow graph, and write +these summaries to a log directory. Then, start TensorBoard using + + python tensorflow/tensorboard/tensorboard.py --logdir=path/to/log-directory + +For more details, see the +@{$summaries_and_tensorboard$Summaries and TensorBoard tutorial}. + +#### Every time I launch TensorBoard, I get a network security popup! + +You can change TensorBoard to serve on localhost rather than '0.0.0.0' by +the flag --host=localhost. This should quiet any security warnings. + +## Extending TensorFlow + +See the how-to documentation for +@{$adding_an_op$adding a new operation to TensorFlow}. + +#### My data is in a custom format. How do I read it using TensorFlow? + +There are three main options for dealing with data in a custom format. + +The easiest option is to write parsing code in Python that transforms the data +into a numpy array. Then, use @{tf.data.Dataset.from_tensor_slices} to +create an input pipeline from the in-memory data. + +If your data doesn't fit in memory, try doing the parsing in the Dataset +pipeline. Start with an appropriate file reader, like +@{tf.data.TextLineDataset}. Then convert the dataset by mapping +@{tf.data.Dataset.map$mapping} appropriate operations over it. +Prefer predefined TensorFlow operations such as @{tf.decode_raw}, +@{tf.decode_csv}, @{tf.parse_example}, or @{tf.image.decode_png}. + +If your data is not easily parsable with the built-in TensorFlow operations, +consider converting it, offline, to a format that is easily parsable, such +as @{tf.python_io.TFRecordWriter$`TFRecord`} format. + +The most efficient method to customize the parsing behavior is to +@{$adding_an_op$add a new op written in C++} that parses your +data format. The @{$new_data_formats$guide to handling new data formats} has +more information about the steps for doing this. + + +## Miscellaneous + +#### What is TensorFlow's coding style convention? 
+ +The TensorFlow Python API adheres to the +[PEP8](https://www.python.org/dev/peps/pep-0008/) conventions.* In +particular, we use `CamelCase` names for classes, and `snake_case` names for +functions, methods, and properties. We also adhere to the +[Google Python style guide](https://google.github.io/styleguide/pyguide.html). + +The TensorFlow C++ code base adheres to the +[Google C++ style guide](https://google.github.io/styleguide/cppguide.html). + +(* With one exception: we use 2-space indentation instead of 4-space +indentation.) + diff --git a/tensorflow/docs_src/guide/feature_columns.md b/tensorflow/docs_src/guide/feature_columns.md new file mode 100644 index 0000000000..1013ec910c --- /dev/null +++ b/tensorflow/docs_src/guide/feature_columns.md @@ -0,0 +1,572 @@ +# Feature Columns + +This document details feature columns. Think of **feature columns** as the +intermediaries between raw data and Estimators. Feature columns are very rich, +enabling you to transform a diverse range of raw data into formats that +Estimators can use, allowing easy experimentation. + +In @{$premade_estimators$Premade Estimators}, we used the premade +Estimator, @{tf.estimator.DNNClassifier$`DNNClassifier`} to train a model to +predict different types of Iris flowers from four input features. That example +created only numerical feature columns (of type +@{tf.feature_column.numeric_column}). Although numerical feature columns model +the lengths of petals and sepals effectively, real world data sets contain all +kinds of features, many of which are non-numerical. + +
+ +
+
+Some real-world features (such as longitude) are numerical, but many are not.
+ +## Input to a Deep Neural Network + +What kind of data can a deep neural network operate on? The answer +is, of course, numbers (for example, `tf.float32`). After all, every neuron in +a neural network performs multiplication and addition operations on weights and +input data. Real-life input data, however, often contains non-numerical +(categorical) data. For example, consider a `product_class` feature that can +contain the following three non-numerical values: + +* `kitchenware` +* `electronics` +* `sports` + +ML models generally represent categorical values as simple vectors in which a +1 represents the presence of a value and a 0 represents the absence of a value. +For example, when `product_class` is set to `sports`, an ML model would usually +represent `product_class` as `[0, 0, 1]`, meaning: + +* `0`: `kitchenware` is absent +* `0`: `electronics` is absent +* `1`: `sports` is present + +So, although raw data can be numerical or categorical, an ML model represents +all features as numbers. + +## Feature Columns + +As the following figure suggests, you specify the input to a model through the +`feature_columns` argument of an Estimator (`DNNClassifier` for Iris). +Feature Columns bridge input data (as returned by `input_fn`) with your model. + +
+ +
+
+Feature columns bridge raw data with the data your model needs. +
+ +To create feature columns, call functions from the +@{tf.feature_column} module. This document explains nine of the functions in +that module. As the following figure shows, all nine functions return either a +Categorical-Column or a Dense-Column object, except `bucketized_column`, which +inherits from both classes: + +
+ +
+
+Feature column methods fall into two main categories and one hybrid category. +
+ +Let's look at these functions in more detail. + +### Numeric column + +The Iris classifier calls the @{tf.feature_column.numeric_column} function for +all input features: + + * `SepalLength` + * `SepalWidth` + * `PetalLength` + * `PetalWidth` + +Although `tf.numeric_column` provides optional arguments, calling +`tf.numeric_column` without any arguments, as follows, is a fine way to specify +a numerical value with the default data type (`tf.float32`) as input to your +model: + +```python +# Defaults to a tf.float32 scalar. +numeric_feature_column = tf.feature_column.numeric_column(key="SepalLength") +``` + +To specify a non-default numerical data type, use the `dtype` argument. For +example: + +``` python +# Represent a tf.float64 scalar. +numeric_feature_column = tf.feature_column.numeric_column(key="SepalLength", + dtype=tf.float64) +``` + +By default, a numeric column creates a single value (scalar). Use the shape +argument to specify another shape. For example: + + +```python +# Represent a 10-element vector in which each cell contains a tf.float32. +vector_feature_column = tf.feature_column.numeric_column(key="Bowling", + shape=10) + +# Represent a 10x5 matrix in which each cell contains a tf.float32. +matrix_feature_column = tf.feature_column.numeric_column(key="MyMatrix", + shape=[10,5]) +``` +### Bucketized column + +Often, you don't want to feed a number directly into the model, but instead +split its value into different categories based on numerical ranges. To do so, +create a @{tf.feature_column.bucketized_column$bucketized column}. For +example, consider raw data that represents the year a house was built. Instead +of representing that year as a scalar numeric column, we could split the year +into the following four buckets: + +
+ +
+
+Dividing year data into four buckets. +
+ +The model will represent the buckets as follows: + +|Date Range |Represented as... | +|:----------|:-----------------| +|< 1960 | [1, 0, 0, 0] | +|>= 1960 but < 1980 | [0, 1, 0, 0] | +|>= 1980 but < 2000 | [0, 0, 1, 0] | +|>= 2000 | [0, 0, 0, 1] | + +Why would you want to split a number—a perfectly valid input to your +model—into a categorical value? Well, notice that the categorization splits a +single input number into a four-element vector. Therefore, the model now can +learn _four individual weights_ rather than just one; four weights creates a +richer model than one weight. More importantly, bucketizing enables the model +to clearly distinguish between different year categories since only one of the +elements is set (1) and the other three elements are cleared (0). For example, +when we just use a single number (a year) as input, a linear model can only +learn a linear relationship. So, bucketing provides the model with additional +flexibility that the model can use to learn. + +The following code demonstrates how to create a bucketized feature: + + +```python +# First, convert the raw input to a numeric column. +numeric_feature_column = tf.feature_column.numeric_column("Year") + +# Then, bucketize the numeric column on the years 1960, 1980, and 2000. +bucketized_feature_column = tf.feature_column.bucketized_column( + source_column = numeric_feature_column, + boundaries = [1960, 1980, 2000]) +``` +Note that specifying a _three_-element boundaries vector creates a +_four_-element bucketized vector. + + +### Categorical identity column + +**Categorical identity columns** can be seen as a special case of bucketized +columns. In traditional bucketized columns, each bucket represents a range of +values (for example, from 1960 to 1979). In a categorical identity column, each +bucket represents a single, unique integer. For example, let's say you want to +represent the integer range `[0, 4)`. That is, you want to represent the +integers 0, 1, 2, or 3. In this case, the categorical identity mapping looks +like this: + +
+ +
+
+A categorical identity column mapping. Note that this is a one-hot +encoding, not a binary numerical encoding. +
+ +As with bucketized columns, a model can learn a separate weight for each class +in a categorical identity column. For example, instead of using a string to +represent the `product_class`, let's represent each class with a unique integer +value. That is: + +* `0="kitchenware"` +* `1="electronics"` +* `2="sport"` + +Call @{tf.feature_column.categorical_column_with_identity} to implement a +categorical identity column. For example: + +``` python +# Create categorical output for an integer feature named "my_feature_b", +# The values of my_feature_b must be >= 0 and < num_buckets +identity_feature_column = tf.feature_column.categorical_column_with_identity( + key='my_feature_b', + num_buckets=4) # Values [0, 4) + +# In order for the preceding call to work, the input_fn() must return +# a dictionary containing 'my_feature_b' as a key. Furthermore, the values +# assigned to 'my_feature_b' must belong to the set [0, 4). +def input_fn(): + ... + return ({ 'my_feature_a':[7, 9, 5, 2], 'my_feature_b':[3, 1, 2, 2] }, + [Label_values]) +``` + +### Categorical vocabulary column + +We cannot input strings directly to a model. Instead, we must first map strings +to numeric or categorical values. Categorical vocabulary columns provide a good +way to represent strings as a one-hot vector. For example: + +
+ +
+
+Mapping string values to vocabulary columns. +
+ +As you can see, categorical vocabulary columns are kind of an enum version of +categorical identity columns. TensorFlow provides two different functions to +create categorical vocabulary columns: + +* @{tf.feature_column.categorical_column_with_vocabulary_list} +* @{tf.feature_column.categorical_column_with_vocabulary_file} + +`categorical_column_with_vocabulary_list` maps each string to an integer based +on an explicit vocabulary list. For example: + +```python +# Given input "feature_name_from_input_fn" which is a string, +# create a categorical feature by mapping the input to one of +# the elements in the vocabulary list. +vocabulary_feature_column = + tf.feature_column.categorical_column_with_vocabulary_list( + key=feature_name_from_input_fn, + vocabulary_list=["kitchenware", "electronics", "sports"]) +``` + +The preceding function is pretty straightforward, but it has a significant +drawback. Namely, there's way too much typing when the vocabulary list is long. +For these cases, call +`tf.feature_column.categorical_column_with_vocabulary_file` instead, which lets +you place the vocabulary words in a separate file. For example: + +```python + +# Given input "feature_name_from_input_fn" which is a string, +# create a categorical feature to our model by mapping the input to one of +# the elements in the vocabulary file +vocabulary_feature_column = + tf.feature_column.categorical_column_with_vocabulary_file( + key=feature_name_from_input_fn, + vocabulary_file="product_class.txt", + vocabulary_size=3) +``` + +`product_class.txt` should contain one line for each vocabulary element. In our +case: + +```None +kitchenware +electronics +sports +``` + +### Hashed Column + +So far, we've worked with a naively small number of categories. For example, +our product_class example has only 3 categories. Often though, the number of +categories can be so big that it's not possible to have individual categories +for each vocabulary word or integer because that would consume too much memory. +For these cases, we can instead turn the question around and ask, "How many +categories am I willing to have for my input?" In fact, the +@{tf.feature_column.categorical_column_with_hash_bucket} function enables you +to specify the number of categories. For this type of feature column the model +calculates a hash value of the input, then puts it into one of +the `hash_bucket_size` categories using the modulo operator, as in the following +pseudocode: + +```python +# pseudocode +feature_id = hash(raw_feature) % hash_buckets_size +``` + +The code to create the `feature_column` might look something like this: + +``` python +hashed_feature_column = + tf.feature_column.categorical_column_with_hash_bucket( + key = "some_feature", + hash_buckets_size = 100) # The number of categories +``` +At this point, you might rightfully think: "This is crazy!" After all, we are +forcing the different input values to a smaller set of categories. This means +that two probably unrelated inputs will be mapped to the same +category, and consequently mean the same thing to the neural network. The +following figure illustrates this dilemma, showing that kitchenware and sports +both get assigned to category (hash bucket) 12: + +
+ +
+
+Representing data with hash buckets. +
+ +As with many counterintuitive phenomena in machine learning, it turns out that +hashing often works well in practice. That's because hash categories provide +the model with some separation. The model can use additional features to further +separate kitchenware from sports. + +### Crossed column + +Combining features into a single feature, better known as +[feature crosses](https://developers.google.com/machine-learning/glossary/#feature_cross), +enables the model to learn separate weights for each combination of +features. + +More concretely, suppose we want our model to calculate real estate prices in +Atlanta, GA. Real-estate prices within this city vary greatly depending on +location. Representing latitude and longitude as separate features isn't very +useful in identifying real-estate location dependencies; however, crossing +latitude and longitude into a single feature can pinpoint locations. Suppose we +represent Atlanta as a grid of 100x100 rectangular sections, identifying each +of the 10,000 sections by a feature cross of latitude and longitude. This +feature cross enables the model to train on pricing conditions related to each +individual section, which is a much stronger signal than latitude and longitude +alone. + +The following figure shows our plan, with the latitude & longitude values for +the corners of the city in red text: + +
+ +
+
+Map of Atlanta. Imagine this map divided into 10,000 sections of +equal size. +
+ +For the solution, we used a combination of the `bucketized_column` we looked at +earlier, with the @{tf.feature_column.crossed_column} function. + + + +``` python +def make_dataset(latitude, longitude, labels): + assert latitude.shape == longitude.shape == labels.shape + + features = {'latitude': latitude.flatten(), + 'longitude': longitude.flatten()} + labels=labels.flatten() + + return tf.data.Dataset.from_tensor_slices((features, labels)) + + +# Bucketize the latitude and longitude using the `edges` +latitude_bucket_fc = tf.feature_column.bucketized_column( + tf.feature_column.numeric_column('latitude'), + list(atlanta.latitude.edges)) + +longitude_bucket_fc = tf.feature_column.bucketized_column( + tf.feature_column.numeric_column('longitude'), + list(atlanta.longitude.edges)) + +# Cross the bucketized columns, using 5000 hash bins. +crossed_lat_lon_fc = tf.feature_column.crossed_column( + [latitude_bucket_fc, longitude_bucket_fc], 5000) + +fc = [ + latitude_bucket_fc, + longitude_bucket_fc, + crossed_lat_lon_fc] + +# Build and train the Estimator. +est = tf.estimator.LinearRegressor(fc, ...) +``` + +You may create a feature cross from either of the following: + +* Feature names; that is, names from the `dict` returned from `input_fn`. +* Any categorical column, except `categorical_column_with_hash_bucket` + (since `crossed_column` hashes the input). + +When the feature columns `latitude_bucket_fc` and `longitude_bucket_fc` are +crossed, TensorFlow will create `(latitude_fc, longitude_fc)` pairs for each +example. This would produce a full grid of possibilities as follows: + +``` None + (0,0), (0,1)... (0,99) + (1,0), (1,1)... (1,99) + ... ... ... +(99,0), (99,1)...(99, 99) +``` + +Except that a full grid would only be tractable for inputs with limited +vocabularies. Instead of building this, potentially huge, table of inputs, +the `crossed_column` only builds the number requested by the `hash_bucket_size` +argument. The feature column assigns an example to a index by running a hash +function on the tuple of inputs, followed by a modulo operation with +`hash_bucket_size`. + +As discussed earlier, performing the +hash and modulo function limits the number of categories, but can cause category +collisions; that is, multiple (latitude, longitude) feature crosses will end +up in the same hash bucket. In practice though, performing feature crosses +still adds significant value to the learning capability of your models. + +Somewhat counterintuitively, when creating feature crosses, you typically still +should include the original (uncrossed) features in your model (as in the +preceding code snippet). The independent latitude and longitude features help the +model distinguish between examples where a hash collision has occurred in the +crossed feature. + +## Indicator and embedding columns + +Indicator columns and embedding columns never work on features directly, but +instead take categorical columns as input. + +When using an indicator column, we're telling TensorFlow to do exactly what +we've seen in our categorical product_class example. That is, an +**indicator column** treats each category as an element in a one-hot vector, +where the matching category has value 1 and the rest have 0s: + +
+ +
+
+Representing data in indicator columns. +
+ +Here's how you create an indicator column by calling +@{tf.feature_column.indicator_column}: + +``` python +categorical_column = ... # Create any type of categorical column. + +# Represent the categorical column as an indicator column. +indicator_column = tf.feature_column.indicator_column(categorical_column) +``` + +Now, suppose instead of having just three possible classes, we have a million. +Or maybe a billion. For a number of reasons, as the number of categories grow +large, it becomes infeasible to train a neural network using indicator columns. + +We can use an embedding column to overcome this limitation. Instead of +representing the data as a one-hot vector of many dimensions, an +**embedding column** represents that data as a lower-dimensional, ordinary +vector in which each cell can contain any number, not just 0 or 1. By +permitting a richer palette of numbers for every cell, an embedding column +contains far fewer cells than an indicator column. + +Let's look at an example comparing indicator and embedding columns. Suppose our +input examples consist of different words from a limited palette of only 81 +words. Further suppose that the data set provides the following input +words in 4 separate examples: + +* `"dog"` +* `"spoon"` +* `"scissors"` +* `"guitar"` + +In that case, the following figure illustrates the processing path for +embedding columns or indicator columns. + +
+ +
+
+An embedding column stores categorical data in a lower-dimensional +vector than an indicator column. (We just placed random numbers into the +embedding vectors; training determines the actual numbers.) +
+ +When an example is processed, one of the `categorical_column_with...` functions +maps the example string to a numerical categorical value. For example, a +function maps "spoon" to `[32]`. (The 32 comes from our imagination—the actual +values depend on the mapping function.) You may then represent these numerical +categorical values in either of the following two ways: + +* As an indicator column. A function converts each numeric categorical value + into an 81-element vector (because our palette consists of 81 words), placing + a 1 in the index of the categorical value (0, 32, 79, 80) and a 0 in all the + other positions. + +* As an embedding column. A function uses the numerical categorical values + `(0, 32, 79, 80)` as indices to a lookup table. Each slot in that lookup table + contains a 3-element vector. + +How do the values in the embeddings vectors magically get assigned? Actually, +the assignments happen during training. That is, the model learns the best way +to map your input numeric categorical values to the embeddings vector value in +order to solve your problem. Embedding columns increase your model's +capabilities, since an embeddings vector learns new relationships between +categories from the training data. + +Why is the embedding vector size 3 in our example? Well, the following "formula" +provides a general rule of thumb about the number of embedding dimensions: + +```python +embedding_dimensions = number_of_categories**0.25 +``` + +That is, the embedding vector dimension should be the 4th root of the number of +categories. Since our vocabulary size in this example is 81, the recommended +number of dimensions is 3: + +``` python +3 = 81**0.25 +``` +Note that this is just a general guideline; you can set the number of embedding +dimensions as you please. + +Call @{tf.feature_column.embedding_column} to create an `embedding_column` as +suggested by the following snippet: + +``` python +categorical_column = ... # Create any categorical column + +# Represent the categorical column as an embedding column. +# This means creating an embedding vector lookup table with one element for each category. +embedding_column = tf.feature_column.embedding_column( + categorical_column=categorical_column, + dimension=embedding_dimensions) +``` + +@{$guide/embedding$Embeddings} is a significant topic within machine +learning. This information was just to get you started using them as feature +columns. + +## Passing feature columns to Estimators + +As the following list indicates, not all Estimators permit all types of +`feature_columns` argument(s): + +* @{tf.estimator.LinearClassifier$`LinearClassifier`} and + @{tf.estimator.LinearRegressor$`LinearRegressor`}: Accept all types of + feature column. +* @{tf.estimator.DNNClassifier$`DNNClassifier`} and + @{tf.estimator.DNNRegressor$`DNNRegressor`}: Only accept dense columns. Other + column types must be wrapped in either an `indicator_column` or + `embedding_column`. +* @{tf.estimator.DNNLinearCombinedClassifier$`DNNLinearCombinedClassifier`} and + @{tf.estimator.DNNLinearCombinedRegressor$`DNNLinearCombinedRegressor`}: + * The `linear_feature_columns` argument accepts any feature column type. + * The `dnn_feature_columns` argument only accepts dense columns. + +## Other Sources + +For more examples on feature columns, view the following: + +* The @{$low_level_intro#feature_columns$Low Level Introduction} demonstrates how + experiment directly with `feature_columns` using TensorFlow's low level APIs. 
+* The @{$wide$wide} and @{$wide_and_deep$Wide & Deep} Tutorials solve a + binary classification problem using `feature_columns` on a variety of input + data types. + +To learn more about embeddings, see the following: + +* [Deep Learning, NLP, and representations](http://colah.github.io/posts/2014-07-NLP-RNNs-Representations/) + (Chris Olah's blog) +* The TensorFlow [Embedding Projector](http://projector.tensorflow.org) diff --git a/tensorflow/docs_src/guide/graph_viz.md b/tensorflow/docs_src/guide/graph_viz.md new file mode 100644 index 0000000000..f581ae56da --- /dev/null +++ b/tensorflow/docs_src/guide/graph_viz.md @@ -0,0 +1,316 @@ +# TensorBoard: Graph Visualization + +TensorFlow computation graphs are powerful but complicated. The graph visualization can help you understand and debug them. Here's an example of the visualization at work. + +![Visualization of a TensorFlow graph](https://www.tensorflow.org/images/graph_vis_animation.gif "Visualization of a TensorFlow graph") +*Visualization of a TensorFlow graph.* + +To see your own graph, run TensorBoard pointing it to the log directory of the job, click on the graph tab on the top pane and select the appropriate run using the menu at the upper left corner. For in depth information on how to run TensorBoard and make sure you are logging all the necessary information, see @{$summaries_and_tensorboard$TensorBoard: Visualizing Learning}. + +## Name scoping and nodes + +Typical TensorFlow graphs can have many thousands of nodes--far too many to see +easily all at once, or even to lay out using standard graph tools. To simplify, +variable names can be scoped and the visualization uses this information to +define a hierarchy on the nodes in the graph. By default, only the top of this +hierarchy is shown. Here is an example that defines three operations under the +`hidden` name scope using +@{tf.name_scope}: + +```python +import tensorflow as tf + +with tf.name_scope('hidden') as scope: + a = tf.constant(5, name='alpha') + W = tf.Variable(tf.random_uniform([1, 2], -1.0, 1.0), name='weights') + b = tf.Variable(tf.zeros([1]), name='biases') +``` + +This results in the following three op names: + +* `hidden/alpha` +* `hidden/weights` +* `hidden/biases` + +By default, the visualization will collapse all three into a node labeled `hidden`. +The extra detail isn't lost. You can double-click, or click +on the orange `+` sign in the top right to expand the node, and then you'll see +three subnodes for `alpha`, `weights` and `biases`. + +Here's a real-life example of a more complicated node in its initial and +expanded states. + + + + + + + + + + +
+ Unexpanded name scope + + Expanded name scope +
+ Initial view of top-level name scope pool_1. Clicking on the orange + button on the top right or double-clicking on the node itself will expand it. + + Expanded view of pool_1 name scope. Clicking on the orange - button on the top right or double-clicking on the node itself will collapse the name scope. +
+ +Grouping nodes by name scopes is critical to making a legible graph. If you're +building a model, name scopes give you control over the resulting visualization. +**The better your name scopes, the better your visualization.** + +The figure above illustrates a second aspect of the visualization. TensorFlow +graphs have two kinds of connections: data dependencies and control +dependencies. Data dependencies show the flow of tensors between two ops and +are shown as solid arrows, while control dependencies use dotted lines. In the +expanded view (right side of the figure above) all the connections are data +dependencies with the exception of the dotted line connecting `CheckNumerics` +and `control_dependency`. + +There's a second trick to simplifying the layout. Most TensorFlow graphs have a +few nodes with many connections to other nodes. For example, many nodes might +have a control dependency on an initialization step. Drawing all edges between +the `init` node and its dependencies would create a very cluttered view. + +To reduce clutter, the visualization separates out all high-degree nodes to an +*auxiliary* area on the right and doesn't draw lines to represent their edges. +Instead of lines, we draw small *node icons* to indicate the connections. +Separating out the auxiliary nodes typically doesn't remove critical +information since these nodes are usually related to bookkeeping functions. +See [Interaction](#interaction) for how to move nodes between the main graph +and the auxiliary area. + + + + + + + + + + +
+ conv_1 is part of the main graph + + save is extracted as auxiliary node +
+ Node conv_1 is connected to save. Note the little save node icon on its right. + + save has a high degree, and will appear as an auxiliary node. The connection with conv_1 is shown as a node icon on its left. To further reduce clutter, since save has a lot of connections, we show the first 5 and abbreviate the others as ... 12 more. +
+ +One last structural simplification is *series collapsing*. Sequential +motifs--that is, nodes whose names differ by a number at the end and have +isomorphic structures--are collapsed into a single *stack* of nodes, as shown +below. For networks with long sequences, this greatly simplifies the view. As +with hierarchical nodes, double-clicking expands the series. See +[Interaction](#interaction) for how to disable/enable series collapsing for a +specific set of nodes. + + + + + + + + + + +
+ Sequence of nodes + + Expanded sequence of nodes +
+ A collapsed view of a node sequence. + + A small piece of the expanded view, after double-click. +
+ +Finally, as one last aid to legibility, the visualization uses special icons +for constants and summary nodes. To summarize, here's a table of node symbols: + +Symbol | Meaning +--- | --- +![Name scope](https://www.tensorflow.org/images/namespace_node.png "Name scope") | *High-level* node representing a name scope. Double-click to expand a high-level node. +![Sequence of unconnected nodes](https://www.tensorflow.org/images/horizontal_stack.png "Sequence of unconnected nodes") | Sequence of numbered nodes that are not connected to each other. +![Sequence of connected nodes](https://www.tensorflow.org/images/vertical_stack.png "Sequence of connected nodes") | Sequence of numbered nodes that are connected to each other. +![Operation node](https://www.tensorflow.org/images/op_node.png "Operation node") | An individual operation node. +![Constant node](https://www.tensorflow.org/images/constant.png "Constant node") | A constant. +![Summary node](https://www.tensorflow.org/images/summary.png "Summary node") | A summary node. +![Data flow edge](https://www.tensorflow.org/images/dataflow_edge.png "Data flow edge") | Edge showing the data flow between operations. +![Control dependency edge](https://www.tensorflow.org/images/control_edge.png "Control dependency edge") | Edge showing the control dependency between operations. +![Reference edge](https://www.tensorflow.org/images/reference_edge.png "Reference edge") | A reference edge showing that the outgoing operation node can mutate the incoming tensor. + +## Interaction {#interaction} + +Navigate the graph by panning and zooming. Click and drag to pan, and use a +scroll gesture to zoom. Double-click on a node, or click on its `+` button, to +expand a name scope that represents a group of operations. To easily keep +track of the current viewpoint when zooming and panning, there is a minimap in +the bottom right corner. + +To close an open node, double-click it again or click its `-` button. You can +also click once to select a node. It will turn a darker color, and details +about it and the nodes it connects to will appear in the info card at upper +right corner of the visualization. + + + + + + + + + + +
+ Info card of a name scope + + Info card of operation node +
+ Info card showing detailed information for the conv2 name scope. The inputs and outputs are combined from the inputs and outputs of the operation nodes inside the name scope. For name scopes no attributes are shown. + + Info card showing detailed information for the DecodeRaw operation node. In addition to inputs and outputs, the card shows the device and the attributes associated with the current operation. +
+ +TensorBoard provides several ways to change the visual layout of the graph. This +doesn't change the graph's computational semantics, but it can bring some +clarity to the network's structure. By right clicking on a node or pressing +buttons on the bottom of that node's info card, you can make the following +changes to its layout: + +* Nodes can be moved between the main graph and the auxiliary area. +* A series of nodes can be ungrouped so that the nodes in the series do not +appear grouped together. Ungrouped series can likewise be regrouped. + +Selection can also be helpful in understanding high-degree nodes. Select any +high-degree node, and the corresponding node icons for its other connections +will be selected as well. This makes it easy, for example, to see which nodes +are being saved--and which aren't. + +Clicking on a node name in the info card will select it. If necessary, the +viewpoint will automatically pan so that the node is visible. + +Finally, you can choose two color schemes for your graph, using the color menu +above the legend. The default *Structure View* shows structure: when two +high-level nodes have the same structure, they appear in the same color of the +rainbow. Uniquely structured nodes are gray. There's a second view, which shows +what device the different operations run on. Name scopes are colored +proportionally to the fraction of devices for the operations inside them. + +The images below give an illustration for a piece of a real-life graph. + + + + + + + + + + +
+ Color by structure + + Color by device +
+ Structure view: The gray nodes have unique structure. The orange conv1 and conv2 nodes have the same structure, and analogously for nodes with other colors. + + Device view: Name scopes are colored proportionally to the fraction of devices of the operation nodes inside them. Here, purple means GPU and the green is CPU. +
+ +## Tensor shape information + +When the serialized `GraphDef` includes tensor shapes, the graph visualizer +labels edges with tensor dimensions, and edge thickness reflects total tensor +size. To include tensor shapes in the `GraphDef` pass the actual graph object +(as in `sess.graph`) to the `FileWriter` when serializing the graph. +The images below show the CIFAR-10 model with tensor shape information: + + + + + + + +
+ CIFAR-10 model with tensor shape information +
+ CIFAR-10 model with tensor shape information. +
+ +## Runtime statistics + +Often it is useful to collect runtime metadata for a run, such as total memory +usage, total compute time, and tensor shapes for nodes. The code example below +is a snippet from the train and test section of a modification of the +@{$layers$simple MNIST tutorial}, in which we have recorded summaries and +runtime statistics. See the +@{$summaries_and_tensorboard#serializing-the-data$Summaries Tutorial} +for details on how to record summaries. +Full source is [here](https://www.tensorflow.org/code/tensorflow/examples/tutorials/mnist/mnist_with_summaries.py). + +```python + # Train the model, and also write summaries. + # Every 10th step, measure test-set accuracy, and write test summaries + # All other steps, run train_step on training data, & add training summaries + + def feed_dict(train): + """Make a TensorFlow feed_dict: maps data onto Tensor placeholders.""" + if train or FLAGS.fake_data: + xs, ys = mnist.train.next_batch(100, fake_data=FLAGS.fake_data) + k = FLAGS.dropout + else: + xs, ys = mnist.test.images, mnist.test.labels + k = 1.0 + return {x: xs, y_: ys, keep_prob: k} + + for i in range(FLAGS.max_steps): + if i % 10 == 0: # Record summaries and test-set accuracy + summary, acc = sess.run([merged, accuracy], feed_dict=feed_dict(False)) + test_writer.add_summary(summary, i) + print('Accuracy at step %s: %s' % (i, acc)) + else: # Record train set summaries, and train + if i % 100 == 99: # Record execution stats + run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) + run_metadata = tf.RunMetadata() + summary, _ = sess.run([merged, train_step], + feed_dict=feed_dict(True), + options=run_options, + run_metadata=run_metadata) + train_writer.add_run_metadata(run_metadata, 'step%d' % i) + train_writer.add_summary(summary, i) + print('Adding run metadata for', i) + else: # Record a summary + summary, _ = sess.run([merged, train_step], feed_dict=feed_dict(True)) + train_writer.add_summary(summary, i) +``` + +This code will emit runtime statistics for every 100th step starting at step99. + +When you launch tensorboard and go to the Graph tab, you will now see options +under "Session runs" which correspond to the steps where run metadata was added. +Selecting one of these runs will show you the snapshot of the network at that +step, fading out unused nodes. In the controls on the left hand side, you will +be able to color the nodes by total memory or total compute time. Additionally, +clicking on a node will display the exact total memory, compute time, and +tensor output sizes. + + + + + + + + +
+*Color by compute time (left), run metadata graph (center), and run metadata
+info card (right).*
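+
+Distilled from the snippet above, the essential pattern for recording run
+metadata for a single step is roughly the following sketch (the graph and the
+log directory are placeholders):
+
+```python
+import tensorflow as tf
+
+x = tf.random_normal([100, 100])
+y = tf.matmul(x, x)
+
+with tf.Session() as sess:
+  writer = tf.summary.FileWriter("/tmp/run_metadata_demo", sess.graph)
+  run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
+  run_metadata = tf.RunMetadata()
+  # Collect memory and compute-time statistics for this single run.
+  sess.run(y, options=run_options, run_metadata=run_metadata)
+  # Use a distinct tag for each step you record.
+  writer.add_run_metadata(run_metadata, 'step0')
+  writer.close()
+```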
diff --git a/tensorflow/docs_src/guide/graphs.md b/tensorflow/docs_src/guide/graphs.md new file mode 100644 index 0000000000..e6246ef148 --- /dev/null +++ b/tensorflow/docs_src/guide/graphs.md @@ -0,0 +1,558 @@ +# Graphs and Sessions + +TensorFlow uses a **dataflow graph** to represent your computation in terms of +the dependencies between individual operations. This leads to a low-level +programming model in which you first define the dataflow graph, then create a +TensorFlow **session** to run parts of the graph across a set of local and +remote devices. + +This guide will be most useful if you intend to use the low-level programming +model directly. Higher-level APIs such as @{tf.estimator.Estimator} and Keras +hide the details of graphs and sessions from the end user, but this guide may +also be useful if you want to understand how these APIs are implemented. + +## Why dataflow graphs? + +![](../images/tensors_flowing.gif) + +[Dataflow](https://en.wikipedia.org/wiki/Dataflow_programming) is a common +programming model for parallel computing. In a dataflow graph, the nodes +represent units of computation, and the edges represent the data consumed or +produced by a computation. For example, in a TensorFlow graph, the @{tf.matmul} +operation would correspond to a single node with two incoming edges (the +matrices to be multiplied) and one outgoing edge (the result of the +multiplication). + + + +Dataflow has several advantages that TensorFlow leverages when executing your +programs: + +* **Parallelism.** By using explicit edges to represent dependencies between + operations, it is easy for the system to identify operations that can execute + in parallel. + +* **Distributed execution.** By using explicit edges to represent the values + that flow between operations, it is possible for TensorFlow to partition your + program across multiple devices (CPUs, GPUs, and TPUs) attached to different + machines. TensorFlow inserts the necessary communication and coordination + between devices. + +* **Compilation.** TensorFlow's @{$performance/xla$XLA compiler} can + use the information in your dataflow graph to generate faster code, for + example, by fusing together adjacent operations. + +* **Portability.** The dataflow graph is a language-independent representation + of the code in your model. You can build a dataflow graph in Python, store it + in a @{$saved_model$SavedModel}, and restore it in a C++ program for + low-latency inference. + + +## What is a @{tf.Graph}? + +A @{tf.Graph} contains two relevant kinds of information: + +* **Graph structure.** The nodes and edges of the graph, indicating how + individual operations are composed together, but not prescribing how they + should be used. The graph structure is like assembly code: inspecting it can + convey some useful information, but it does not contain all of the useful + context that source code conveys. + +* **Graph collections.** TensorFlow provides a general mechanism for storing + collections of metadata in a @{tf.Graph}. The @{tf.add_to_collection} function + enables you to associate a list of objects with a key (where @{tf.GraphKeys} + defines some of the standard keys), and @{tf.get_collection} enables you to + look up all objects associated with a key. Many parts of the TensorFlow + library use this facility: for example, when you create a @{tf.Variable}, it + is added by default to collections representing "global variables" and + "trainable variables". 
When you later come to create a @{tf.train.Saver} or + @{tf.train.Optimizer}, the variables in these collections are used as the + default arguments. + + +## Building a @{tf.Graph} + +Most TensorFlow programs start with a dataflow graph construction phase. In this +phase, you invoke TensorFlow API functions that construct new @{tf.Operation} +(node) and @{tf.Tensor} (edge) objects and add them to a @{tf.Graph} +instance. TensorFlow provides a **default graph** that is an implicit argument +to all API functions in the same context. For example: + +* Calling `tf.constant(42.0)` creates a single @{tf.Operation} that produces the + value `42.0`, adds it to the default graph, and returns a @{tf.Tensor} that + represents the value of the constant. + +* Calling `tf.matmul(x, y)` creates a single @{tf.Operation} that multiplies + the values of @{tf.Tensor} objects `x` and `y`, adds it to the default graph, + and returns a @{tf.Tensor} that represents the result of the multiplication. + +* Executing `v = tf.Variable(0)` adds to the graph a @{tf.Operation} that will + store a writeable tensor value that persists between @{tf.Session.run} calls. + The @{tf.Variable} object wraps this operation, and can be used [like a + tensor](#tensor-like_objects), which will read the current value of the + stored value. The @{tf.Variable} object also has methods such as + @{tf.Variable.assign$`assign`} and @{tf.Variable.assign_add$`assign_add`} that + create @{tf.Operation} objects that, when executed, update the stored value. + (See @{$guide/variables} for more information about variables.) + +* Calling @{tf.train.Optimizer.minimize} will add operations and tensors to the + default graph that calculates gradients, and return a @{tf.Operation} that, + when run, will apply those gradients to a set of variables. + +Most programs rely solely on the default graph. However, +see [Dealing with multiple graphs](#programming_with_multiple_graphs) for more +advanced use cases. High-level APIs such as the @{tf.estimator.Estimator} API +manage the default graph on your behalf, and--for example--may create different +graphs for training and evaluation. + +Note: Calling most functions in the TensorFlow API merely adds operations +and tensors to the default graph, but **does not** perform the actual +computation. Instead, you compose these functions until you have a @{tf.Tensor} +or @{tf.Operation} that represents the overall computation--such as performing +one step of gradient descent--and then pass that object to a @{tf.Session} to +perform the computation. See the section "Executing a graph in a @{tf.Session}" +for more details. + +## Naming operations + +A @{tf.Graph} object defines a **namespace** for the @{tf.Operation} objects it +contains. TensorFlow automatically chooses a unique name for each operation in +your graph, but giving operations descriptive names can make your program easier +to read and debug. The TensorFlow API provides two ways to override the name of +an operation: + +* Each API function that creates a new @{tf.Operation} or returns a new + @{tf.Tensor} accepts an optional `name` argument. For example, + `tf.constant(42.0, name="answer")` creates a new @{tf.Operation} named + `"answer"` and returns a @{tf.Tensor} named `"answer:0"`. If the default graph + already contains an operation named `"answer"`, then TensorFlow would append + `"_1"`, `"_2"`, and so on to the name, in order to make it unique. 
+
+* The @{tf.name_scope} function makes it possible to add a **name scope** prefix
+  to all operations created in a particular context. The current name scope
+  prefix is a `"/"`-delimited list of the names of all active @{tf.name_scope}
+  context managers. If a name scope has already been used in the current
+  context, TensorFlow appends `"_1"`, `"_2"`, and so on. For example:
+
+  ```python
+  c_0 = tf.constant(0, name="c")  # => operation named "c"
+
+  # Already-used names will be "uniquified".
+  c_1 = tf.constant(2, name="c")  # => operation named "c_1"
+
+  # Name scopes add a prefix to all operations created in the same context.
+  with tf.name_scope("outer"):
+    c_2 = tf.constant(2, name="c")  # => operation named "outer/c"
+
+    # Name scopes nest like paths in a hierarchical file system.
+    with tf.name_scope("inner"):
+      c_3 = tf.constant(3, name="c")  # => operation named "outer/inner/c"
+
+    # Exiting a name scope context will return to the previous prefix.
+    c_4 = tf.constant(4, name="c")  # => operation named "outer/c_1"
+
+    # Already-used name scopes will be "uniquified".
+    with tf.name_scope("inner"):
+      c_5 = tf.constant(5, name="c")  # => operation named "outer/inner_1/c"
+  ```
+
+The graph visualizer uses name scopes to group operations and reduce the visual
+complexity of a graph. See [Visualizing your graph](#visualizing-your-graph) for
+more information.
+
+Note that @{tf.Tensor} objects are implicitly named after the @{tf.Operation}
+that produces the tensor as output. A tensor name has the form `"<OP_NAME>:<i>"`
+where:
+
+* `"<OP_NAME>"` is the name of the operation that produces it.
+* `"<i>"` is an integer representing the index of that tensor among the
+  operation's outputs.
+
+## Placing operations on different devices
+
+If you want your TensorFlow program to use multiple devices, the
+@{tf.device} function provides a convenient way to request that all operations
+created in a particular context are placed on the same device (or type of
+device).
+
+A **device specification** has the following form:
+
+```
+/job:<JOB_NAME>/task:<TASK_INDEX>/device:<DEVICE_TYPE>:<DEVICE_INDEX>
+```
+
+where:
+
+* `<JOB_NAME>` is an alpha-numeric string that does not start with a number.
+* `<DEVICE_TYPE>` is a registered device type (such as `GPU` or `CPU`).
+* `<TASK_INDEX>` is a non-negative integer representing the index of the task
+  in the job named `<JOB_NAME>`. See @{tf.train.ClusterSpec} for an explanation
+  of jobs and tasks.
+* `<DEVICE_INDEX>` is a non-negative integer representing the index of the
+  device, for example, to distinguish between different GPU devices used in the
+  same process.
+
+You do not need to specify every part of a device specification. For example,
+if you are running in a single-machine configuration with a single GPU, you
+might use @{tf.device} to pin some operations to the CPU and GPU:
+
+```python
+# Operations created outside either context will run on the "best possible"
+# device. For example, if you have a GPU and a CPU available, and the operation
+# has a GPU implementation, TensorFlow will choose the GPU.
+weights = tf.random_normal(...)
+
+with tf.device("/device:CPU:0"):
+  # Operations created in this context will be pinned to the CPU.
+  img = tf.decode_jpeg(tf.read_file("img.jpg"))
+
+with tf.device("/device:GPU:0"):
+  # Operations created in this context will be pinned to the GPU.
+  result = tf.matmul(weights, img)
+```
+If you are deploying TensorFlow in a @{$distributed$typical distributed configuration},
+you might specify the job name and task ID to place variables on
+a task in the parameter server job (`"/job:ps"`), and the other operations on
+a task in the worker job (`"/job:worker"`):
+
+```python
+with tf.device("/job:ps/task:0"):
+  weights_1 = tf.Variable(tf.truncated_normal([784, 100]))
+  biases_1 = tf.Variable(tf.zeros([100]))
+
+with tf.device("/job:ps/task:1"):
+  weights_2 = tf.Variable(tf.truncated_normal([100, 10]))
+  biases_2 = tf.Variable(tf.zeros([10]))
+
+with tf.device("/job:worker"):
+  layer_1 = tf.matmul(train_batch, weights_1) + biases_1
+  layer_2 = tf.matmul(layer_1, weights_2) + biases_2
+```
+
+@{tf.device} gives you a lot of flexibility to choose placements for individual
+operations or broad regions of a TensorFlow graph. In many cases, there are
+simple heuristics that work well. For example, the
+@{tf.train.replica_device_setter} API can be used with @{tf.device} to place
+operations for **data-parallel distributed training**. The
+following code fragment shows how @{tf.train.replica_device_setter} applies
+different placement policies to @{tf.Variable} objects and other operations:
+
+```python
+with tf.device(tf.train.replica_device_setter(ps_tasks=3)):
+  # tf.Variable objects are, by default, placed on tasks in "/job:ps" in a
+  # round-robin fashion.
+  w_0 = tf.Variable(...)  # placed on "/job:ps/task:0"
+  b_0 = tf.Variable(...)  # placed on "/job:ps/task:1"
+  w_1 = tf.Variable(...)  # placed on "/job:ps/task:2"
+  b_1 = tf.Variable(...)  # placed on "/job:ps/task:0"
+
+  input_data = tf.placeholder(tf.float32)     # placed on "/job:worker"
+  layer_0 = tf.matmul(input_data, w_0) + b_0  # placed on "/job:worker"
+  layer_1 = tf.matmul(layer_0, w_1) + b_1     # placed on "/job:worker"
+```
+
+## Tensor-like objects
+
+Many TensorFlow operations take one or more @{tf.Tensor} objects as arguments.
+For example, @{tf.matmul} takes two @{tf.Tensor} objects, and @{tf.add_n} takes
+a list of `n` @{tf.Tensor} objects. For convenience, these functions will accept
+a **tensor-like object** in place of a @{tf.Tensor}, and implicitly convert it
+to a @{tf.Tensor} using the @{tf.convert_to_tensor} method. Tensor-like objects
+include elements of the following types:
+
+* @{tf.Tensor}
+* @{tf.Variable}
+* [`numpy.ndarray`](https://docs.scipy.org/doc/numpy/reference/generated/numpy.ndarray.html)
+* `list` (and lists of tensor-like objects)
+* Scalar Python types: `bool`, `float`, `int`, `str`
+
+You can register additional tensor-like types using
+@{tf.register_tensor_conversion_function}.
+
+Note: By default, TensorFlow will create a new @{tf.Tensor} each time you use
+the same tensor-like object. If the tensor-like object is large (e.g. a
+`numpy.ndarray` containing a set of training examples) and you use it multiple
+times, you may run out of memory. To avoid this, manually call
+@{tf.convert_to_tensor} on the tensor-like object once and use the returned
+@{tf.Tensor} instead.
+
+## Executing a graph in a @{tf.Session}
+
+TensorFlow uses the @{tf.Session} class to represent a connection between the
+client program---typically a Python program, although a similar interface is
+available in other languages---and the C++ runtime. A @{tf.Session} object
+provides access to devices in the local machine, and remote devices using the
+distributed TensorFlow runtime.
It also caches information about your +@{tf.Graph} so that you can efficiently run the same computation multiple times. + +### Creating a @{tf.Session} + +If you are using the low-level TensorFlow API, you can create a @{tf.Session} +for the current default graph as follows: + +```python +# Create a default in-process session. +with tf.Session() as sess: + # ... + +# Create a remote session. +with tf.Session("grpc://example.org:2222"): + # ... +``` + +Since a @{tf.Session} owns physical resources (such as GPUs and +network connections), it is typically used as a context manager (in a `with` +block) that automatically closes the session when you exit the block. It is +also possible to create a session without using a `with` block, but you should +explicitly call @{tf.Session.close} when you are finished with it to free the +resources. + +Note: Higher-level APIs such as @{tf.train.MonitoredTrainingSession} or +@{tf.estimator.Estimator} will create and manage a @{tf.Session} for you. These +APIs accept optional `target` and `config` arguments (either directly, or as +part of a @{tf.estimator.RunConfig} object), with the same meaning as +described below. + +@{tf.Session.__init__} accepts three optional arguments: + +* **`target`.** If this argument is left empty (the default), the session will + only use devices in the local machine. However, you may also specify a + `grpc://` URL to specify the address of a TensorFlow server, which gives the + session access to all devices on machines that this server controls. See + @{tf.train.Server} for details of how to create a TensorFlow + server. For example, in the common **between-graph replication** + configuration, the @{tf.Session} connects to a @{tf.train.Server} in the same + process as the client. The [distributed TensorFlow](../deploy/distributed.md) + deployment guide describes other common scenarios. + +* **`graph`.** By default, a new @{tf.Session} will be bound to---and only able + to run operations in---the current default graph. If you are using multiple + graphs in your program (see [Programming with multiple + graphs](#programming_with_multiple_graphs) for more details), you can specify + an explicit @{tf.Graph} when you construct the session. + +* **`config`.** This argument allows you to specify a @{tf.ConfigProto} that + controls the behavior of the session. For example, some of the configuration + options include: + + * `allow_soft_placement`. Set this to `True` to enable a "soft" device + placement algorithm, which ignores @{tf.device} annotations that attempt + to place CPU-only operations on a GPU device, and places them on the CPU + instead. + + * `cluster_def`. When using distributed TensorFlow, this option allows you + to specify what machines to use in the computation, and provide a mapping + between job names, task indices, and network addresses. See + @{tf.train.ClusterSpec.as_cluster_def} for details. + + * `graph_options.optimizer_options`. Provides control over the optimizations + that TensorFlow performs on your graph before executing it. + + * `gpu_options.allow_growth`. Set this to `True` to change the GPU memory + allocator so that it gradually increases the amount of memory allocated, + rather than allocating most of the memory at startup. + + +### Using @{tf.Session.run} to execute operations + +The @{tf.Session.run} method is the main mechanism for running a @{tf.Operation} +or evaluating a @{tf.Tensor}. 
You can pass one or more @{tf.Operation} or +@{tf.Tensor} objects to @{tf.Session.run}, and TensorFlow will execute the +operations that are needed to compute the result. + +@{tf.Session.run} requires you to specify a list of **fetches**, which determine +the return values, and may be a @{tf.Operation}, a @{tf.Tensor}, or +a [tensor-like type](#tensor-like_objects) such as @{tf.Variable}. These fetches +determine what **subgraph** of the overall @{tf.Graph} must be executed to +produce the result: this is the subgraph that contains all operations named in +the fetch list, plus all operations whose outputs are used to compute the value +of the fetches. For example, the following code fragment shows how different +arguments to @{tf.Session.run} cause different subgraphs to be executed: + +```python +x = tf.constant([[37.0, -23.0], [1.0, 4.0]]) +w = tf.Variable(tf.random_uniform([2, 2])) +y = tf.matmul(x, w) +output = tf.nn.softmax(y) +init_op = w.initializer + +with tf.Session() as sess: + # Run the initializer on `w`. + sess.run(init_op) + + # Evaluate `output`. `sess.run(output)` will return a NumPy array containing + # the result of the computation. + print(sess.run(output)) + + # Evaluate `y` and `output`. Note that `y` will only be computed once, and its + # result used both to return `y_val` and as an input to the `tf.nn.softmax()` + # op. Both `y_val` and `output_val` will be NumPy arrays. + y_val, output_val = sess.run([y, output]) +``` + +@{tf.Session.run} also optionally takes a dictionary of **feeds**, which is a +mapping from @{tf.Tensor} objects (typically @{tf.placeholder} tensors) to +values (typically Python scalars, lists, or NumPy arrays) that will be +substituted for those tensors in the execution. For example: + +```python +# Define a placeholder that expects a vector of three floating-point values, +# and a computation that depends on it. +x = tf.placeholder(tf.float32, shape=[3]) +y = tf.square(x) + +with tf.Session() as sess: + # Feeding a value changes the result that is returned when you evaluate `y`. + print(sess.run(y, {x: [1.0, 2.0, 3.0]})) # => "[1.0, 4.0, 9.0]" + print(sess.run(y, {x: [0.0, 0.0, 5.0]})) # => "[0.0, 0.0, 25.0]" + + # Raises `tf.errors.InvalidArgumentError`, because you must feed a value for + # a `tf.placeholder()` when evaluating a tensor that depends on it. + sess.run(y) + + # Raises `ValueError`, because the shape of `37.0` does not match the shape + # of placeholder `x`. + sess.run(y, {x: 37.0}) +``` + +@{tf.Session.run} also accepts an optional `options` argument that enables you +to specify options about the call, and an optional `run_metadata` argument that +enables you to collect metadata about the execution. For example, you can use +these options together to collect tracing information about the execution: + +``` +y = tf.matmul([[37.0, -23.0], [1.0, 4.0]], tf.random_uniform([2, 2])) + +with tf.Session() as sess: + # Define options for the `sess.run()` call. + options = tf.RunOptions() + options.output_partition_graphs = True + options.trace_level = tf.RunOptions.FULL_TRACE + + # Define a container for the returned metadata. + metadata = tf.RunMetadata() + + sess.run(y, options=options, run_metadata=metadata) + + # Print the subgraphs that executed on each device. + print(metadata.partition_graphs) + + # Print the timings of each operation that executed. + print(metadata.step_stats) +``` + + +## Visualizing your graph + +TensorFlow includes tools that can help you to understand the code in a graph. 
+The **graph visualizer** is a component of TensorBoard that renders the +structure of your graph visually in a browser. The easiest way to create a +visualization is to pass a @{tf.Graph} when creating the +@{tf.summary.FileWriter}: + +```python +# Build your graph. +x = tf.constant([[37.0, -23.0], [1.0, 4.0]]) +w = tf.Variable(tf.random_uniform([2, 2])) +y = tf.matmul(x, w) +# ... +loss = ... +train_op = tf.train.AdagradOptimizer(0.01).minimize(loss) + +with tf.Session() as sess: + # `sess.graph` provides access to the graph used in a `tf.Session`. + writer = tf.summary.FileWriter("/tmp/log/...", sess.graph) + + # Perform your computation... + for i in range(1000): + sess.run(train_op) + # ... + + writer.close() +``` + +Note: If you are using a @{tf.estimator.Estimator}, the graph (and any +summaries) will be logged automatically to the `model_dir` that you specified +when creating the estimator. + +You can then open the log in `tensorboard`, navigate to the "Graph" tab, and +see a high-level visualization of your graph's structure. Note that a typical +TensorFlow graph---especially training graphs with automatically computed +gradients---has too many nodes to visualize at once. The graph visualizer makes +use of name scopes to group related operations into "super" nodes. You can +click on the orange "+" button on any of these super nodes to expand the +subgraph inside. + +![](../images/mnist_deep.png) + +For more information about visualizing your TensorFlow application with +TensorBoard, see the [TensorBoard tutorial](../get_started/summaries_and_tensorboard.md). + +## Programming with multiple graphs + +Note: When training a model, a common way of organizing your code is to use one +graph for training your model, and a separate graph for evaluating or performing +inference with a trained model. In many cases, the inference graph will be +different from the training graph: for example, techniques like dropout and +batch normalization use different operations in each case. Furthermore, by +default utilities like @{tf.train.Saver} use the names of @{tf.Variable} objects +(which have names based on an underlying @{tf.Operation}) to identify each +variable in a saved checkpoint. When programming this way, you can either use +completely separate Python processes to build and execute the graphs, or you can +use multiple graphs in the same process. This section describes how to use +multiple graphs in the same process. + +As noted above, TensorFlow provides a "default graph" that is implicitly passed +to all API functions in the same context. For many applications, a single graph +is sufficient. However, TensorFlow also provides methods for manipulating +the default graph, which can be useful in more advanced use cases. For example: + +* A @{tf.Graph} defines the namespace for @{tf.Operation} objects: each + operation in a single graph must have a unique name. TensorFlow will + "uniquify" the names of operations by appending `"_1"`, `"_2"`, and so on to + their names if the requested name is already taken. Using multiple explicitly + created graphs gives you more control over what name is given to each + operation. + +* The default graph stores information about every @{tf.Operation} and + @{tf.Tensor} that was ever added to it. If your program creates a large number + of unconnected subgraphs, it may be more efficient to use a different + @{tf.Graph} to build each subgraph, so that unrelated state can be garbage + collected. 
+ +You can install a different @{tf.Graph} as the default graph, using the +@{tf.Graph.as_default} context manager: + +```python +g_1 = tf.Graph() +with g_1.as_default(): + # Operations created in this scope will be added to `g_1`. + c = tf.constant("Node in g_1") + + # Sessions created in this scope will run operations from `g_1`. + sess_1 = tf.Session() + +g_2 = tf.Graph() +with g_2.as_default(): + # Operations created in this scope will be added to `g_2`. + d = tf.constant("Node in g_2") + +# Alternatively, you can pass a graph when constructing a `tf.Session`: +# `sess_2` will run operations from `g_2`. +sess_2 = tf.Session(graph=g_2) + +assert c.graph is g_1 +assert sess_1.graph is g_1 + +assert d.graph is g_2 +assert sess_2.graph is g_2 +``` + +To inspect the current default graph, call @{tf.get_default_graph}, which +returns a @{tf.Graph} object: + +```python +# Print all of the operations in the default graph. +g = tf.get_default_graph() +print(g.get_operations()) +``` diff --git a/tensorflow/docs_src/guide/index.md b/tensorflow/docs_src/guide/index.md new file mode 100644 index 0000000000..eefdb9ceae --- /dev/null +++ b/tensorflow/docs_src/guide/index.md @@ -0,0 +1,86 @@ +# TensorFlow Guide + +The documents in this unit dive into the details of how TensorFlow +works. The units are as follows: + +## High Level APIs + + * @{$guide/keras}, TensorFlow's high-level API for building and + training deep learning models. + * @{$guide/eager}, an API for writing TensorFlow code + imperatively, like you would use Numpy. + * @{$guide/estimators}, a high-level API that provides + fully-packaged models ready for large-scale training and production. + * @{$guide/datasets}, easy input pipelines to bring your data into + your TensorFlow program. + +## Estimators + +* @{$estimators} provides an introduction. +* @{$premade_estimators}, introduces Estimators for machine learning. +* @{$custom_estimators}, which demonstrates how to build and train models you + design yourself. +* @{$feature_columns}, which shows how an Estimator can handle a variety of input + data types without changes to the model. +* @{$datasets_for_estimators} describes using tf.data with estimators. +* @{$checkpoints}, which explains how to save training progress and resume where + you left off. + +## Accelerators + + * @{$using_gpu} explains how TensorFlow assigns operations to + devices and how you can change the arrangement manually. + * @{$using_tpu} explains how to modify `Estimator` programs to run on a TPU. + +## Low Level APIs + + * @{$guide/low_level_intro}, which introduces the + basics of how you can use TensorFlow outside of the high Level APIs. + * @{$guide/tensors}, which explains how to create, + manipulate, and access Tensors--the fundamental object in TensorFlow. + * @{$guide/variables}, which details how + to represent shared, persistent state in your program. + * @{$guide/graphs}, which explains: + * dataflow graphs, which are TensorFlow's representation of computations + as dependencies between operations. + * sessions, which are TensorFlow's mechanism for running dataflow graphs + across one or more local or remote devices. + If you are programming with the low-level TensorFlow API, this unit + is essential. If you are programming with a high-level TensorFlow API + such as Estimators or Keras, the high-level API creates and manages + graphs and sessions for you, but understanding graphs and sessions + can still be helpful. 
+
+ * @{$guide/saved_model}, which
+   explains how to save and restore variables and models.
+
+## ML Concepts
+
+ * @{$guide/embedding}, which introduces the concept
+   of embeddings, provides a simple example of training an embedding in
+   TensorFlow, and explains how to view embeddings with the TensorBoard
+   Embedding Projector.
+
+## Debugging
+
+ * @{$guide/debugger}, which
+   explains how to use the TensorFlow debugger (tfdbg).
+
+## TensorBoard
+
+TensorBoard is a utility to visualize different aspects of machine learning.
+The following guides explain how to use TensorBoard:
+
+ * @{$guide/summaries_and_tensorboard},
+   which introduces TensorBoard.
+ * @{$guide/graph_viz}, which
+   explains how to visualize the computational graph.
+ * @{$guide/tensorboard_histograms}, which demonstrates how to
+   use TensorBoard's histogram dashboard.
+
+
+## Misc
+
+ * @{$guide/version_compat},
+   which explains backward compatibility guarantees and non-guarantees.
+ * @{$guide/faq}, which contains frequently asked
+   questions about TensorFlow.
diff --git a/tensorflow/docs_src/guide/keras.md b/tensorflow/docs_src/guide/keras.md
new file mode 100644
index 0000000000..83172dab7f
--- /dev/null
+++ b/tensorflow/docs_src/guide/keras.md
@@ -0,0 +1,623 @@
+# Keras
+
+Keras is a high-level API to build and train deep learning models. It's used for
+fast prototyping, advanced research, and production, with three key advantages:
+
+- *User friendly*
+ Keras has a simple, consistent interface optimized for common use cases. It + provides clear and actionable feedback for user errors. +- *Modular and composable*
+ Keras models are made by connecting configurable building blocks together, + with few restrictions. +- *Easy to extend*
Write custom building blocks to express new ideas for + research. Create new layers, loss functions, and develop state-of-the-art + models. + +## Import tf.keras + +`tf.keras` is TensorFlow's implementation of the +[Keras API specification](https://keras.io){:.external}. This is a high-level +API to build and train models that includes first-class support for +TensorFlow-specific functionality, such as [eager execution](#eager_execution), +`tf.data` pipelines, and [Estimators](./estimators.md). +`tf.keras` makes TensorFlow easier to use without sacrificing flexibility and +performance. + +To get started, import `tf.keras` as part of your TensorFlow program setup: + +```python +import tensorflow as tf +from tensorflow import keras +``` + +`tf.keras` can run any Keras-compatible code, but keep in mind: + +* The `tf.keras` version in the latest TensorFlow release might not be the same + as the latest `keras` version from PyPI. Check `tf.keras.__version__`. +* When [saving a model's weights](#weights_only), `tf.keras` defaults to the + [checkpoint format](../get_started/checkpoints.md). Pass `save_format='h5'` to + use HDF5. + +## Build a simple model + +### Sequential model + +In Keras, you assemble *layers* to build *models*. A model is (usually) a graph +of layers. The most common type of model is a stack of layers: the +`tf.keras.Sequential` model. + +To build a simple, fully-connected network (i.e. multi-layer perceptron): + +```python +model = keras.Sequential() +# Adds a densely-connected layer with 64 units to the model: +model.add(keras.layers.Dense(64, activation='relu')) +# Add another: +model.add(keras.layers.Dense(64, activation='relu')) +# Add a softmax layer with 10 output units: +model.add(keras.layers.Dense(10, activation='softmax')) +``` + +### Configure the layers + +There are many `tf.keras.layers` available with some common constructor +parameters: + +* `activation`: Set the activation function for the layer. This parameter is + specified by the name of a built-in function or as a callable object. By + default, no activation is applied. +* `kernel_initializer` and `bias_initializer`: The initialization schemes + that create the layer's weights (kernel and bias). This parameter is a name or + a callable object. This defaults to the `"Glorot uniform"` initializer. +* `kernel_regularizer` and `bias_regularizer`: The regularization schemes + that apply the layer's weights (kernel and bias), such as L1 or L2 + regularization. By default, no regularization is applied. 
+ +The following instantiates `tf.keras.layers.Dense` layers using constructor +arguments: + +```python +# Create a sigmoid layer: +layers.Dense(64, activation='sigmoid') +# Or: +layers.Dense(64, activation=tf.sigmoid) + +# A linear layer with L1 regularization of factor 0.01 applied to the kernel matrix: +layers.Dense(64, kernel_regularizer=keras.regularizers.l1(0.01)) +# A linear layer with L2 regularization of factor 0.01 applied to the bias vector: +layers.Dense(64, bias_regularizer=keras.regularizers.l2(0.01)) + +# A linear layer with a kernel initialized to a random orthogonal matrix: +layers.Dense(64, kernel_initializer='orthogonal') +# A linear layer with a bias vector initialized to 2.0s: +layers.Dense(64, bias_initializer=keras.initializers.constant(2.0)) +``` + +## Train and evaluate + +### Set up training + +After the model is constructed, configure its learning process by calling the +`compile` method: + +```python +model.compile(optimizer=tf.train.AdamOptimizer(0.001), + loss='categorical_crossentropy', + metrics=['accuracy']) +``` + +`tf.keras.Model.compile` takes three important arguments: + +* `optimizer`: This object specifies the training procedure. Pass it optimizer + instances from the `tf.train` module, such as + [`AdamOptimizer`](/api_docs/python/tf/train/AdamOptimizer), + [`RMSPropOptimizer`](/api_docs/python/tf/train/RMSPropOptimizer), or + [`GradientDescentOptimizer`](/api_docs/python/tf/train/GradientDescentOptimizer). +* `loss`: The function to minimize during optimization. Common choices include + mean square error (`mse`), `categorical_crossentropy`, and + `binary_crossentropy`. Loss functions are specified by name or by + passing a callable object from the `tf.keras.losses` module. +* `metrics`: Used to monitor training. These are string names or callables from + the `tf.keras.metrics` module. + +The following shows a few examples of configuring a model for training: + +```python +# Configure a model for mean-squared error regression. +model.compile(optimizer=tf.train.AdamOptimizer(0.01), + loss='mse', # mean squared error + metrics=['mae']) # mean absolute error + +# Configure a model for categorical classification. +model.compile(optimizer=tf.train.RMSPropOptimizer(0.01), + loss=keras.losses.categorical_crossentropy, + metrics=[keras.metrics.categorical_accuracy]) +``` + +### Input NumPy data + +For small datasets, use in-memory [NumPy](https://www.numpy.org/){:.external} +arrays to train and evaluate a model. The model is "fit" to the training data +using the `fit` method: + +```python +import numpy as np + +data = np.random.random((1000, 32)) +labels = np.random.random((1000, 10)) + +model.fit(data, labels, epochs=10, batch_size=32) +``` + +`tf.keras.Model.fit` takes three important arguments: + +* `epochs`: Training is structured into *epochs*. An epoch is one iteration over + the entire input data (this is done in smaller batches). +* `batch_size`: When passed NumPy data, the model slices the data into smaller + batches and iterates over these batches during training. This integer + specifies the size of each batch. Be aware that the last batch may be smaller + if the total number of samples is not divisible by the batch size. +* `validation_data`: When prototyping a model, you want to easily monitor its + performance on some validation data. Passing this argument—a tuple of inputs + and labels—allows the model to display the loss and metrics in inference mode + for the passed data, at the end of each epoch. 
+
+Here's an example using `validation_data`:
+
+```python
+import numpy as np
+
+data = np.random.random((1000, 32))
+labels = np.random.random((1000, 10))
+
+val_data = np.random.random((100, 32))
+val_labels = np.random.random((100, 10))
+
+model.fit(data, labels, epochs=10, batch_size=32,
+          validation_data=(val_data, val_labels))
+```
+
+### Input tf.data datasets
+
+Use the [Datasets API](./datasets.md) to scale to large datasets
+or multi-device training. Pass a `tf.data.Dataset` instance to the `fit`
+method:
+
+```python
+# Instantiates a toy dataset:
+dataset = tf.data.Dataset.from_tensor_slices((data, labels))
+dataset = dataset.batch(32)
+dataset = dataset.repeat()
+
+# Don't forget to specify `steps_per_epoch` when calling `fit` on a dataset.
+model.fit(dataset, epochs=10, steps_per_epoch=30)
+```
+
+Here, the `fit` method uses the `steps_per_epoch` argument—this is the number of
+training steps the model runs before it moves to the next epoch. Since the
+`Dataset` yields batches of data, this snippet does not require a `batch_size`.
+
+Datasets can also be used for validation:
+
+```python
+dataset = tf.data.Dataset.from_tensor_slices((data, labels))
+dataset = dataset.batch(32).repeat()
+
+val_dataset = tf.data.Dataset.from_tensor_slices((val_data, val_labels))
+val_dataset = val_dataset.batch(32).repeat()
+
+model.fit(dataset, epochs=10, steps_per_epoch=30,
+          validation_data=val_dataset,
+          validation_steps=3)
+```
+
+### Evaluate and predict
+
+The `tf.keras.Model.evaluate` and `tf.keras.Model.predict` methods can use NumPy
+data and a `tf.data.Dataset`.
+
+To *evaluate* the inference-mode loss and metrics for the data provided:
+
+```python
+model.evaluate(x, y, batch_size=32)
+
+model.evaluate(dataset, steps=30)
+```
+
+And to *predict* the output of the last layer in inference for the data provided,
+as a NumPy array:
+
+```python
+model.predict(x, batch_size=32)
+
+model.predict(dataset, steps=30)
+```
+
+
+## Build advanced models
+
+### Functional API
+
+The `tf.keras.Sequential` model is a simple stack of layers that cannot
+represent arbitrary models. Use the
+[Keras functional API](https://keras.io/getting-started/functional-api-guide/){:.external}
+to build complex model topologies such as:
+
+* Multi-input models,
+* Multi-output models,
+* Models with shared layers (the same layer called several times),
+* Models with non-sequential data flows (e.g. residual connections).
+
+Building a model with the functional API works like this:
+
+1. A layer instance is callable and returns a tensor.
+2. Input tensors and output tensors are used to define a `tf.keras.Model`
+   instance.
+3. This model is trained just like the `Sequential` model.
+
+The following example uses the functional API to build a simple, fully-connected
+network:
+
+```python
+inputs = keras.Input(shape=(32,))  # Returns a placeholder tensor
+
+# A layer instance is callable on a tensor, and returns a tensor.
+x = keras.layers.Dense(64, activation='relu')(inputs)
+x = keras.layers.Dense(64, activation='relu')(x)
+predictions = keras.layers.Dense(10, activation='softmax')(x)
+
+# Instantiate the model given inputs and outputs.
+model = keras.Model(inputs=inputs, outputs=predictions)
+
+# The compile step specifies the training configuration.
+model.compile(optimizer=tf.train.RMSPropOptimizer(0.001), + loss='categorical_crossentropy', + metrics=['accuracy']) + +# Trains for 5 epochs +model.fit(data, labels, batch_size=32, epochs=5) +``` + +### Model subclassing + +Build a fully-customizable model by subclassing `tf.keras.Model` and defining +your own forward pass. Create layers in the `__init__` method and set them as +attributes of the class instance. Define the forward pass in the `call` method. + +Model subclassing is particularly useful when +[eager execution](./eager.md) is enabled since the forward pass +can be written imperatively. + +Key Point: Use the right API for the job. While model subclassing offers +flexibility, it comes at a cost of greater complexity and more opportunities for +user errors. If possible, prefer the functional API. + +The following example shows a subclassed `tf.keras.Model` using a custom forward +pass: + +```python +class MyModel(keras.Model): + + def __init__(self, num_classes=10): + super(MyModel, self).__init__(name='my_model') + self.num_classes = num_classes + # Define your layers here. + self.dense_1 = keras.layers.Dense(32, activation='relu') + self.dense_2 = keras.layers.Dense(num_classes, activation='sigmoid') + + def call(self, inputs): + # Define your forward pass here, + # using layers you previously defined (in `__init__`). + x = self.dense_1(inputs) + return self.dense_2(x) + + def compute_output_shape(self, input_shape): + # You need to override this function if you want to use the subclassed model + # as part of a functional-style model. + # Otherwise, this method is optional. + shape = tf.TensorShape(input_shape).as_list() + shape[-1] = self.num_classes + return tf.TensorShape(shape) + + +# Instantiates the subclassed model. +model = MyModel(num_classes=10) + +# The compile step specifies the training configuration. +model.compile(optimizer=tf.train.RMSPropOptimizer(0.001), + loss='categorical_crossentropy', + metrics=['accuracy']) + +# Trains for 5 epochs. +model.fit(data, labels, batch_size=32, epochs=5) +``` + + +### Custom layers + +Create a custom layer by subclassing `tf.keras.layers.Layer` and implementing +the following methods: + +* `build`: Create the weights of the layer. Add weights with the `add_weight` + method. +* `call`: Define the forward pass. +* `compute_output_shape`: Specify how to compute the output shape of the layer + given the input shape. +* Optionally, a layer can be serialized by implementing the `get_config` method + and the `from_config` class method. + +Here's an example of a custom layer that implements a `matmul` of an input with +a kernel matrix: + +```python +class MyLayer(keras.layers.Layer): + + def __init__(self, output_dim, **kwargs): + self.output_dim = output_dim + super(MyLayer, self).__init__(**kwargs) + + def build(self, input_shape): + shape = tf.TensorShape((input_shape[1], self.output_dim)) + # Create a trainable weight variable for this layer. 
+    self.kernel = self.add_weight(name='kernel',
+                                  shape=shape,
+                                  initializer='uniform',
+                                  trainable=True)
+    # Be sure to call this at the end
+    super(MyLayer, self).build(input_shape)
+
+  def call(self, inputs):
+    return tf.matmul(inputs, self.kernel)
+
+  def compute_output_shape(self, input_shape):
+    shape = tf.TensorShape(input_shape).as_list()
+    shape[-1] = self.output_dim
+    return tf.TensorShape(shape)
+
+  def get_config(self):
+    base_config = super(MyLayer, self).get_config()
+    base_config['output_dim'] = self.output_dim
+    return base_config
+
+  @classmethod
+  def from_config(cls, config):
+    return cls(**config)
+
+
+# Create a model using the custom layer
+model = keras.Sequential([MyLayer(10),
+                          keras.layers.Activation('softmax')])
+
+# The compile step specifies the training configuration
+model.compile(optimizer=tf.train.RMSPropOptimizer(0.001),
+              loss='categorical_crossentropy',
+              metrics=['accuracy'])
+
+# Trains for 5 epochs.
+model.fit(data, targets, batch_size=32, epochs=5)
+```
+
+
+## Callbacks
+
+A callback is an object passed to a model to customize and extend its behavior
+during training. You can write your own custom callback, or use the built-in
+`tf.keras.callbacks` that include:
+
+* `tf.keras.callbacks.ModelCheckpoint`: Save checkpoints of your model at
+  regular intervals.
+* `tf.keras.callbacks.LearningRateScheduler`: Dynamically change the learning
+  rate.
+* `tf.keras.callbacks.EarlyStopping`: Interrupt training when validation
+  performance has stopped improving.
+* `tf.keras.callbacks.TensorBoard`: Monitor the model's behavior using
+  [TensorBoard](./summaries_and_tensorboard.md).
+
+To use a `tf.keras.callbacks.Callback`, pass it to the model's `fit` method:
+
+```python
+callbacks = [
+  # Interrupt training if `val_loss` stops improving for over 2 epochs
+  keras.callbacks.EarlyStopping(patience=2, monitor='val_loss'),
+  # Write TensorBoard logs to `./logs` directory
+  keras.callbacks.TensorBoard(log_dir='./logs')
+]
+model.fit(data, labels, batch_size=32, epochs=5, callbacks=callbacks,
+          validation_data=(val_data, val_targets))
+```
+
+
+## Save and restore
+
+### Weights only
+
+Save and load the weights of a model using `tf.keras.Model.save_weights`:
+
+```python
+# Save weights to a TensorFlow Checkpoint file
+model.save_weights('./my_model')
+
+# Restore the model's state,
+# this requires a model with the same architecture.
+model.load_weights('my_model')
+```
+
+By default, this saves the model's weights in the
+[TensorFlow checkpoint](../get_started/checkpoints.md) file format. Weights can
+also be saved to the Keras HDF5 format (the default for the multi-backend
+implementation of Keras):
+
+```python
+# Save weights to an HDF5 file
+model.save_weights('my_model.h5', save_format='h5')
+
+# Restore the model's state
+model.load_weights('my_model.h5')
+```
+
+
+### Configuration only
+
+A model's configuration can be saved—this serializes the model architecture
+without any weights. A saved configuration can recreate and initialize the same
+model, even without the code that defined the original model.
Keras supports +JSON and YAML serialization formats: + +```python +# Serialize a model to JSON format +json_string = model.to_json() + +# Recreate the model (freshly initialized) +fresh_model = keras.models.from_json(json_string) + +# Serializes a model to YAML format +yaml_string = model.to_yaml() + +# Recreate the model +fresh_model = keras.models.from_yaml(yaml_string) +``` + +Caution: Subclassed models are not serializable because their architecture is +defined by the Python code in the body of the `call` method. + + +### Entire model + +The entire model can be saved to a file that contains the weight values, the +model's configuration, and even the optimizer's configuration. This allows you +to checkpoint a model and resume training later—from the exact same +state—without access to the original code. + +```python +# Create a trivial model +model = keras.Sequential([ + keras.layers.Dense(10, activation='softmax', input_shape=(32,)), + keras.layers.Dense(10, activation='softmax') +]) +model.compile(optimizer='rmsprop', + loss='categorical_crossentropy', + metrics=['accuracy']) +model.fit(data, targets, batch_size=32, epochs=5) + + +# Save entire model to a HDF5 file +model.save('my_model.h5') + +# Recreate the exact same model, including weights and optimizer. +model = keras.models.load_model('my_model.h5') +``` + + +## Eager execution + +[Eager execution](./eager.md) is an imperative programming +environment that evaluates operations immediately. This is not required for +Keras, but is supported by `tf.keras` and useful for inspecting your program and +debugging. + +All of the `tf.keras` model-building APIs are compatible with eager execution. +And while the `Sequential` and functional APIs can be used, eager execution +especially benefits *model subclassing* and building *custom layers*—the APIs +that require you to write the forward pass as code (instead of the APIs that +create models by assembling existing layers). + +See the [eager execution guide](./eager.md#build_a_model) for +examples of using Keras models with custom training loops and `tf.GradientTape`. + + +## Distribution + +### Estimators + +The [Estimators](./estimators.md) API is used for training models +for distributed environments. This targets industry use cases such as +distributed training on large datasets that can export a model for production. + +A `tf.keras.Model` can be trained with the `tf.estimator` API by converting the +model to an `tf.estimator.Estimator` object with +`tf.keras.estimator.model_to_estimator`. See +[Creating Estimators from Keras models](./estimators.md#creating_estimators_from_keras_models). + +```python +model = keras.Sequential([layers.Dense(10,activation='softmax'), + layers.Dense(10,activation='softmax')]) + +model.compile(optimizer=tf.train.RMSPropOptimizer(0.001), + loss='categorical_crossentropy', + metrics=['accuracy']) + +estimator = keras.estimator.model_to_estimator(model) +``` + +Note: Enable [eager execution](./eager.md) for debugging +[Estimator input functions](./premade_estimators.md#create_input_functions) +and inspecting data. + +### Multiple GPUs + +`tf.keras` models can run on multiple GPUs using +`tf.contrib.distribute.DistributionStrategy`. This API provides distributed +training on multiple GPUs with almost no changes to existing code. + +Currently, `tf.contrib.distribute.MirroredStrategy` is the only supported +distribution strategy. `MirroredStrategy` does in-graph replication with +synchronous training using all-reduce on a single machine. 
To use +`DistributionStrategy` with Keras, convert the `tf.keras.Model` to a +`tf.estimator.Estimator` with `tf.keras.estimator.model_to_estimator`, then +train the estimator + +The following example distributes a `tf.keras.Model` across multiple GPUs on a +single machine. + +First, define a simple model: + +```python +model = keras.Sequential() +model.add(keras.layers.Dense(16, activation='relu', input_shape=(10,))) +model.add(keras.layers.Dense(1, activation='sigmoid')) + +optimizer = tf.train.GradientDescentOptimizer(0.2) + +model.compile(loss='binary_crossentropy', optimizer=optimizer) +model.summary() +``` + +Convert the Keras model to a `tf.estimator.Estimator` instance: + +```python +keras_estimator = keras.estimator.model_to_estimator( + keras_model=model, + config=config, + model_dir='/tmp/model_dir') +``` + +Define an *input pipeline*. The `input_fn` returns a `tf.data.Dataset` object +used to distribute the data across multiple devices—with each device processing +a slice of the input batch. + +```python +def input_fn(): + x = np.random.random((1024, 10)) + y = np.random.randint(2, size=(1024, 1)) + x = tf.cast(x, tf.float32) + dataset = tf.data.Dataset.from_tensor_slices((x, y)) + dataset = dataset.repeat(10) + dataset = dataset.batch(32) + return dataset +``` + +Next, create a `tf.estimator.RunConfig` and set the `train_distribute` argument +to the `tf.contrib.distribute.MirroredStrategy` instance. When creating +`MirroredStrategy`, you can specify a list of devices or set the `num_gpus` +argument. The default uses all available GPUs, like the following: + +```python +strategy = tf.contrib.distribute.MirroredStrategy() +config = tf.estimator.RunConfig(train_distribute=strategy) +``` + +Finally, train the `Estimator` instance by providing the `input_fn` and `steps` +arguments: + +```python +keras_estimator.train(input_fn=input_fn, steps=10) +``` diff --git a/tensorflow/docs_src/guide/leftnav_files b/tensorflow/docs_src/guide/leftnav_files new file mode 100644 index 0000000000..357a2a1cb9 --- /dev/null +++ b/tensorflow/docs_src/guide/leftnav_files @@ -0,0 +1,40 @@ +index.md + +### High Level APIs +keras.md +eager.md +datasets.md + +### Estimators +estimators.md: Introduction to Estimators +premade_estimators.md +custom_estimators.md +feature_columns.md +datasets_for_estimators.md +checkpoints.md + +### Accelerators +using_gpu.md +using_tpu.md + +### Low Level APIs +low_level_intro.md +tensors.md +variables.md +graphs.md +saved_model.md + +### ML Concepts +embedding.md + +### Debugging +debugger.md + +### TensorBoard +summaries_and_tensorboard.md: Visualizing Learning +graph_viz.md: Graphs +tensorboard_histograms.md: Histograms + +### Misc +version_compat.md +faq.md diff --git a/tensorflow/docs_src/guide/low_level_intro.md b/tensorflow/docs_src/guide/low_level_intro.md new file mode 100644 index 0000000000..665a5568b4 --- /dev/null +++ b/tensorflow/docs_src/guide/low_level_intro.md @@ -0,0 +1,604 @@ +# Introduction + +This guide gets you started programming in the low-level TensorFlow APIs +(TensorFlow Core), showing you how to: + + * Manage your own TensorFlow program (a `tf.Graph`) and TensorFlow + runtime (a `tf.Session`), instead of relying on Estimators to manage them. + * Run TensorFlow operations, using a `tf.Session`. + * Use high level components ([datasets](#datasets), [layers](#layers), and + [feature_columns](#feature_columns)) in this low level environment. + * Build your own training loop, instead of using the one + @{$premade_estimators$provided by Estimators}. 
+ +We recommend using the higher level APIs to build models when possible. +Knowing TensorFlow Core is valuable for the following reasons: + + * Experimentation and debugging are both more straight forward + when you can use low level TensorFlow operations directly. + * It gives you a mental model of how things work internally when + using the higher level APIs. + +## Setup + +Before using this guide, @{$install$install TensorFlow}. + +To get the most out of this guide, you should know the following: + +* How to program in Python. +* At least a little bit about arrays. +* Ideally, something about machine learning. + +Feel free to launch `python` and follow along with this walkthrough. +Run the following lines to set up your Python environment: + +```python +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +import tensorflow as tf +``` + +## Tensor Values + +The central unit of data in TensorFlow is the **tensor**. A tensor consists of a +set of primitive values shaped into an array of any number of dimensions. A +tensor's **rank** is its number of dimensions, while its **shape** is a tuple +of integers specifying the array's length along each dimension. Here are some +examples of tensor values: + +```python +3. # a rank 0 tensor; a scalar with shape [], +[1., 2., 3.] # a rank 1 tensor; a vector with shape [3] +[[1., 2., 3.], [4., 5., 6.]] # a rank 2 tensor; a matrix with shape [2, 3] +[[[1., 2., 3.]], [[7., 8., 9.]]] # a rank 3 tensor with shape [2, 1, 3] +``` + +TensorFlow uses numpy arrays to represent tensor **values**. + +## TensorFlow Core Walkthrough + +You might think of TensorFlow Core programs as consisting of two discrete +sections: + +1. Building the computational graph (a @{tf.Graph}). +2. Running the computational graph (using a @{tf.Session}). + +### Graph + +A **computational graph** is a series of TensorFlow operations arranged into a +graph. The graph is composed of two types of objects. + + * @{tf.Operation$Operations} (or "ops"): The nodes of the graph. + Operations describe calculations that consume and produce tensors. + * @{tf.Tensor$Tensors}: The edges in the graph. These represent the values + that will flow through the graph. Most TensorFlow functions return + `tf.Tensors`. + +Important: `tf.Tensors` do not have values, they are just handles to elements +in the computation graph. + +Let's build a simple computational graph. The most basic operation is a +constant. The Python function that builds the operation takes a tensor value as +input. The resulting operation takes no inputs. When run, it outputs the +value that was passed to the constructor. We can create two floating point +constants `a` and `b` as follows: + +```python +a = tf.constant(3.0, dtype=tf.float32) +b = tf.constant(4.0) # also tf.float32 implicitly +total = a + b +print(a) +print(b) +print(total) +``` + +The print statements produce: + +``` +Tensor("Const:0", shape=(), dtype=float32) +Tensor("Const_1:0", shape=(), dtype=float32) +Tensor("add:0", shape=(), dtype=float32) +``` + +Notice that printing the tensors does not output the values `3.0`, `4.0`, and +`7.0` as you might expect. The above statements only build the computation +graph. These `tf.Tensor` objects just represent the results of the operations +that will be run. + +Each operation in a graph is given a unique name. This name is independent of +the names the objects are assigned to in Python. 
Tensors are named after the +operation that produces them followed by an output index, as in +`"add:0"` above. + +### TensorBoard + +TensorFlow provides a utility called TensorBoard. One of TensorBoard's many +capabilities is visualizing a computation graph. You can easily do this with +a few simple commands. + +First you save the computation graph to a TensorBoard summary file as +follows: + +``` +writer = tf.summary.FileWriter('.') +writer.add_graph(tf.get_default_graph()) +``` + +This will produce an `event` file in the current directory with a name in the +following format: + +``` +events.out.tfevents.{timestamp}.{hostname} +``` + +Now, in a new terminal, launch TensorBoard with the following shell command: + +```bsh +tensorboard --logdir . +``` + +Then open TensorBoard's [graphs page](http://localhost:6006/#graphs) in your +browser, and you should see a graph similar to the following: + +![TensorBoard screenshot](https://www.tensorflow.org/images/getting_started_add.png) + +For more about TensorBoard's graph visualization tools see @{$graph_viz}. + +### Session + +To evaluate tensors, instantiate a @{tf.Session} object, informally known as a +**session**. A session encapsulates the state of the TensorFlow runtime, and +runs TensorFlow operations. If a `tf.Graph` is like a `.py` file, a `tf.Session` +is like the `python` executable. + +The following code creates a `tf.Session` object and then invokes its `run` +method to evaluate the `total` tensor we created above: + +```python +sess = tf.Session() +print(sess.run(total)) +``` + +When you request the output of a node with `Session.run` TensorFlow backtracks +through the graph and runs all the nodes that provide input to the requested +output node. So this prints the expected value of 7.0: + +``` +7.0 +``` + +You can pass multiple tensors to `tf.Session.run`. The `run` method +transparently handles any combination of tuples or dictionaries, as in the +following example: + +```python +print(sess.run({'ab':(a, b), 'total':total})) +``` + +which returns the results in a structure of the same layout: + +``` None +{'total': 7.0, 'ab': (3.0, 4.0)} +``` + +During a call to `tf.Session.run` any `tf.Tensor` only has a single value. +For example, the following code calls `tf.random_uniform` to produce a +`tf.Tensor` that generates a random 3-element vector (with values in `[0,1)`): + +```python +vec = tf.random_uniform(shape=(3,)) +out1 = vec + 1 +out2 = vec + 2 +print(sess.run(vec)) +print(sess.run(vec)) +print(sess.run((out1, out2))) +``` + +The result shows a different random value on each call to `run`, but +a consistent value during a single `run` (`out1` and `out2` receive the same +random input): + +``` +[ 0.52917576 0.64076328 0.68353939] +[ 0.66192627 0.89126778 0.06254101] +( + array([ 1.88408756, 1.87149239, 1.84057522], dtype=float32), + array([ 2.88408756, 2.87149239, 2.84057522], dtype=float32) +) +``` + +Some TensorFlow functions return `tf.Operations` instead of `tf.Tensors`. +The result of calling `run` on an Operation is `None`. You run an operation +to cause a side-effect, not to retrieve a value. Examples of this include the +[initialization](#Initializing Layers), and [training](#Training) ops +demonstrated later. + +### Feeding + +As it stands, this graph is not especially interesting because it always +produces a constant result. A graph can be parameterized to accept external +inputs, known as **placeholders**. A **placeholder** is a promise to provide a +value later, like a function argument. 
+ +```python +x = tf.placeholder(tf.float32) +y = tf.placeholder(tf.float32) +z = x + y +``` + +The preceding three lines are a bit like a function in which we +define two input parameters (`x` and `y`) and then an operation on them. We can +evaluate this graph with multiple inputs by using the `feed_dict` argument of +the @{tf.Session.run$run method} to feed concrete values to the placeholders: + +```python +print(sess.run(z, feed_dict={x: 3, y: 4.5})) +print(sess.run(z, feed_dict={x: [1, 3], y: [2, 4]})) +``` +This results in the following output: + +``` +7.5 +[ 3. 7.] +``` + +Also note that the `feed_dict` argument can be used to overwrite any tensor in +the graph. The only difference between placeholders and other `tf.Tensors` is +that placeholders throw an error if no value is fed to them. + +## Datasets + +Placeholders work for simple experiments, but @{tf.data$Datasets} are the +preferred method of streaming data into a model. + +To get a runnable `tf.Tensor` from a Dataset you must first convert it to a +@{tf.data.Iterator}, and then call the Iterator's +@{tf.data.Iterator.get_next$`get_next`} method. + +The simplest way to create an Iterator is with the +@{tf.data.Dataset.make_one_shot_iterator$`make_one_shot_iterator`} method. +For example, in the following code the `next_item` tensor will return a row from +the `my_data` array on each `run` call: + +``` python +my_data = [ + [0, 1,], + [2, 3,], + [4, 5,], + [6, 7,], +] +slices = tf.data.Dataset.from_tensor_slices(my_data) +next_item = slices.make_one_shot_iterator().get_next() +``` + +Reaching the end of the data stream causes `Dataset` to throw an +@{tf.errors.OutOfRangeError$`OutOfRangeError`}. For example, the following code +reads the `next_item` until there is no more data to read: + +``` python +while True: + try: + print(sess.run(next_item)) + except tf.errors.OutOfRangeError: + break +``` + +If the `Dataset` depends on stateful operations you may need to +initialize the iterator before using it, as shown below: + +``` python +r = tf.random_normal([10,3]) +dataset = tf.data.Dataset.from_tensor_slices(r) +iterator = dataset.make_initializable_iterator() +next_row = iterator.get_next() + +sess.run(iterator.initializer) +while True: + try: + print(sess.run(next_row)) + except tf.errors.OutOfRangeError: + break +``` + +For more details on Datasets and Iterators see: @{$guide/datasets}. + +## Layers + +A trainable model must modify the values in the graph to get new outputs with +the same input. @{tf.layers$Layers} are the preferred way to add trainable +parameters to a graph. + +Layers package together both the variables and the operations that act +on them. For example a +[densely-connected layer](https://developers.google.com/machine-learning/glossary/#fully_connected_layer) +performs a weighted sum across all inputs +for each output and applies an optional +[activation function](https://developers.google.com/machine-learning/glossary/#activation_function). +The connection weights and biases are managed by the layer object. + +### Creating Layers + +The following code creates a @{tf.layers.Dense$`Dense`} layer that takes a +batch of input vectors, and produces a single output value for each. To apply a +layer to an input, call the layer as if it were a function. For example: + +```python +x = tf.placeholder(tf.float32, shape=[None, 3]) +linear_model = tf.layers.Dense(units=1) +y = linear_model(x) +``` + +The layer inspects its input to determine sizes for its internal variables. 
So +here we must set the shape of the `x` placeholder so that the layer can +build a weight matrix of the correct size. + +Now that we have defined the calculation of the output, `y`, there is one more +detail we need to take care of before we run the calculation. + +### Initializing Layers + +The layer contains variables that must be **initialized** before they can be +used. While it is possible to initialize variables individually, you can easily +initialize all the variables in a TensorFlow graph as follows: + +```python +init = tf.global_variables_initializer() +sess.run(init) +``` + +Important: Calling `tf.global_variables_initializer` only +creates and returns a handle to a TensorFlow operation. That op +will initialize all the global variables when we run it with `tf.Session.run`. + +Also note that this `global_variables_initializer` only initializes variables +that existed in the graph when the initializer was created. So the initializer +should be one of the last things added during graph construction. + +### Executing Layers + +Now that the layer is initialized, we can evaluate the `linear_model`'s output +tensor as we would any other tensor. For example, the following code: + +```python +print(sess.run(y, {x: [[1, 2, 3],[4, 5, 6]]})) +``` + +will generate a two-element output vector such as the following: + +``` +[[-3.41378999] + [-9.14999008]] +``` + +### Layer Function shortcuts + +For each layer class (like @{tf.layers.Dense}) TensorFlow also supplies a +shortcut function (like @{tf.layers.dense}). The only difference is that the +shortcut function versions create and run the layer in a single call. For +example, the following code is equivalent to the earlier version: + +```python +x = tf.placeholder(tf.float32, shape=[None, 3]) +y = tf.layers.dense(x, units=1) + +init = tf.global_variables_initializer() +sess.run(init) + +print(sess.run(y, {x: [[1, 2, 3], [4, 5, 6]]})) +``` + +While convenient, this approach allows no access to the @{tf.layers.Layer} +object. This makes introspection and debugging more difficult, +and layer reuse impossible. + +## Feature columns + +The easiest way to experiment with feature columns is using the +@{tf.feature_column.input_layer} function. This function only accepts +@{$feature_columns$dense columns} as inputs, so to view the result +of a categorical column you must wrap it in an +@{tf.feature_column.indicator_column}. For example: + +``` python +features = { + 'sales' : [[5], [10], [8], [9]], + 'department': ['sports', 'sports', 'gardening', 'gardening']} + +department_column = tf.feature_column.categorical_column_with_vocabulary_list( + 'department', ['sports', 'gardening']) +department_column = tf.feature_column.indicator_column(department_column) + +columns = [ + tf.feature_column.numeric_column('sales'), + department_column +] + +inputs = tf.feature_column.input_layer(features, columns) +``` + +Running the `inputs` tensor will parse the `features` into a batch of vectors. + +Feature columns can have internal state, like layers, so they often need to be +initialized. Categorical columns use @{tf.contrib.lookup$lookup tables} +internally and these require a separate initialization op, +@{tf.tables_initializer}. 
+ +``` python +var_init = tf.global_variables_initializer() +table_init = tf.tables_initializer() +sess = tf.Session() +sess.run((var_init, table_init)) +``` + +Once the internal state has been initialized you can run `inputs` like any +other `tf.Tensor`: + +```python +print(sess.run(inputs)) +``` + +This shows how the feature columns have packed the input vectors, with the +one-hot "department" as the first two indices and "sales" as the third. + +```None +[[ 1. 0. 5.] + [ 1. 0. 10.] + [ 0. 1. 8.] + [ 0. 1. 9.]] +``` + +## Training + +Now that you're familiar with the basics of core TensorFlow, let's train a +small regression model manually. + +### Define the data + +First let's define some inputs, `x`, and the expected output for each input, +`y_true`: + +```python +x = tf.constant([[1], [2], [3], [4]], dtype=tf.float32) +y_true = tf.constant([[0], [-1], [-2], [-3]], dtype=tf.float32) +``` + +### Define the model + +Next, build a simple linear model, with 1 output: + +``` python +linear_model = tf.layers.Dense(units=1) + +y_pred = linear_model(x) +``` + +You can evaluate the predictions as follows: + +``` python +sess = tf.Session() +init = tf.global_variables_initializer() +sess.run(init) + +print(sess.run(y_pred)) +``` + +The model hasn't yet been trained, so the four "predicted" values aren't very +good. Here's what we got; your own output will almost certainly differ: + +``` None +[[ 0.02631879] + [ 0.05263758] + [ 0.07895637] + [ 0.10527515]] +``` + +### Loss + +To optimize a model, you first need to define the loss. We'll use the mean +square error, a standard loss for regression problems. + +While you could do this manually with lower level math operations, +the @{tf.losses} module provides a set of common loss functions. You can use it +to calculate the mean square error as follows: + +``` python +loss = tf.losses.mean_squared_error(labels=y_true, predictions=y_pred) + +print(sess.run(loss)) +``` +This will produce a loss value, something like: + +``` None +2.23962 +``` + +### Training + +TensorFlow provides +[**optimizers**](https://developers.google.com/machine-learning/glossary/#optimizer) +implementing standard optimization algorithms. These are implemented as +sub-classes of @{tf.train.Optimizer}. They incrementally change each +variable in order to minimize the loss. The simplest optimization algorithm is +[**gradient descent**](https://developers.google.com/machine-learning/glossary/#gradient_descent), +implemented by @{tf.train.GradientDescentOptimizer}. It modifies each +variable according to the magnitude of the derivative of loss with respect to +that variable. For example: + +```python +optimizer = tf.train.GradientDescentOptimizer(0.01) +train = optimizer.minimize(loss) +``` + +This code builds all the graph components necessary for the optimization, and +returns a training operation. When run, the training op will update variables +in the graph. You might run it as follows: + +```python +for i in range(100): + _, loss_value = sess.run((train, loss)) + print(loss_value) +``` + +Since `train` is an op, not a tensor, it doesn't return a value when run. +To see the progression of the loss during training, we run the loss tensor at +the same time, producing output like the following: + +``` None +1.35659 +1.00412 +0.759167 +0.588829 +0.470264 +0.387626 +0.329918 +0.289511 +0.261112 +0.241046 +... 
+``` + +### Complete program + +```python +x = tf.constant([[1], [2], [3], [4]], dtype=tf.float32) +y_true = tf.constant([[0], [-1], [-2], [-3]], dtype=tf.float32) + +linear_model = tf.layers.Dense(units=1) + +y_pred = linear_model(x) +loss = tf.losses.mean_squared_error(labels=y_true, predictions=y_pred) + +optimizer = tf.train.GradientDescentOptimizer(0.01) +train = optimizer.minimize(loss) + +init = tf.global_variables_initializer() + +sess = tf.Session() +sess.run(init) +for i in range(100): + _, loss_value = sess.run((train, loss)) + print(loss_value) + +print(sess.run(y_pred)) +``` + +## Next steps + +To learn more about building models with TensorFlow consider the following: + +* @{$custom_estimators$Custom Estimators}, to learn how to build + customized models with TensorFlow. Your knowledge of TensorFlow Core will + help you understand and debug your own models. + +If you want to learn more about the inner workings of TensorFlow consider the +following documents, which go into more depth on many of the topics discussed +here: + +* @{$graphs} +* @{$tensors} +* @{$variables} + + diff --git a/tensorflow/docs_src/guide/premade_estimators.md b/tensorflow/docs_src/guide/premade_estimators.md new file mode 100644 index 0000000000..3e910c1fe2 --- /dev/null +++ b/tensorflow/docs_src/guide/premade_estimators.md @@ -0,0 +1,430 @@ +# Premade Estimators + +This document introduces the TensorFlow programming environment and shows you +how to solve the Iris classification problem in TensorFlow. + +## Prerequisites + +Prior to using the sample code in this document, you'll need to do the +following: + +* @{$install$Install TensorFlow}. +* If you installed TensorFlow with virtualenv or Anaconda, activate your + TensorFlow environment. +* Install or upgrade pandas by issuing the following command: + + pip install pandas + +## Getting the sample code + +Take the following steps to get the sample code we'll be going through: + +1. Clone the TensorFlow Models repository from GitHub by entering the following + command: + + git clone https://github.com/tensorflow/models + +1. Change directory within that branch to the location containing the examples + used in this document: + + cd models/samples/core/get_started/ + +The program described in this document is +[`premade_estimator.py`](https://github.com/tensorflow/models/blob/master/samples/core/get_started/premade_estimator.py). +This program uses +[`iris_data.py`](https://github.com/tensorflow/models/blob/master/samples/core/get_started/iris_data.py) +to fetch its training data. + +### Running the program + +You run TensorFlow programs as you would run any Python program. For example: + +``` bsh +python premade_estimator.py +``` + +The program should output training logs followed by some predictions against +the test set. For example, the first line in the following output shows that +the model thinks there is a 99.6% chance that the first example in the test +set is a Setosa. Since the test set expected Setosa, this appears to be +a good prediction. + +``` None +... +Prediction is "Setosa" (99.6%), expected "Setosa" + +Prediction is "Versicolor" (99.8%), expected "Versicolor" + +Prediction is "Virginica" (97.9%), expected "Virginica" +``` + +If the program generates errors instead of answers, ask yourself the following +questions: + +* Did you install TensorFlow properly? +* Are you using the correct version of TensorFlow? +* Did you activate the environment you installed TensorFlow in? (This is + only relevant in certain installation mechanisms.) 
+ +## The programming stack + +Before getting into the details of the program itself, let's investigate the +programming environment. As the following illustration shows, TensorFlow +provides a programming stack consisting of multiple API layers: + +
+>
+</div>
+ +We strongly recommend writing TensorFlow programs with the following APIs: + +* @{$guide/estimators$Estimators}, which represent a complete model. + The Estimator API provides methods to train the model, to judge the model's + accuracy, and to generate predictions. +* @{$guide/datasets_for_estimators}, which build a data input + pipeline. The Dataset API has methods to load and manipulate data, and feed + it into your model. The Dataset API meshes well with the Estimators API. + +## Classifying irises: an overview + +The sample program in this document builds and tests a model that +classifies Iris flowers into three different species based on the size of their +[sepals](https://en.wikipedia.org/wiki/Sepal) and +[petals](https://en.wikipedia.org/wiki/Petal). + +
+>
+Petal geometry compared for three iris species: Iris setosa, Iris virginica, and Iris versicolor
+</div>
+ +**From left to right, +[*Iris setosa*](https://commons.wikimedia.org/w/index.php?curid=170298) (by +[Radomil](https://commons.wikimedia.org/wiki/User:Radomil), CC BY-SA 3.0), +[*Iris versicolor*](https://commons.wikimedia.org/w/index.php?curid=248095) (by +[Dlanglois](https://commons.wikimedia.org/wiki/User:Dlanglois), CC BY-SA 3.0), +and [*Iris virginica*](https://www.flickr.com/photos/33397993@N05/3352169862) +(by [Frank Mayfield](https://www.flickr.com/photos/33397993@N05), CC BY-SA +2.0).** + +### The data set + +The Iris data set contains four features and one +[label](https://developers.google.com/machine-learning/glossary/#label). +The four features identify the following botanical characteristics of +individual Iris flowers: + +* sepal length +* sepal width +* petal length +* petal width + +Our model will represent these features as `float32` numerical data. + +The label identifies the Iris species, which must be one of the following: + +* Iris setosa (0) +* Iris versicolor (1) +* Iris virginica (2) + +Our model will represent the label as `int32` categorical data. + +The following table shows three examples in the data set: + +|sepal length | sepal width | petal length | petal width| species (label) | +|------------:|------------:|-------------:|-----------:|:---------------:| +| 5.1 | 3.3 | 1.7 | 0.5 | 0 (Setosa) | +| 5.0 | 2.3 | 3.3 | 1.0 | 1 (versicolor)| +| 6.4 | 2.8 | 5.6 | 2.2 | 2 (virginica) | + +### The algorithm + +The program trains a Deep Neural Network classifier model having the following +topology: + +* 2 hidden layers. +* Each hidden layer contains 10 nodes. + +The following figure illustrates the features, hidden layers, and predictions +(not all of the nodes in the hidden layers are shown): + +
+>
+A diagram of the network architecture: Inputs, 2 hidden layers, and outputs
+</div>
+ +### Inference + +Running the trained model on an unlabeled example yields three predictions, +namely, the likelihood that this flower is the given Iris species. The sum of +those output predictions will be 1.0. For example, the prediction on an +unlabeled example might be something like the following: + +* 0.03 for Iris Setosa +* 0.95 for Iris Versicolor +* 0.02 for Iris Virginica + +The preceding prediction indicates a 95% probability that the given unlabeled +example is an Iris Versicolor. + +## Overview of programming with Estimators + +An Estimator is TensorFlow's high-level representation of a complete model. It +handles the details of initialization, logging, saving and restoring, and many +other features so you can concentrate on your model. For more details see +@{$guide/estimators}. + +An Estimator is any class derived from @{tf.estimator.Estimator}. TensorFlow +provides a collection of +@{tf.estimator$pre-made Estimators} +(for example, `LinearRegressor`) to implement common ML algorithms. Beyond +those, you may write your own +@{$custom_estimators$custom Estimators}. +We recommend using pre-made Estimators when just getting started. + +To write a TensorFlow program based on pre-made Estimators, you must perform the +following tasks: + +* Create one or more input functions. +* Define the model's feature columns. +* Instantiate an Estimator, specifying the feature columns and various + hyperparameters. +* Call one or more methods on the Estimator object, passing the appropriate + input function as the source of the data. + +Let's see how those tasks are implemented for Iris classification. + +## Create input functions + +You must create input functions to supply data for training, +evaluating, and prediction. + +An **input function** is a function that returns a @{tf.data.Dataset} object +which outputs the following two-element tuple: + +* [`features`](https://developers.google.com/machine-learning/glossary/#feature) - A Python dictionary in which: + * Each key is the name of a feature. + * Each value is an array containing all of that feature's values. +* `label` - An array containing the values of the + [label](https://developers.google.com/machine-learning/glossary/#label) for + every example. + +Just to demonstrate the format of the input function, here's a simple +implementation: + +```python +def input_evaluation_set(): + features = {'SepalLength': np.array([6.4, 5.0]), + 'SepalWidth': np.array([2.8, 2.3]), + 'PetalLength': np.array([5.6, 3.3]), + 'PetalWidth': np.array([2.2, 1.0])} + labels = np.array([2, 1]) + return features, labels +``` + +Your input function may generate the `features` dictionary and `label` list any +way you like. However, we recommend using TensorFlow's Dataset API, which can +parse all sorts of data. At a high level, the Dataset API consists of the +following classes: + +
+>
+A diagram showing subclasses of the Dataset class
+</div>
+ +Where the individual members are: + +* `Dataset` - Base class containing methods to create and transform + datasets. Also allows you to initialize a dataset from data in memory, or from + a Python generator. +* `TextLineDataset` - Reads lines from text files. +* `TFRecordDataset` - Reads records from TFRecord files. +* `FixedLengthRecordDataset` - Reads fixed size records from binary files. +* `Iterator` - Provides a way to access one data set element at a time. + +The Dataset API can handle a lot of common cases for you. For example, +using the Dataset API, you can easily read in records from a large collection +of files in parallel and join them into a single stream. + +To keep things simple in this example we are going to load the data with +[pandas](https://pandas.pydata.org/), and build our input pipeline from this +in-memory data. + +Here is the input function used for training in this program, which is available +in [`iris_data.py`](https://github.com/tensorflow/models/blob/master/samples/core/get_started/iris_data.py): + +``` python +def train_input_fn(features, labels, batch_size): + """An input function for training""" + # Convert the inputs to a Dataset. + dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels)) + + # Shuffle, repeat, and batch the examples. + return dataset.shuffle(1000).repeat().batch(batch_size) +``` + +## Define the feature columns + +A [**feature column**](https://developers.google.com/machine-learning/glossary/#feature_columns) +is an object describing how the model should use raw input data from the +features dictionary. When you build an Estimator model, you pass it a list of +feature columns that describes each of the features you want the model to use. +The @{tf.feature_column} module provides many options for representing data +to the model. + +For Iris, the 4 raw features are numeric values, so we'll build a list of +feature columns to tell the Estimator model to represent each of the four +features as 32-bit floating-point values. Therefore, the code to create the +feature column is: + +```python +# Feature columns describe how to use the input. +my_feature_columns = [] +for key in train_x.keys(): + my_feature_columns.append(tf.feature_column.numeric_column(key=key)) +``` + +Feature columns can be far more sophisticated than those we're showing here. We +detail feature columns @{$feature_columns$later on} in our Getting +Started guide. + +Now that we have the description of how we want the model to represent the raw +features, we can build the estimator. + + +## Instantiate an estimator + +The Iris problem is a classic classification problem. Fortunately, TensorFlow +provides several pre-made classifier Estimators, including: + +* @{tf.estimator.DNNClassifier} for deep models that perform multi-class + classification. +* @{tf.estimator.DNNLinearCombinedClassifier} for wide & deep models. +* @{tf.estimator.LinearClassifier} for classifiers based on linear models. + +For the Iris problem, `tf.estimator.DNNClassifier` seems like the best choice. +Here's how we instantiated this Estimator: + +```python +# Build a DNN with 2 hidden layers and 10 nodes in each hidden layer. +classifier = tf.estimator.DNNClassifier( + feature_columns=my_feature_columns, + # Two hidden layers of 10 nodes each. + hidden_units=[10, 10], + # The model must choose between 3 classes. + n_classes=3) +``` + +## Train, Evaluate, and Predict + +Now that we have an Estimator object, we can call methods to do the following: + +* Train the model. 
+* Evaluate the trained model. +* Use the trained model to make predictions. + +### Train the model + +Train the model by calling the Estimator's `train` method as follows: + +```python +# Train the Model. +classifier.train( + input_fn=lambda:iris_data.train_input_fn(train_x, train_y, args.batch_size), + steps=args.train_steps) +``` + +Here we wrap up our `input_fn` call in a +[`lambda`](https://docs.python.org/3/tutorial/controlflow.html) +to capture the arguments while providing an input function that takes no +arguments, as expected by the Estimator. The `steps` argument tells the method +to stop training after a number of training steps. + +### Evaluate the trained model + +Now that the model has been trained, we can get some statistics on its +performance. The following code block evaluates the accuracy of the trained +model on the test data: + +```python +# Evaluate the model. +eval_result = classifier.evaluate( + input_fn=lambda:iris_data.eval_input_fn(test_x, test_y, args.batch_size)) + +print('\nTest set accuracy: {accuracy:0.3f}\n'.format(**eval_result)) +``` + +Unlike our call to the `train` method, we did not pass the `steps` +argument to evaluate. Our `eval_input_fn` only yields a single +[epoch](https://developers.google.com/machine-learning/glossary/#epoch) of data. + +Running this code yields the following output (or something similar): + +```none +Test set accuracy: 0.967 +``` + +### Making predictions (inferring) from the trained model + +We now have a trained model that produces good evaluation results. +We can now use the trained model to predict the species of an Iris flower +based on some unlabeled measurements. As with training and evaluation, we make +predictions using a single function call: + +```python +# Generate predictions from the model +expected = ['Setosa', 'Versicolor', 'Virginica'] +predict_x = { + 'SepalLength': [5.1, 5.9, 6.9], + 'SepalWidth': [3.3, 3.0, 3.1], + 'PetalLength': [1.7, 4.2, 5.4], + 'PetalWidth': [0.5, 1.5, 2.1], +} + +predictions = classifier.predict( + input_fn=lambda:iris_data.eval_input_fn(predict_x, + batch_size=args.batch_size)) +``` + +The `predict` method returns a Python iterable, yielding a dictionary of +prediction results for each example. The following code prints a few +predictions and their probabilities: + + +``` python +template = ('\nPrediction is "{}" ({:.1f}%), expected "{}"') + +for pred_dict, expec in zip(predictions, expected): + class_id = pred_dict['class_ids'][0] + probability = pred_dict['probabilities'][class_id] + + print(template.format(iris_data.SPECIES[class_id], + 100 * probability, expec)) +``` + +Running the preceding code yields the following output: + +``` None +... +Prediction is "Setosa" (99.6%), expected "Setosa" + +Prediction is "Versicolor" (99.8%), expected "Versicolor" + +Prediction is "Virginica" (97.9%), expected "Virginica" +``` + + +## Summary + +Pre-made Estimators are an effective way to quickly create standard models. + +Now that you've gotten started writing TensorFlow programs, consider the +following material: + +* @{$checkpoints$Checkpoints} to learn how to save and restore models. +* @{$guide/datasets_for_estimators} to learn more about importing + data into your model. +* @{$custom_estimators$Creating Custom Estimators} to learn how to + write your own Estimator, customized for a particular problem. 
diff --git a/tensorflow/docs_src/guide/saved_model.md b/tensorflow/docs_src/guide/saved_model.md new file mode 100644 index 0000000000..27ef7bb0da --- /dev/null +++ b/tensorflow/docs_src/guide/saved_model.md @@ -0,0 +1,999 @@ +# Save and Restore + +The @{tf.train.Saver} class provides methods to save and restore models. The +@{tf.saved_model.simple_save} function is an easy way to build a +@{tf.saved_model$saved model} suitable for serving. +[Estimators](@{$guide/estimators}) automatically save and restore +variables in the `model_dir`. + +## Save and restore variables + +TensorFlow @{$variables} are the best way to represent shared, persistent state +manipulated by your program. The `tf.train.Saver` constructor adds `save` and +`restore` ops to the graph for all, or a specified list, of the variables in the +graph. The `Saver` object provides methods to run these ops, specifying paths +for the checkpoint files to write to or read from. + +`Saver` restores all variables already defined in your model. If you're +loading a model without knowing how to build its graph (for example, if you're +writing a generic program to load models), then read the +[Overview of saving and restoring models](#models) section +later in this document. + +TensorFlow saves variables in binary *checkpoint files* that map variable +names to tensor values. + +Caution: TensorFlow model files are code. Be careful with untrusted code. +See [Using TensorFlow Securely](https://github.com/tensorflow/tensorflow/blob/master/SECURITY.md) +for details. + +### Save variables + +Create a `Saver` with `tf.train.Saver()` to manage all variables in the +model. For example, the following snippet demonstrates how to call the +`tf.train.Saver.save` method to save variables to checkpoint files: + +```python +# Create some variables. +v1 = tf.get_variable("v1", shape=[3], initializer = tf.zeros_initializer) +v2 = tf.get_variable("v2", shape=[5], initializer = tf.zeros_initializer) + +inc_v1 = v1.assign(v1+1) +dec_v2 = v2.assign(v2-1) + +# Add an op to initialize the variables. +init_op = tf.global_variables_initializer() + +# Add ops to save and restore all the variables. +saver = tf.train.Saver() + +# Later, launch the model, initialize the variables, do some work, and save the +# variables to disk. +with tf.Session() as sess: + sess.run(init_op) + # Do some work with the model. + inc_v1.op.run() + dec_v2.op.run() + # Save the variables to disk. + save_path = saver.save(sess, "/tmp/model.ckpt") + print("Model saved in path: %s" % save_path) +``` + +### Restore variables + +The `tf.train.Saver` object not only saves variables to checkpoint files, it +also restores variables. Note that when you restore variables you do not have +to initialize them beforehand. For example, the following snippet demonstrates +how to call the `tf.train.Saver.restore` method to restore variables from the +checkpoint files: + +```python +tf.reset_default_graph() + +# Create some variables. +v1 = tf.get_variable("v1", shape=[3]) +v2 = tf.get_variable("v2", shape=[5]) + +# Add ops to save and restore all the variables. +saver = tf.train.Saver() + +# Later, launch the model, use the saver to restore variables from disk, and +# do some work with the model. +with tf.Session() as sess: + # Restore variables from disk. + saver.restore(sess, "/tmp/model.ckpt") + print("Model restored.") + # Check the values of the variables + print("v1 : %s" % v1.eval()) + print("v2 : %s" % v2.eval()) +``` + +Note: There is not a physical file called `/tmp/model.ckpt`. 
It is the *prefix* of +filenames created for the checkpoint. Users only interact with the prefix +instead of physical checkpoint files. + +### Choose variables to save and restore + +If you do not pass any arguments to `tf.train.Saver()`, the saver handles all +variables in the graph. Each variable is saved under the name that was passed +when the variable was created. + +It is sometimes useful to explicitly specify names for variables in the +checkpoint files. For example, you may have trained a model with a variable +named `"weights"` whose value you want to restore into a variable named +`"params"`. + +It is also sometimes useful to only save or restore a subset of the variables +used by a model. For example, you may have trained a neural net with five +layers, and you now want to train a new model with six layers that reuses the +existing weights of the five trained layers. You can use the saver to restore +the weights of just the first five layers. + +You can easily specify the names and variables to save or load by passing to the +`tf.train.Saver()` constructor either of the following: + +* A list of variables (which will be stored under their own names). +* A Python dictionary in which keys are the names to use and the values are the +variables to manage. + +Continuing from the save/restore examples shown earlier: + +```python +tf.reset_default_graph() +# Create some variables. +v1 = tf.get_variable("v1", [3], initializer = tf.zeros_initializer) +v2 = tf.get_variable("v2", [5], initializer = tf.zeros_initializer) + +# Add ops to save and restore only `v2` using the name "v2" +saver = tf.train.Saver({"v2": v2}) + +# Use the saver object normally after that. +with tf.Session() as sess: + # Initialize v1 since the saver will not. + v1.initializer.run() + saver.restore(sess, "/tmp/model.ckpt") + + print("v1 : %s" % v1.eval()) + print("v2 : %s" % v2.eval()) +``` + +Notes: + +* You can create as many `Saver` objects as you want if you need to save and + restore different subsets of the model variables. The same variable can be + listed in multiple saver objects; its value is only changed when the + `Saver.restore()` method is run. + +* If you only restore a subset of the model variables at the start of a + session, you have to run an initialize op for the other variables. See + @{tf.variables_initializer} for more information. + +* To inspect the variables in a checkpoint, you can use the + [`inspect_checkpoint`](https://www.tensorflow.org/code/tensorflow/python/tools/inspect_checkpoint.py) + library, particularly the `print_tensors_in_checkpoint_file` function. + +* By default, `Saver` uses the value of the @{tf.Variable.name} property + for each variable. However, when you create a `Saver` object, you may + optionally choose names for the variables in the checkpoint files. + + +### Inspect variables in a checkpoint + +We can quickly inspect variables in a checkpoint with the +[`inspect_checkpoint`](https://www.tensorflow.org/code/tensorflow/python/tools/inspect_checkpoint.py) library. + +Continuing from the save/restore examples shown earlier: + +```python +# import the inspect_checkpoint library +from tensorflow.python.tools import inspect_checkpoint as chkp + +# print all tensors in checkpoint file +chkp.print_tensors_in_checkpoint_file("/tmp/model.ckpt", tensor_name='', all_tensors=True) + +# tensor_name: v1 +# [ 1. 1. 1.] +# tensor_name: v2 +# [-1. -1. -1. -1. -1.] 
+ +# print only tensor v1 in checkpoint file +chkp.print_tensors_in_checkpoint_file("/tmp/model.ckpt", tensor_name='v1', all_tensors=False) + +# tensor_name: v1 +# [ 1. 1. 1.] + +# print only tensor v2 in checkpoint file +chkp.print_tensors_in_checkpoint_file("/tmp/model.ckpt", tensor_name='v2', all_tensors=False) + +# tensor_name: v2 +# [-1. -1. -1. -1. -1.] +``` + + + +## Save and restore models + +Use `SavedModel` to save and load your model—variables, the graph, and the +graph's metadata. This is a language-neutral, recoverable, hermetic +serialization format that enables higher-level systems and tools to produce, +consume, and transform TensorFlow models. TensorFlow provides several ways to +interact with `SavedModel`, including the @{tf.saved_model} APIs, +@{tf.estimator.Estimator}, and a command-line interface. + + +## Build and load a SavedModel + +### Simple save + +The easiest way to create a `SavedModel` is to use the @{tf.saved_model.simple_save} +function: + +```python +simple_save(session, + export_dir, + inputs={"x": x, "y": y}, + outputs={"z": z}) +``` + +This configures the `SavedModel` so it can be loaded by +[TensorFlow serving](/serving/serving_basic) and supports the +[Predict API](https://github.com/tensorflow/serving/blob/master/tensorflow_serving/apis/predict.proto). +To access the classify, regress, or multi-inference APIs, use the manual +`SavedModel` builder APIs or an @{tf.estimator.Estimator}. + +### Manually build a SavedModel + +If your use case isn't covered by @{tf.saved_model.simple_save}, use the manual +@{tf.saved_model.builder$builder APIs} to create a `SavedModel`. + +The @{tf.saved_model.builder.SavedModelBuilder} class provides functionality to +save multiple `MetaGraphDef`s. A **MetaGraph** is a dataflow graph, plus +its associated variables, assets, and signatures. A **`MetaGraphDef`** +is the protocol buffer representation of a MetaGraph. A **signature** is +the set of inputs to and outputs from a graph. + +If assets need to be saved and written or copied to disk, they can be provided +when the first `MetaGraphDef` is added. If multiple `MetaGraphDef`s are +associated with an asset of the same name, only the first version is retained. + +Each `MetaGraphDef` added to the SavedModel must be annotated with +user-specified tags. The tags provide a means to identify the specific +`MetaGraphDef` to load and restore, along with the shared set of variables +and assets. These tags +typically annotate a `MetaGraphDef` with its functionality (for example, +serving or training), and optionally with hardware-specific aspects (for +example, GPU). + +For example, the following code suggests a typical way to use +`SavedModelBuilder` to build a SavedModel: + +```python +export_dir = ... +... +builder = tf.saved_model.builder.SavedModelBuilder(export_dir) +with tf.Session(graph=tf.Graph()) as sess: + ... + builder.add_meta_graph_and_variables(sess, + [tag_constants.TRAINING], + signature_def_map=foo_signatures, + assets_collection=foo_assets, + strip_default_attrs=True) +... +# Add a second MetaGraphDef for inference. +with tf.Session(graph=tf.Graph()) as sess: + ... + builder.add_meta_graph([tag_constants.SERVING], strip_default_attrs=True) +... +builder.save() +``` + + +#### Forward compatibility via `strip_default_attrs=True` + +Following the guidance below gives you forward compatibility only if the set of +Ops has not changed. 
+ +The @{tf.saved_model.builder.SavedModelBuilder$`SavedModelBuilder`} class allows +users to control whether default-valued attributes must be stripped from the +@{$extend/tool_developers#nodes$`NodeDefs`} +while adding a meta graph to the SavedModel bundle. Both +@{tf.saved_model.builder.SavedModelBuilder.add_meta_graph_and_variables$`SavedModelBuilder.add_meta_graph_and_variables`} +and @{tf.saved_model.builder.SavedModelBuilder.add_meta_graph$`SavedModelBuilder.add_meta_graph`} +methods accept a Boolean flag `strip_default_attrs` that controls this behavior. + +If `strip_default_attrs` is `False`, the exported @{tf.MetaGraphDef} will have +the default valued attributes in all its @{tf.NodeDef} instances. +This can break forward compatibility with a sequence of events such as the +following: + +* An existing Op (`Foo`) is updated to include a new attribute (`T`) with a + default (`bool`) at version 101. +* A model producer such as a "trainer binary" picks up this change (version 101) + to the `OpDef` and re-exports an existing model that uses Op `Foo`. +* A model consumer (such as [Tensorflow Serving](/serving)) running an older + binary (version 100) doesn't have attribute `T` for Op `Foo`, but tries to + import this model. The model consumer doesn't recognize attribute `T` in a + `NodeDef` that uses Op `Foo` and therefore fails to load the model. +* By setting `strip_default_attrs` to True, the model producers can strip away + any default valued attributes in the `NodeDefs`. This helps ensure that newly + added attributes with defaults don't cause older model consumers to fail + loading models regenerated with newer training binaries. + +See [compatibility guidance](./version_compat.md) +for more information. + +### Loading a SavedModel in Python + +The Python version of the SavedModel +@{tf.saved_model.loader$loader} +provides load and restore capability for a SavedModel. The `load` operation +requires the following information: + +* The session in which to restore the graph definition and variables. +* The tags used to identify the MetaGraphDef to load. +* The location (directory) of the SavedModel. + +Upon a load, the subset of variables, assets, and signatures supplied as part of +the specific MetaGraphDef will be restored into the supplied session. + + +```python +export_dir = ... +... +with tf.Session(graph=tf.Graph()) as sess: + tf.saved_model.loader.load(sess, [tag_constants.TRAINING], export_dir) + ... +``` + + +### Load a SavedModel in C++ + +The C++ version of the SavedModel +[loader](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/cc/saved_model/loader.h) +provides an API to load a SavedModel from a path, while allowing +`SessionOptions` and `RunOptions`. +You have to specify the tags associated with the graph to be loaded. +The loaded version of SavedModel is referred to as `SavedModelBundle` +and contains the MetaGraphDef and the session within which it is loaded. + +```c++ +const string export_dir = ... +SavedModelBundle bundle; +... +LoadSavedModel(session_options, run_options, export_dir, {kSavedModelTagTrain}, + &bundle); +``` + +### Load and serve a SavedModel in TensorFlow serving + +You can easily load and serve a SavedModel with the TensorFlow Serving Model +Server binary. See [instructions](https://www.tensorflow.org/serving/setup#installing_using_apt-get) +on how to install the server, or build it if you wish. 
+ +Once you have the Model Server, run it with: +``` +tensorflow_model_server --port=port-numbers --model_name=your-model-name --model_base_path=your_model_base_path +``` +Set the port and model_name flags to values of your choosing. The +model_base_path flag expects to be to a base directory, with each version of +your model residing in a numerically named subdirectory. If you only have a +single version of your model, simply place it in a subdirectory like so: +* Place the model in /tmp/model/0001 +* Set model_base_path to /tmp/model + +Store different versions of your model in numerically named subdirectories of a +common base directory. For example, suppose the base directory is `/tmp/model`. +If you have only one version of your model, store it in `/tmp/model/0001`. If +you have two versions of your model, store the second version in +`/tmp/model/0002`, and so on. Set the `--model-base_path` flag to the base +directory (`/tmp/model`, in this example). TensorFlow Model Server will serve +the model in the highest numbered subdirectory of that base directory. + +### Standard constants + +SavedModel offers the flexibility to build and load TensorFlow graphs for a +variety of use-cases. For the most common use-cases, SavedModel's APIs +provide a set of constants in Python and C++ that are easy to +reuse and share across tools consistently. + +#### Standard MetaGraphDef tags + +You may use sets of tags to uniquely identify a `MetaGraphDef` saved in a +SavedModel. A subset of commonly used tags is specified in: + +* [Python](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/tag_constants.py) +* [C++](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/cc/saved_model/tag_constants.h) + + +#### Standard SignatureDef constants + +A [**SignatureDef**](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/protobuf/meta_graph.proto) +is a protocol buffer that defines the signature of a computation +supported by a graph. +Commonly used input keys, output keys, and method names are +defined in: + +* [Python](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/signature_constants.py) +* [C++](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/cc/saved_model/signature_constants.h) + +## Using SavedModel with Estimators + +After training an `Estimator` model, you may want to create a service +from that model that takes requests and returns a result. You can run such a +service locally on your machine or deploy it in the cloud. + +To prepare a trained Estimator for serving, you must export it in the standard +SavedModel format. This section explains how to: + +* Specify the output nodes and the corresponding + [APIs](https://github.com/tensorflow/serving/blob/master/tensorflow_serving/apis/prediction_service.proto) + that can be served (Classify, Regress, or Predict). +* Export your model to the SavedModel format. +* Serve the model from a local server and request predictions. + + +### Prepare serving inputs + +During training, an @{$premade_estimators#input_fn$`input_fn()`} ingests data +and prepares it for use by the model. At serving time, similarly, a +`serving_input_receiver_fn()` accepts inference requests and prepares them for +the model. This function has the following purposes: + +* To add placeholders to the graph that the serving system will feed + with inference requests. 
+* To add any additional ops needed to convert data from the input format + into the feature `Tensor`s expected by the model. + +The function returns a @{tf.estimator.export.ServingInputReceiver} object, +which packages the placeholders and the resulting feature `Tensor`s together. + +A typical pattern is that inference requests arrive in the form of serialized +`tf.Example`s, so the `serving_input_receiver_fn()` creates a single string +placeholder to receive them. The `serving_input_receiver_fn()` is then also +responsible for parsing the `tf.Example`s by adding a @{tf.parse_example} op to +the graph. + +When writing such a `serving_input_receiver_fn()`, you must pass a parsing +specification to @{tf.parse_example} to tell the parser what feature names to +expect and how to map them to `Tensor`s. A parsing specification takes the +form of a dict from feature names to @{tf.FixedLenFeature}, @{tf.VarLenFeature}, +and @{tf.SparseFeature}. Note this parsing specification should not include +any label or weight columns, since those will not be available at serving +time—in contrast to a parsing specification used in the `input_fn()` at +training time. + +In combination, then: + +```py +feature_spec = {'foo': tf.FixedLenFeature(...), + 'bar': tf.VarLenFeature(...)} + +def serving_input_receiver_fn(): + """An input receiver that expects a serialized tf.Example.""" + serialized_tf_example = tf.placeholder(dtype=tf.string, + shape=[default_batch_size], + name='input_example_tensor') + receiver_tensors = {'examples': serialized_tf_example} + features = tf.parse_example(serialized_tf_example, feature_spec) + return tf.estimator.export.ServingInputReceiver(features, receiver_tensors) +``` + +The @{tf.estimator.export.build_parsing_serving_input_receiver_fn} utility +function provides that input receiver for the common case. + +> Note: when training a model to be served using the Predict API with a local +> server, the parsing step is not needed because the model will receive raw +> feature data. + +Even if you require no parsing or other input processing—that is, if the +serving system will feed feature `Tensor`s directly—you must still provide +a `serving_input_receiver_fn()` that creates placeholders for the feature +`Tensor`s and passes them through. The +@{tf.estimator.export.build_raw_serving_input_receiver_fn} utility provides for +this. + +If these utilities do not meet your needs, you are free to write your own +`serving_input_receiver_fn()`. One case where this may be needed is if your +training `input_fn()` incorporates some preprocessing logic that must be +recapitulated at serving time. To reduce the risk of training-serving skew, we +recommend encapsulating such processing in a function which is then called +from both `input_fn()` and `serving_input_receiver_fn()`. + +Note that the `serving_input_receiver_fn()` also determines the *input* +portion of the signature. That is, when writing a +`serving_input_receiver_fn()`, you must tell the parser what signatures +to expect and how to map them to your model's expected inputs. +By contrast, the *output* portion of the signature is determined by the model. + + +### Specify the outputs of a custom model + +When writing a custom `model_fn`, you must populate the `export_outputs` element +of the @{tf.estimator.EstimatorSpec} return value. This is a dict of +`{name: output}` describing the output signatures to be exported and used during +serving. 
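+
+For illustration only, a minimal sketch of a custom `model_fn` that populates
+`export_outputs` might look like the following (the feature name `'x'`, the toy
+linear model, and the output key `'predict'` are hypothetical, not taken from
+this guide):
+
+```py
+import tensorflow as tf
+
+def model_fn(features, labels, mode):
+  # A toy linear model over an assumed numeric feature named 'x'.
+  predictions = tf.layers.dense(features['x'], units=1)
+
+  if mode == tf.estimator.ModeKeys.PREDICT:
+    # A single entry is enough for a single-headed model; the key is arbitrary.
+    export_outputs = {
+        'predict': tf.estimator.export.PredictOutput({'value': predictions})
+    }
+    return tf.estimator.EstimatorSpec(
+        mode,
+        predictions={'value': predictions},
+        export_outputs=export_outputs)
+
+  # Training/evaluation path of the sketch.
+  loss = tf.losses.mean_squared_error(labels=labels, predictions=predictions)
+  train_op = tf.train.GradientDescentOptimizer(0.01).minimize(
+      loss, global_step=tf.train.get_global_step())
+  return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
+```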
+ +In the usual case of making a single prediction, this dict contains +one element, and the `name` is immaterial. In a multi-headed model, each head +is represented by an entry in this dict. In this case the `name` is a string +of your choice that can be used to request a specific head at serving time. + +Each `output` value must be an `ExportOutput` object such as +@{tf.estimator.export.ClassificationOutput}, +@{tf.estimator.export.RegressionOutput}, or +@{tf.estimator.export.PredictOutput}. + +These output types map straightforwardly to the +[TensorFlow Serving APIs](https://github.com/tensorflow/serving/blob/master/tensorflow_serving/apis/prediction_service.proto), +and so determine which request types will be honored. + +Note: In the multi-headed case, a `SignatureDef` will be generated for each +element of the `export_outputs` dict returned from the model_fn, named using +the same keys. These `SignatureDef`s differ only in their outputs, as +provided by the corresponding `ExportOutput` entry. The inputs are always +those provided by the `serving_input_receiver_fn`. +An inference request may specify the head by name. One head must be named +using [`signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY`](https://www.tensorflow.org/code/tensorflow/python/saved_model/signature_constants.py) +indicating which `SignatureDef` will be served when an inference request +does not specify one. + + +### Perform the export + +To export your trained Estimator, call +@{tf.estimator.Estimator.export_savedmodel} with the export base path and +the `serving_input_receiver_fn`. + +```py +estimator.export_savedmodel(export_dir_base, serving_input_receiver_fn, + strip_default_attrs=True) +``` + +This method builds a new graph by first calling the +`serving_input_receiver_fn()` to obtain feature `Tensor`s, and then calling +this `Estimator`'s `model_fn()` to generate the model graph based on those +features. It starts a fresh `Session`, and, by default, restores the most recent +checkpoint into it. (A different checkpoint may be passed, if needed.) +Finally it creates a time-stamped export directory below the given +`export_dir_base` (i.e., `export_dir_base/`), and writes a +SavedModel into it containing a single `MetaGraphDef` saved from this +Session. + +> Note: It is your responsibility to garbage-collect old exports. +> Otherwise, successive exports will accumulate under `export_dir_base`. + +### Serve the exported model locally + +For local deployment, you can serve your model using +[TensorFlow Serving](https://github.com/tensorflow/serving), an open-source project that loads a +SavedModel and exposes it as a [gRPC](https://www.grpc.io/) service. + +First, [install TensorFlow Serving](https://github.com/tensorflow/serving). + +Then build and run the local model server, substituting `$export_dir_base` with +the path to the SavedModel you exported above: + +```sh +bazel build //tensorflow_serving/model_servers:tensorflow_model_server +bazel-bin/tensorflow_serving/model_servers/tensorflow_model_server --port=9000 --model_base_path=$export_dir_base +``` + +Now you have a server listening for inference requests via gRPC on port 9000! + + +### Request predictions from a local server + +The server responds to gRPC requests according to the +[PredictionService](https://github.com/tensorflow/serving/blob/master/tensorflow_serving/apis/prediction_service.proto#L15) +gRPC API service definition. 
(The nested protocol buffers are defined in +various [neighboring files](https://github.com/tensorflow/serving/blob/master/tensorflow_serving/apis)). + +From the API service definition, the gRPC framework generates client libraries +in various languages providing remote access to the API. In a project using the +Bazel build tool, these libraries are built automatically and provided via +dependencies like these (using Python for example): + +```build + deps = [ + "//tensorflow_serving/apis:classification_proto_py_pb2", + "//tensorflow_serving/apis:regression_proto_py_pb2", + "//tensorflow_serving/apis:predict_proto_py_pb2", + "//tensorflow_serving/apis:prediction_service_proto_py_pb2" + ] +``` + +Python client code can then import the libraries thus: + +```py +from tensorflow_serving.apis import classification_pb2 +from tensorflow_serving.apis import regression_pb2 +from tensorflow_serving.apis import predict_pb2 +from tensorflow_serving.apis import prediction_service_pb2 +``` + +> Note: `prediction_service_pb2` defines the service as a whole and so +> is always required. However a typical client will need only one of +> `classification_pb2`, `regression_pb2`, and `predict_pb2`, depending on the +> type of requests being made. + +Sending a gRPC request is then accomplished by assembling a protocol buffer +containing the request data and passing it to the service stub. Note how the +request protocol buffer is created empty and then populated via the +[generated protocol buffer API](https://developers.google.com/protocol-buffers/docs/reference/python-generated). + +```py +from grpc.beta import implementations + +channel = implementations.insecure_channel(host, int(port)) +stub = prediction_service_pb2.beta_create_PredictionService_stub(channel) + +request = classification_pb2.ClassificationRequest() +example = request.input.example_list.examples.add() +example.features.feature['x'].float_list.value.extend(image[0].astype(float)) + +result = stub.Classify(request, 10.0) # 10 secs timeout +``` + +The returned result in this example is a `ClassificationResponse` protocol +buffer. + +This is a skeletal example; please see the @{$deploy$Tensorflow Serving} +documentation and [examples](https://github.com/tensorflow/serving/tree/master/tensorflow_serving/example) +for more details. + +> Note: `ClassificationRequest` and `RegressionRequest` contain a +> `tensorflow.serving.Input` protocol buffer, which in turn contains a list of +> `tensorflow.Example` protocol buffers. `PredictRequest`, by contrast, +> contains a mapping from feature names to values encoded via `TensorProto`. +> Correspondingly: When using the `Classify` and `Regress` APIs, TensorFlow +> Serving feeds serialized `tf.Example`s to the graph, so your +> `serving_input_receiver_fn()` should include a `tf.parse_example()` Op. +> When using the generic `Predict` API, however, TensorFlow Serving feeds raw +> feature data to the graph, so a pass through `serving_input_receiver_fn()` +> should be used. + + + + + + + + + +## CLI to inspect and execute SavedModel + +You can use the SavedModel Command Line Interface (CLI) to inspect and +execute a SavedModel. +For example, you can use the CLI to inspect the model's `SignatureDef`s. +The CLI enables you to quickly confirm that the input +@{$tensors$Tensor dtype and shape} match the model. Moreover, if you +want to test your model, you can use the CLI to do a sanity check by +passing in sample inputs in various formats (for example, Python +expressions) and then fetching the output. 
+ + +### Install the SavedModel CLI + +Broadly speaking, you can install TensorFlow in either of the following +two ways: + +* By installing a pre-built TensorFlow binary. +* By building TensorFlow from source code. + +If you installed TensorFlow through a pre-built TensorFlow binary, +then the SavedModel CLI is already installed on your system +at pathname `bin\saved_model_cli`. + +If you built TensorFlow from source code, you must run the following +additional command to build `saved_model_cli`: + +``` +$ bazel build tensorflow/python/tools:saved_model_cli +``` + +### Overview of commands + +The SavedModel CLI supports the following two commands on a +`MetaGraphDef` in a SavedModel: + +* `show`, which shows a computation on a `MetaGraphDef` in a SavedModel. +* `run`, which runs a computation on a `MetaGraphDef`. + + +### `show` command + +A SavedModel contains one or more `MetaGraphDef`s, identified by their tag-sets. +To serve a model, you +might wonder what kind of `SignatureDef`s are in each model, and what are their +inputs and outputs. The `show` command let you examine the contents of the +SavedModel in hierarchical order. Here's the syntax: + +``` +usage: saved_model_cli show [-h] --dir DIR [--all] +[--tag_set TAG_SET] [--signature_def SIGNATURE_DEF_KEY] +``` + +For example, the following command shows all available +MetaGraphDef tag-sets in the SavedModel: + +``` +$ saved_model_cli show --dir /tmp/saved_model_dir +The given SavedModel contains the following tag-sets: +serve +serve, gpu +``` + +The following command shows all available `SignatureDef` keys in +a `MetaGraphDef`: + +``` +$ saved_model_cli show --dir /tmp/saved_model_dir --tag_set serve +The given SavedModel `MetaGraphDef` contains `SignatureDefs` with the +following keys: +SignatureDef key: "classify_x2_to_y3" +SignatureDef key: "classify_x_to_y" +SignatureDef key: "regress_x2_to_y3" +SignatureDef key: "regress_x_to_y" +SignatureDef key: "regress_x_to_y2" +SignatureDef key: "serving_default" +``` + +If a `MetaGraphDef` has *multiple* tags in the tag-set, you must specify +all tags, each tag separated by a comma. For example: + +```none +$ saved_model_cli show --dir /tmp/saved_model_dir --tag_set serve,gpu +``` + +To show all inputs and outputs TensorInfo for a specific `SignatureDef`, pass in +the `SignatureDef` key to `signature_def` option. This is very useful when you +want to know the tensor key value, dtype and shape of the input tensors for +executing the computation graph later. For example: + +``` +$ saved_model_cli show --dir \ +/tmp/saved_model_dir --tag_set serve --signature_def serving_default +The given SavedModel SignatureDef contains the following input(s): + inputs['x'] tensor_info: + dtype: DT_FLOAT + shape: (-1, 1) + name: x:0 +The given SavedModel SignatureDef contains the following output(s): + outputs['y'] tensor_info: + dtype: DT_FLOAT + shape: (-1, 1) + name: y:0 +Method name is: tensorflow/serving/predict +``` + +To show all available information in the SavedModel, use the `--all` option. 
+For example:
+
+```none
+$ saved_model_cli show --dir /tmp/saved_model_dir --all
+MetaGraphDef with tag-set: 'serve' contains the following SignatureDefs:
+
+signature_def['classify_x2_to_y3']:
+  The given SavedModel SignatureDef contains the following input(s):
+    inputs['inputs'] tensor_info:
+        dtype: DT_FLOAT
+        shape: (-1, 1)
+        name: x2:0
+  The given SavedModel SignatureDef contains the following output(s):
+    outputs['scores'] tensor_info:
+        dtype: DT_FLOAT
+        shape: (-1, 1)
+        name: y3:0
+  Method name is: tensorflow/serving/classify
+
+...
+
+signature_def['serving_default']:
+  The given SavedModel SignatureDef contains the following input(s):
+    inputs['x'] tensor_info:
+        dtype: DT_FLOAT
+        shape: (-1, 1)
+        name: x:0
+  The given SavedModel SignatureDef contains the following output(s):
+    outputs['y'] tensor_info:
+        dtype: DT_FLOAT
+        shape: (-1, 1)
+        name: y:0
+  Method name is: tensorflow/serving/predict
+```
+
+
+### `run` command
+
+Invoke the `run` command to run a graph computation, passing
+inputs and then displaying (and optionally saving) the outputs.
+Here's the syntax:
+
+```
+usage: saved_model_cli run [-h] --dir DIR --tag_set TAG_SET --signature_def
+                           SIGNATURE_DEF_KEY [--inputs INPUTS]
+                           [--input_exprs INPUT_EXPRS] [--outdir OUTDIR]
+                           [--overwrite] [--tf_debug]
+```
+
+The `run` command provides the following three ways to pass inputs to the model:
+
+* The `--inputs` option enables you to pass numpy ndarrays stored in files.
+* The `--input_exprs` option enables you to pass Python expressions.
+* The `--input_examples` option enables you to pass `tf.train.Example`s.
+
+
+#### `--inputs`
+
+To pass input data in files, specify the `--inputs` option, which takes the
+following general format:
+
+```bsh
+--inputs <INPUTS>
+```
+
+where *INPUTS* is either of the following formats:
+
+* `<input_key>=<filename>`
+* `<input_key>=<filename>[<variable_name>]`
+
+You may pass multiple *INPUTS*. If you do pass multiple inputs, use a semicolon
+to separate each of the *INPUTS*.
+
+`saved_model_cli` uses `numpy.load` to load the *filename*.
+The *filename* may be in any of the following formats:
+
+* `.npy`
+* `.npz`
+* pickle format
+
+A `.npy` file always contains a numpy ndarray. Therefore, when loading from
+a `.npy` file, the content will be directly assigned to the specified input
+tensor. If you specify a *variable_name* with that `.npy` file, the
+*variable_name* will be ignored and a warning will be issued.
+
+When loading from a `.npz` (zip) file, you may optionally specify a
+*variable_name* to identify the variable within the zip file to load for
+the input tensor key. If you don't specify a *variable_name*, the SavedModel
+CLI will check that only one file is included in the zip file and load it
+for the specified input tensor key.
+
+When loading from a pickle file, if no *variable_name* is specified in the
+square brackets, whatever is inside the pickle file will be passed to the
+specified input tensor key. Otherwise, the SavedModel CLI will assume a
+dictionary is stored in the pickle file and the value corresponding to
+the *variable_name* will be used.
+
+
+#### `--input_exprs`
+
+To pass inputs through Python expressions, specify the `--input_exprs` option.
+This can be useful when you don't have data
+files lying around, but still want to sanity check the model with some simple
+inputs that match the dtype and shape of the model's `SignatureDef`s.
+For example:
+
+```bsh
+`<input_key>=[[1],[2],[3]]`
+```
+
+In addition to Python expressions, you may also pass numpy functions.
+For example:
+
+```bsh
+`<input_key>=np.ones((32,32,3))`
+```
+
+(Note that the `numpy` module is already available to you as `np`.)
+
+
+#### `--input_examples`
+
+To pass `tf.train.Example` as inputs, specify the `--input_examples` option.
+For each input key, it takes a list of dictionaries, where each dictionary is an
+instance of `tf.train.Example`. The dictionary keys are the features and the
+values are the value lists for each feature.
+For example:
+
+```bsh
+`<input_key>=[{"age":[22,24],"education":["BS","MS"]}]`
+```
+
+#### Save output
+
+By default, the SavedModel CLI writes output to stdout. If a directory is
+passed to the `--outdir` option, the outputs will be saved as `.npy` files named
+after the output tensor keys under the given directory.
+
+Use `--overwrite` to overwrite existing output files.
+
+
+#### TensorFlow debugger (tfdbg) integration
+
+If the `--tf_debug` option is set, the SavedModel CLI will use the
+TensorFlow Debugger (tfdbg) to watch the intermediate Tensors and runtime
+graphs or subgraphs while running the SavedModel.
+
+
+#### Full examples of `run`
+
+Given:
+
+* Your model simply adds `x1` and `x2` to get output `y`.
+* All tensors in the model have shape `(-1, 1)`.
+* You have two `npy` files:
+  * `/tmp/my_data1.npy`, which contains a numpy ndarray `[[1], [2], [3]]`.
+  * `/tmp/my_data2.npy`, which contains another numpy
+    ndarray `[[0.5], [0.5], [0.5]]`.
+
+To run these two `npy` files through the model to get output `y`, issue
+the following command:
+
+```
+$ saved_model_cli run --dir /tmp/saved_model_dir --tag_set serve \
+--signature_def x1_x2_to_y --inputs x1=/tmp/my_data1.npy;x2=/tmp/my_data2.npy \
+--outdir /tmp/out
+Result for output key y:
+[[ 1.5]
+ [ 2.5]
+ [ 3.5]]
+```
+
+Let's change the preceding example slightly. This time, instead of two
+`.npy` files, you now have an `.npz` file and a pickle file. Furthermore,
+you want to overwrite any existing output file. Here's the command:
+
+```
+$ saved_model_cli run --dir /tmp/saved_model_dir --tag_set serve \
+--signature_def x1_x2_to_y \
+--inputs x1=/tmp/my_data1.npz[x];x2=/tmp/my_data2.pkl --outdir /tmp/out \
+--overwrite
+Result for output key y:
+[[ 1.5]
+ [ 2.5]
+ [ 3.5]]
+```
+
+You may specify a Python expression instead of an input file. For example,
+the following command replaces input `x2` with a Python expression:
+
+```
+$ saved_model_cli run --dir /tmp/saved_model_dir --tag_set serve \
+--signature_def x1_x2_to_y --inputs x1=/tmp/my_data1.npz[x] \
+--input_exprs 'x2=np.ones((3,1))'
+Result for output key y:
+[[ 2]
+ [ 3]
+ [ 4]]
+```
+
+To run the model with the TensorFlow Debugger on, issue the
+following command:
+
+```
+$ saved_model_cli run --dir /tmp/saved_model_dir --tag_set serve \
+--signature_def serving_default --inputs x=/tmp/data.npz[x] --tf_debug
+```
+
+
+
+## Structure of a SavedModel directory
+
+When you save a model in SavedModel format, TensorFlow creates
+a SavedModel directory consisting of the following subdirectories
+and files:
+
+```bsh
+assets/
+assets.extra/
+variables/
+    variables.data-?????-of-?????
+    variables.index
+saved_model.pb|saved_model.pbtxt
+```
+
+where:
+
+* `assets` is a subfolder containing auxiliary (external) files,
+  such as vocabularies. Assets are copied to the SavedModel location
+  and can be read when loading a specific `MetaGraphDef`.
+* `assets.extra` is a subfolder where higher-level libraries and users can
+  add their own assets that co-exist with the model, but are not loaded by
+  the graph. This subfolder is not managed by the SavedModel libraries.
+* `variables` is a subfolder that includes output from + `tf.train.Saver`. +* `saved_model.pb` or `saved_model.pbtxt` is the SavedModel protocol buffer. + It includes the graph definitions as `MetaGraphDef` protocol buffers. + +A single SavedModel can represent multiple graphs. In this case, all the +graphs in the SavedModel share a *single* set of checkpoints (variables) +and assets. For example, the following diagram shows one SavedModel +containing three `MetaGraphDef`s, all three of which share the same set +of checkpoints and assets: + +![SavedModel represents checkpoints, assets, and one or more MetaGraphDefs](../images/SavedModel.svg) + +Each graph is associated with a specific set of tags, which enables +identification during a load or restore operation. diff --git a/tensorflow/docs_src/guide/summaries_and_tensorboard.md b/tensorflow/docs_src/guide/summaries_and_tensorboard.md new file mode 100644 index 0000000000..fadfa03e78 --- /dev/null +++ b/tensorflow/docs_src/guide/summaries_and_tensorboard.md @@ -0,0 +1,225 @@ +# TensorBoard: Visualizing Learning + +The computations you'll use TensorFlow for - like training a massive +deep neural network - can be complex and confusing. To make it easier to +understand, debug, and optimize TensorFlow programs, we've included a suite of +visualization tools called TensorBoard. You can use TensorBoard to visualize +your TensorFlow graph, plot quantitative metrics about the execution of your +graph, and show additional data like images that pass through it. When +TensorBoard is fully configured, it looks like this: + +![MNIST TensorBoard](https://www.tensorflow.org/images/mnist_tensorboard.png "MNIST TensorBoard") + +
+ +
+ +This 30-minute tutorial is intended to get you started with simple TensorBoard +usage. It assumes a basic understanding of TensorFlow. + +There are other resources available as well! The [TensorBoard GitHub](https://github.com/tensorflow/tensorboard) +has a lot more information on using individual dashboards within TensorBoard +including tips & tricks and debugging information. + +## Setup + +[Install TensorFlow](https://www.tensorflow.org/install/). Installing TensorFlow +via pip should also automatically install TensorBoard. + +## Serializing the data + +TensorBoard operates by reading TensorFlow events files, which contain summary +data that you can generate when running TensorFlow. Here's the general +lifecycle for summary data within TensorBoard. + +First, create the TensorFlow graph that you'd like to collect summary +data from, and decide which nodes you would like to annotate with +@{$python/summary$summary operations}. + +For example, suppose you are training a convolutional neural network for +recognizing MNIST digits. You'd like to record how the learning rate +varies over time, and how the objective function is changing. Collect these by +attaching @{tf.summary.scalar} ops +to the nodes that output the learning rate and loss respectively. Then, give +each `scalar_summary` a meaningful `tag`, like `'learning rate'` or `'loss +function'`. + +Perhaps you'd also like to visualize the distributions of activations coming +off a particular layer, or the distribution of gradients or weights. Collect +this data by attaching +@{tf.summary.histogram} ops to +the gradient outputs and to the variable that holds your weights, respectively. + +For details on all of the summary operations available, check out the docs on +@{$python/summary$summary operations}. + +Operations in TensorFlow don't do anything until you run them, or an op that +depends on their output. And the summary nodes that we've just created are +peripheral to your graph: none of the ops you are currently running depend on +them. So, to generate summaries, we need to run all of these summary nodes. +Managing them by hand would be tedious, so use +@{tf.summary.merge_all} +to combine them into a single op that generates all the summary data. + +Then, you can just run the merged summary op, which will generate a serialized +`Summary` protobuf object with all of your summary data at a given step. +Finally, to write this summary data to disk, pass the summary protobuf to a +@{tf.summary.FileWriter}. + +The `FileWriter` takes a logdir in its constructor - this logdir is quite +important, it's the directory where all of the events will be written out. +Also, the `FileWriter` can optionally take a `Graph` in its constructor. +If it receives a `Graph` object, then TensorBoard will visualize your graph +along with tensor shape information. This will give you a much better sense of +what flows through the graph: see +@{$graph_viz#tensor-shape-information$Tensor shape information}. + +Now that you've modified your graph and have a `FileWriter`, you're ready to +start running your network! If you want, you could run the merged summary op +every single step, and record a ton of training data. That's likely to be more +data than you need, though. Instead, consider running the merged summary op +every `n` steps. 
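+
+As a minimal sketch of that lifecycle (the `loss` tensor, the every-10-steps
+cadence, and the `/tmp/toy_logs` directory below are illustrative, not taken
+from the MNIST example that follows):
+
+```python
+import tensorflow as tf
+
+# A toy scalar to track; in a real model this would be your loss or learning rate.
+loss = tf.reduce_mean(tf.square(tf.random_normal([100])))
+tf.summary.scalar('loss', loss)
+
+merged = tf.summary.merge_all()  # a single op that emits every summary defined above
+writer = tf.summary.FileWriter('/tmp/toy_logs', tf.get_default_graph())
+
+with tf.Session() as sess:
+  for step in range(100):
+    if step % 10 == 0:  # run the merged op every n steps, not every step
+      summ = sess.run(merged)
+      writer.add_summary(summ, global_step=step)
+writer.close()
+```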
+ +The code example below is a modification of the +[simple MNIST tutorial](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/tutorials/mnist/mnist.py), +in which we have added some summary ops, and run them every ten steps. If you +run this and then launch `tensorboard --logdir=/tmp/tensorflow/mnist`, you'll be able +to visualize statistics, such as how the weights or accuracy varied during +training. The code below is an excerpt; full source is +[here](https://www.tensorflow.org/code/tensorflow/examples/tutorials/mnist/mnist_with_summaries.py). + +```python +def variable_summaries(var): + """Attach a lot of summaries to a Tensor (for TensorBoard visualization).""" + with tf.name_scope('summaries'): + mean = tf.reduce_mean(var) + tf.summary.scalar('mean', mean) + with tf.name_scope('stddev'): + stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean))) + tf.summary.scalar('stddev', stddev) + tf.summary.scalar('max', tf.reduce_max(var)) + tf.summary.scalar('min', tf.reduce_min(var)) + tf.summary.histogram('histogram', var) + +def nn_layer(input_tensor, input_dim, output_dim, layer_name, act=tf.nn.relu): + """Reusable code for making a simple neural net layer. + + It does a matrix multiply, bias add, and then uses relu to nonlinearize. + It also sets up name scoping so that the resultant graph is easy to read, + and adds a number of summary ops. + """ + # Adding a name scope ensures logical grouping of the layers in the graph. + with tf.name_scope(layer_name): + # This Variable will hold the state of the weights for the layer + with tf.name_scope('weights'): + weights = weight_variable([input_dim, output_dim]) + variable_summaries(weights) + with tf.name_scope('biases'): + biases = bias_variable([output_dim]) + variable_summaries(biases) + with tf.name_scope('Wx_plus_b'): + preactivate = tf.matmul(input_tensor, weights) + biases + tf.summary.histogram('pre_activations', preactivate) + activations = act(preactivate, name='activation') + tf.summary.histogram('activations', activations) + return activations + +hidden1 = nn_layer(x, 784, 500, 'layer1') + +with tf.name_scope('dropout'): + keep_prob = tf.placeholder(tf.float32) + tf.summary.scalar('dropout_keep_probability', keep_prob) + dropped = tf.nn.dropout(hidden1, keep_prob) + +# Do not apply softmax activation yet, see below. +y = nn_layer(dropped, 500, 10, 'layer2', act=tf.identity) + +with tf.name_scope('cross_entropy'): + # The raw formulation of cross-entropy, + # + # tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(tf.softmax(y)), + # reduction_indices=[1])) + # + # can be numerically unstable. + # + # So here we use tf.losses.sparse_softmax_cross_entropy on the + # raw logit outputs of the nn_layer above. 
+ with tf.name_scope('total'): + cross_entropy = tf.losses.sparse_softmax_cross_entropy(labels=y_, logits=y) +tf.summary.scalar('cross_entropy', cross_entropy) + +with tf.name_scope('train'): + train_step = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize( + cross_entropy) + +with tf.name_scope('accuracy'): + with tf.name_scope('correct_prediction'): + correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1)) + with tf.name_scope('accuracy'): + accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) +tf.summary.scalar('accuracy', accuracy) + +# Merge all the summaries and write them out to /tmp/mnist_logs (by default) +merged = tf.summary.merge_all() +train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train', + sess.graph) +test_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/test') +tf.global_variables_initializer().run() +``` + +After we've initialized the `FileWriters`, we have to add summaries to the +`FileWriters` as we train and test the model. + +```python +# Train the model, and also write summaries. +# Every 10th step, measure test-set accuracy, and write test summaries +# All other steps, run train_step on training data, & add training summaries + +def feed_dict(train): + """Make a TensorFlow feed_dict: maps data onto Tensor placeholders.""" + if train or FLAGS.fake_data: + xs, ys = mnist.train.next_batch(100, fake_data=FLAGS.fake_data) + k = FLAGS.dropout + else: + xs, ys = mnist.test.images, mnist.test.labels + k = 1.0 + return {x: xs, y_: ys, keep_prob: k} + +for i in range(FLAGS.max_steps): + if i % 10 == 0: # Record summaries and test-set accuracy + summary, acc = sess.run([merged, accuracy], feed_dict=feed_dict(False)) + test_writer.add_summary(summary, i) + print('Accuracy at step %s: %s' % (i, acc)) + else: # Record train set summaries, and train + summary, _ = sess.run([merged, train_step], feed_dict=feed_dict(True)) + train_writer.add_summary(summary, i) +``` + +You're now all set to visualize this data using TensorBoard. + + +## Launching TensorBoard + +To run TensorBoard, use the following command (alternatively `python -m +tensorboard.main`) + +```bash +tensorboard --logdir=path/to/log-directory +``` + +where `logdir` points to the directory where the `FileWriter` serialized its +data. If this `logdir` directory contains subdirectories which contain +serialized data from separate runs, then TensorBoard will visualize the data +from all of those runs. Once TensorBoard is running, navigate your web browser +to `localhost:6006` to view the TensorBoard. + +When looking at TensorBoard, you will see the navigation tabs in the top right +corner. Each tab represents a set of serialized data that can be visualized. + +For in depth information on how to use the *graph* tab to visualize your graph, +see @{$graph_viz$TensorBoard: Graph Visualization}. + +For more usage information on TensorBoard in general, see the +[TensorBoard GitHub](https://github.com/tensorflow/tensorboard). diff --git a/tensorflow/docs_src/guide/tensorboard_histograms.md b/tensorflow/docs_src/guide/tensorboard_histograms.md new file mode 100644 index 0000000000..918deda190 --- /dev/null +++ b/tensorflow/docs_src/guide/tensorboard_histograms.md @@ -0,0 +1,245 @@ +# TensorBoard Histogram Dashboard + +The TensorBoard Histogram Dashboard displays how the distribution of some +`Tensor` in your TensorFlow graph has changed over time. It does this by showing +many histograms visualizations of your tensor at different points in time. 
+ +## A Basic Example + +Let's start with a simple case: a normally-distributed variable, where the mean +shifts over time. +TensorFlow has an op +[`tf.random_normal`](https://www.tensorflow.org/api_docs/python/tf/random_normal) +which is perfect for this purpose. As is usually the case with TensorBoard, we +will ingest data using a summary op; in this case, +['tf.summary.histogram'](https://www.tensorflow.org/api_docs/python/tf/summary/histogram). +For a primer on how summaries work, please see the general +[TensorBoard tutorial](https://www.tensorflow.org/get_started/summaries_and_tensorboard). + +Here is a code snippet that will generate some histogram summaries containing +normally distributed data, where the mean of the distribution increases over +time. + +```python +import tensorflow as tf + +k = tf.placeholder(tf.float32) + +# Make a normal distribution, with a shifting mean +mean_moving_normal = tf.random_normal(shape=[1000], mean=(5*k), stddev=1) +# Record that distribution into a histogram summary +tf.summary.histogram("normal/moving_mean", mean_moving_normal) + +# Setup a session and summary writer +sess = tf.Session() +writer = tf.summary.FileWriter("/tmp/histogram_example") + +summaries = tf.summary.merge_all() + +# Setup a loop and write the summaries to disk +N = 400 +for step in range(N): + k_val = step/float(N) + summ = sess.run(summaries, feed_dict={k: k_val}) + writer.add_summary(summ, global_step=step) +``` + +Once that code runs, we can load the data into TensorBoard via the command line: + + +```sh +tensorboard --logdir=/tmp/histogram_example +``` + +Once TensorBoard is running, load it in Chrome or Firefox and navigate to the +Histogram Dashboard. Then we can see a histogram visualization for our normally +distributed data. + +![](https://www.tensorflow.org/images/tensorboard/histogram_dashboard/1_moving_mean.png) + +`tf.summary.histogram` takes an arbitrarily sized and shaped Tensor, and +compresses it into a histogram data structure consisting of many bins with +widths and counts. For example, let's say we want to organize the numbers +`[0.5, 1.1, 1.3, 2.2, 2.9, 2.99]` into bins. We could make three bins: +* a bin +containing everything from 0 to 1 (it would contain one element, 0.5), +* a bin +containing everything from 1-2 (it would contain two elements, 1.1 and 1.3), +* a bin containing everything from 2-3 (it would contain three elements: 2.2, +2.9 and 2.99). + +TensorFlow uses a similar approach to create bins, but unlike in our example, it +doesn't create integer bins. For large, sparse datasets, that might result in +many thousands of bins. +Instead, [the bins are exponentially distributed, with many bins close to 0 and +comparatively few bins for very large numbers.](https://github.com/tensorflow/tensorflow/blob/c8b59c046895fa5b6d79f73e0b5817330fcfbfc1/tensorflow/core/lib/histogram/histogram.cc#L28) +However, visualizing exponentially-distributed bins is tricky; if height is used +to encode count, then wider bins take more space, even if they have the same +number of elements. Conversely, encoding count in the area makes height +comparisons impossible. Instead, the histograms [resample the data](https://github.com/tensorflow/tensorflow/blob/17c47804b86e340203d451125a721310033710f1/tensorflow/tensorboard/components/tf_backend/backend.ts#L400) +into uniform bins. This can lead to unfortunate artifacts in some cases. + +Each slice in the histogram visualizer displays a single histogram. +The slices are organized by step; +older slices (e.g. 
step 0) are further "back" and darker, while newer slices +(e.g. step 400) are close to the foreground, and lighter in color. +The y-axis on the right shows the step number. + +You can mouse over the histogram to see tooltips with some more detailed +information. For example, in the following image we can see that the histogram +at timestep 176 has a bin centered at 2.25 with 177 elements in that bin. + +![](https://www.tensorflow.org/images/tensorboard/histogram_dashboard/2_moving_mean_tooltip.png) + +Also, you may note that the histogram slices are not always evenly spaced in +step count or time. This is because TensorBoard uses +[reservoir sampling](https://en.wikipedia.org/wiki/Reservoir_sampling) to keep a +subset of all the histograms, to save on memory. Reservoir sampling guarantees +that every sample has an equal likelihood of being included, but because it is +a randomized algorithm, the samples chosen don't occur at even steps. + +## Overlay Mode + +There is a control on the left of the dashboard that allows you to toggle the +histogram mode from "offset" to "overlay": + +![](https://www.tensorflow.org/images/tensorboard/histogram_dashboard/3_overlay_offset.png) + +In "offset" mode, the visualization rotates 45 degrees, so that the individual +histogram slices are no longer spread out in time, but instead are all plotted +on the same y-axis. + +![](https://www.tensorflow.org/images/tensorboard/histogram_dashboard/4_overlay.png) +Now, each slice is a separate line on the chart, and the y-axis shows the item +count within each bucket. Darker lines are older, earlier steps, and lighter +lines are more recent, later steps. Once again, you can mouse over the chart to +see some additional information. + +![](https://www.tensorflow.org/images/tensorboard/histogram_dashboard/5_overlay_tooltips.png) + +In general, the overlay visualization is useful if you want to directly compare +the counts of different histograms. + +## Multimodal Distributions + +The Histogram Dashboard is great for visualizing multimodal +distributions. Let's construct a simple bimodal distribution by concatenating +the outputs from two different normal distributions. The code will look like +this: + +```python +import tensorflow as tf + +k = tf.placeholder(tf.float32) + +# Make a normal distribution, with a shifting mean +mean_moving_normal = tf.random_normal(shape=[1000], mean=(5*k), stddev=1) +# Record that distribution into a histogram summary +tf.summary.histogram("normal/moving_mean", mean_moving_normal) + +# Make a normal distribution with shrinking variance +variance_shrinking_normal = tf.random_normal(shape=[1000], mean=0, stddev=1-(k)) +# Record that distribution too +tf.summary.histogram("normal/shrinking_variance", variance_shrinking_normal) + +# Let's combine both of those distributions into one dataset +normal_combined = tf.concat([mean_moving_normal, variance_shrinking_normal], 0) +# We add another histogram summary to record the combined distribution +tf.summary.histogram("normal/bimodal", normal_combined) + +summaries = tf.summary.merge_all() + +# Setup a session and summary writer +sess = tf.Session() +writer = tf.summary.FileWriter("/tmp/histogram_example") + +# Setup a loop and write the summaries to disk +N = 400 +for step in range(N): + k_val = step/float(N) + summ = sess.run(summaries, feed_dict={k: k_val}) + writer.add_summary(summ, global_step=step) +``` + +You already remember our "moving mean" normal distribution from the example +above. Now we also have a "shrinking variance" distribution. 
Side-by-side, they +look like this: +![](https://www.tensorflow.org/images/tensorboard/histogram_dashboard/6_two_distributions.png) + +When we concatenate them, we get a chart that clearly reveals the divergent, +bimodal structure: +![](https://www.tensorflow.org/images/tensorboard/histogram_dashboard/7_bimodal.png) + +## Some more distributions + +Just for fun, let's generate and visualize a few more distributions, and then +combine them all into one chart. Here's the code we'll use: + +```python +import tensorflow as tf + +k = tf.placeholder(tf.float32) + +# Make a normal distribution, with a shifting mean +mean_moving_normal = tf.random_normal(shape=[1000], mean=(5*k), stddev=1) +# Record that distribution into a histogram summary +tf.summary.histogram("normal/moving_mean", mean_moving_normal) + +# Make a normal distribution with shrinking variance +variance_shrinking_normal = tf.random_normal(shape=[1000], mean=0, stddev=1-(k)) +# Record that distribution too +tf.summary.histogram("normal/shrinking_variance", variance_shrinking_normal) + +# Let's combine both of those distributions into one dataset +normal_combined = tf.concat([mean_moving_normal, variance_shrinking_normal], 0) +# We add another histogram summary to record the combined distribution +tf.summary.histogram("normal/bimodal", normal_combined) + +# Add a gamma distribution +gamma = tf.random_gamma(shape=[1000], alpha=k) +tf.summary.histogram("gamma", gamma) + +# And a poisson distribution +poisson = tf.random_poisson(shape=[1000], lam=k) +tf.summary.histogram("poisson", poisson) + +# And a uniform distribution +uniform = tf.random_uniform(shape=[1000], maxval=k*10) +tf.summary.histogram("uniform", uniform) + +# Finally, combine everything together! +all_distributions = [mean_moving_normal, variance_shrinking_normal, + gamma, poisson, uniform] +all_combined = tf.concat(all_distributions, 0) +tf.summary.histogram("all_combined", all_combined) + +summaries = tf.summary.merge_all() + +# Setup a session and summary writer +sess = tf.Session() +writer = tf.summary.FileWriter("/tmp/histogram_example") + +# Setup a loop and write the summaries to disk +N = 400 +for step in range(N): + k_val = step/float(N) + summ = sess.run(summaries, feed_dict={k: k_val}) + writer.add_summary(summ, global_step=step) +``` +### Gamma Distribution +![](https://www.tensorflow.org/images/tensorboard/histogram_dashboard/8_gamma.png) + +### Uniform Distribution +![](https://www.tensorflow.org/images/tensorboard/histogram_dashboard/9_uniform.png) + +### Poisson Distribution +![](https://www.tensorflow.org/images/tensorboard/histogram_dashboard/10_poisson.png) +The poisson distribution is defined over the integers. So, all of the values +being generated are perfect integers. The histogram compression moves the data +into floating-point bins, causing the visualization to show little +bumps over the integer values rather than perfect spikes. + +### All Together Now +Finally, we can concatenate all of the data into one funny-looking curve. +![](https://www.tensorflow.org/images/tensorboard/histogram_dashboard/11_all_combined.png) + diff --git a/tensorflow/docs_src/guide/tensors.md b/tensorflow/docs_src/guide/tensors.md new file mode 100644 index 0000000000..7227260f1a --- /dev/null +++ b/tensorflow/docs_src/guide/tensors.md @@ -0,0 +1,330 @@ +# Tensors + +TensorFlow, as the name indicates, is a framework to define and run computations +involving tensors. A **tensor** is a generalization of vectors and matrices to +potentially higher dimensions. 
Internally, TensorFlow represents tensors as +n-dimensional arrays of base datatypes. + +When writing a TensorFlow program, the main object you manipulate and pass +around is the `tf.Tensor`. A `tf.Tensor` object represents a partially defined +computation that will eventually produce a value. TensorFlow programs work by +first building a graph of `tf.Tensor` objects, detailing how each tensor is +computed based on the other available tensors and then by running parts of this +graph to achieve the desired results. + +A `tf.Tensor` has the following properties: + + * a data type (`float32`, `int32`, or `string`, for example) + * a shape + + +Each element in the Tensor has the same data type, and the data type is always +known. The shape (that is, the number of dimensions it has and the size of each +dimension) might be only partially known. Most operations produce tensors of +fully-known shapes if the shapes of their inputs are also fully known, but in +some cases it's only possible to find the shape of a tensor at graph execution +time. + +Some types of tensors are special, and these will be covered in other +units of the TensorFlow guide. The main ones are: + + * `tf.Variable` + * `tf.constant` + * `tf.placeholder` + * `tf.SparseTensor` + +With the exception of `tf.Variable`, the value of a tensor is immutable, which +means that in the context of a single execution tensors only have a single +value. However, evaluating the same tensor twice can return different values; +for example that tensor can be the result of reading data from disk, or +generating a random number. + +## Rank + +The **rank** of a `tf.Tensor` object is its number of dimensions. Synonyms for +rank include **order** or **degree** or **n-dimension**. +Note that rank in TensorFlow is not the same as matrix rank in mathematics. +As the following table shows, each rank in TensorFlow corresponds to a +different mathematical entity: + +Rank | Math entity +--- | --- +0 | Scalar (magnitude only) +1 | Vector (magnitude and direction) +2 | Matrix (table of numbers) +3 | 3-Tensor (cube of numbers) +n | n-Tensor (you get the idea) + + +### Rank 0 + +The following snippet demonstrates creating a few rank 0 variables: + +```python +mammal = tf.Variable("Elephant", tf.string) +ignition = tf.Variable(451, tf.int16) +floating = tf.Variable(3.14159265359, tf.float64) +its_complicated = tf.Variable(12.3 - 4.85j, tf.complex64) +``` + +Note: A string is treated as a single item in TensorFlow, not as a sequence of +characters. It is possible to have scalar strings, vectors of strings, etc. + +### Rank 1 + +To create a rank 1 `tf.Tensor` object, you can pass a list of items as the +initial value. For example: + +```python +mystr = tf.Variable(["Hello"], tf.string) +cool_numbers = tf.Variable([3.14159, 2.71828], tf.float32) +first_primes = tf.Variable([2, 3, 5, 7, 11], tf.int32) +its_very_complicated = tf.Variable([12.3 - 4.85j, 7.5 - 6.23j], tf.complex64) +``` + + +### Higher ranks + +A rank 2 `tf.Tensor` object consists of at least one row and at least +one column: + +```python +mymat = tf.Variable([[7],[11]], tf.int16) +myxor = tf.Variable([[False, True],[True, False]], tf.bool) +linear_squares = tf.Variable([[4], [9], [16], [25]], tf.int32) +squarish_squares = tf.Variable([ [4, 9], [16, 25] ], tf.int32) +rank_of_squares = tf.rank(squarish_squares) +mymatC = tf.Variable([[7],[11]], tf.int32) +``` + +Higher-rank Tensors, similarly, consist of an n-dimensional array. 
For example, +during image processing, many tensors of rank 4 are used, with dimensions +corresponding to example-in-batch, image width, image height, and color channel. + +``` python +my_image = tf.zeros([10, 299, 299, 3]) # batch x height x width x color +``` + +### Getting a `tf.Tensor` object's rank + +To determine the rank of a `tf.Tensor` object, call the `tf.rank` method. +For example, the following method programmatically determines the rank +of the `tf.Tensor` defined in the previous section: + +```python +r = tf.rank(my_image) +# After the graph runs, r will hold the value 4. +``` + +### Referring to `tf.Tensor` slices + +Since a `tf.Tensor` is an n-dimensional array of cells, to access a single cell +in a `tf.Tensor` you need to specify n indices. + +For a rank 0 tensor (a scalar), no indices are necessary, since it is already a +single number. + +For a rank 1 tensor (a vector), passing a single index allows you to access a +number: + +```python +my_scalar = my_vector[2] +``` + +Note that the index passed inside the `[]` can itself be a scalar `tf.Tensor`, if +you want to dynamically choose an element from the vector. + +For tensors of rank 2 or higher, the situation is more interesting. For a +`tf.Tensor` of rank 2, passing two numbers returns a scalar, as expected: + + +```python +my_scalar = my_matrix[1, 2] +``` + + +Passing a single number, however, returns a subvector of a matrix, as follows: + + +```python +my_row_vector = my_matrix[2] +my_column_vector = my_matrix[:, 3] +``` + +The `:` notation is python slicing syntax for "leave this dimension alone". This +is useful in higher-rank Tensors, as it allows you to access its subvectors, +submatrices, and even other subtensors. + + +## Shape + +The **shape** of a tensor is the number of elements in each dimension. +TensorFlow automatically infers shapes during graph construction. These inferred +shapes might have known or unknown rank. If the rank is known, the sizes of each +dimension might be known or unknown. + +The TensorFlow documentation uses three notational conventions to describe +tensor dimensionality: rank, shape, and dimension number. The following table +shows how these relate to one another: + +Rank | Shape | Dimension number | Example +--- | --- | --- | --- +0 | [] | 0-D | A 0-D tensor. A scalar. +1 | [D0] | 1-D | A 1-D tensor with shape [5]. +2 | [D0, D1] | 2-D | A 2-D tensor with shape [3, 4]. +3 | [D0, D1, D2] | 3-D | A 3-D tensor with shape [1, 4, 3]. +n | [D0, D1, ... Dn-1] | n-D | A tensor with shape [D0, D1, ... Dn-1]. + +Shapes can be represented via Python lists / tuples of ints, or with the +@{tf.TensorShape}. + +### Getting a `tf.Tensor` object's shape + +There are two ways of accessing the shape of a `tf.Tensor`. While building the +graph, it is often useful to ask what is already known about a tensor's +shape. This can be done by reading the `shape` property of a `tf.Tensor` object. +This method returns a `TensorShape` object, which is a convenient way of +representing partially-specified shapes (since, when building the graph, not all +shapes will be fully known). + +It is also possible to get a `tf.Tensor` that will represent the fully-defined +shape of another `tf.Tensor` at runtime. This is done by calling the `tf.shape` +operation. This way, you can build a graph that manipulates the shapes of +tensors by building other tensors that depend on the dynamic shape of the input +`tf.Tensor`. 
+ +For example, here is how to make a vector of zeros with the same size as the +number of columns in a given matrix: + +``` python +zeros = tf.zeros(my_matrix.shape[1]) +``` + +### Changing the shape of a `tf.Tensor` + +The **number of elements** of a tensor is the product of the sizes of all its +shapes. The number of elements of a scalar is always `1`. Since there are often +many different shapes that have the same number of elements, it's often +convenient to be able to change the shape of a `tf.Tensor`, keeping its elements +fixed. This can be done with `tf.reshape`. + +The following examples demonstrate how to reshape tensors: + +```python +rank_three_tensor = tf.ones([3, 4, 5]) +matrix = tf.reshape(rank_three_tensor, [6, 10]) # Reshape existing content into + # a 6x10 matrix +matrixB = tf.reshape(matrix, [3, -1]) # Reshape existing content into a 3x20 + # matrix. -1 tells reshape to calculate + # the size of this dimension. +matrixAlt = tf.reshape(matrixB, [4, 3, -1]) # Reshape existing content into a + #4x3x5 tensor + +# Note that the number of elements of the reshaped Tensors has to match the +# original number of elements. Therefore, the following example generates an +# error because no possible value for the last dimension will match the number +# of elements. +yet_another = tf.reshape(matrixAlt, [13, 2, -1]) # ERROR! +``` + +## Data types + +In addition to dimensionality, Tensors have a data type. Refer to the +`tf.DType` page for a complete list of the data types. + +It is not possible to have a `tf.Tensor` with more than one data type. It is +possible, however, to serialize arbitrary data structures as `string`s and store +those in `tf.Tensor`s. + +It is possible to cast `tf.Tensor`s from one datatype to another using +`tf.cast`: + +``` python +# Cast a constant integer tensor into floating point. +float_tensor = tf.cast(tf.constant([1, 2, 3]), dtype=tf.float32) +``` + +To inspect a `tf.Tensor`'s data type use the `Tensor.dtype` property. + +When creating a `tf.Tensor` from a python object you may optionally specify the +datatype. If you don't, TensorFlow chooses a datatype that can represent your +data. TensorFlow converts Python integers to `tf.int32` and python floating +point numbers to `tf.float32`. Otherwise TensorFlow uses the same rules numpy +uses when converting to arrays. + +## Evaluating Tensors + +Once the computation graph has been built, you can run the computation that +produces a particular `tf.Tensor` and fetch the value assigned to it. This is +often useful for debugging as well as being required for much of TensorFlow to +work. + +The simplest way to evaluate a Tensor is using the `Tensor.eval` method. For +example: + +```python +constant = tf.constant([1, 2, 3]) +tensor = constant * constant +print(tensor.eval()) +``` + +The `eval` method only works when a default `tf.Session` is active (see +Graphs and Sessions for more information). + +`Tensor.eval` returns a numpy array with the same contents as the tensor. + +Sometimes it is not possible to evaluate a `tf.Tensor` with no context because +its value might depend on dynamic information that is not available. For +example, tensors that depend on `placeholder`s can't be evaluated without +providing a value for the `placeholder`. + +``` python +p = tf.placeholder(tf.float32) +t = p + 1.0 +t.eval() # This will fail, since the placeholder did not get a value. +t.eval(feed_dict={p:2.0}) # This will succeed because we're feeding a value + # to the placeholder. 
+``` + +Note that it is possible to feed any `tf.Tensor`, not just placeholders. + +Other model constructs might make evaluating a `tf.Tensor` +complicated. TensorFlow can't directly evaluate `tf.Tensor`s defined inside +functions or inside control flow constructs. If a `tf.Tensor` depends on a value +from a queue, evaluating the `tf.Tensor` will only work once something has been +enqueued; otherwise, evaluating it will hang. When working with queues, remember +to call `tf.train.start_queue_runners` before evaluating any `tf.Tensor`s. + +## Printing Tensors + +For debugging purposes you might want to print the value of a `tf.Tensor`. While + @{$debugger$tfdbg} provides advanced debugging support, TensorFlow also has an + operation to directly print the value of a `tf.Tensor`. + +Note that you rarely want to use the following pattern when printing a +`tf.Tensor`: + +``` python +t = <> +print(t) # This will print the symbolic tensor when the graph is being built. + # This tensor does not have a value in this context. +``` + +This code prints the `tf.Tensor` object (which represents deferred computation) +and not its value. Instead, TensorFlow provides the `tf.Print` operation, which +returns its first tensor argument unchanged while printing the set of +`tf.Tensor`s it is passed as the second argument. + +To correctly use `tf.Print` its return value must be used. See the example below + +``` python +t = <> +tf.Print(t, [t]) # This does nothing +t = tf.Print(t, [t]) # Here we are using the value returned by tf.Print +result = t + 1 # Now when result is evaluated the value of `t` will be printed. +``` + +When you evaluate `result` you will evaluate everything `result` depends +upon. Since `result` depends upon `t`, and evaluating `t` has the side effect of +printing its input (the old value of `t`), `t` gets printed. + diff --git a/tensorflow/docs_src/guide/using_gpu.md b/tensorflow/docs_src/guide/using_gpu.md new file mode 100644 index 0000000000..c429ca4750 --- /dev/null +++ b/tensorflow/docs_src/guide/using_gpu.md @@ -0,0 +1,215 @@ +# Using GPUs + +## Supported devices + +On a typical system, there are multiple computing devices. In TensorFlow, the +supported device types are `CPU` and `GPU`. They are represented as `strings`. +For example: + +* `"/cpu:0"`: The CPU of your machine. +* `"/device:GPU:0"`: The GPU of your machine, if you have one. +* `"/device:GPU:1"`: The second GPU of your machine, etc. + +If a TensorFlow operation has both CPU and GPU implementations, the GPU devices +will be given priority when the operation is assigned to a device. For example, +`matmul` has both CPU and GPU kernels. On a system with devices `cpu:0` and +`gpu:0`, `gpu:0` will be selected to run `matmul`. + +## Logging Device placement + +To find out which devices your operations and tensors are assigned to, create +the session with `log_device_placement` configuration option set to `True`. + +```python +# Creates a graph. +a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3], name='a') +b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2], name='b') +c = tf.matmul(a, b) +# Creates a session with log_device_placement set to True. +sess = tf.Session(config=tf.ConfigProto(log_device_placement=True)) +# Runs the op. 
+print(sess.run(c)) +``` + +You should see the following output: + +``` +Device mapping: +/job:localhost/replica:0/task:0/device:GPU:0 -> device: 0, name: Tesla K40c, pci bus +id: 0000:05:00.0 +b: /job:localhost/replica:0/task:0/device:GPU:0 +a: /job:localhost/replica:0/task:0/device:GPU:0 +MatMul: /job:localhost/replica:0/task:0/device:GPU:0 +[[ 22. 28.] + [ 49. 64.]] + +``` + +## Manual device placement + +If you would like a particular operation to run on a device of your choice +instead of what's automatically selected for you, you can use `with tf.device` +to create a device context such that all the operations within that context will +have the same device assignment. + +```python +# Creates a graph. +with tf.device('/cpu:0'): + a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3], name='a') + b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2], name='b') +c = tf.matmul(a, b) +# Creates a session with log_device_placement set to True. +sess = tf.Session(config=tf.ConfigProto(log_device_placement=True)) +# Runs the op. +print(sess.run(c)) +``` + +You will see that now `a` and `b` are assigned to `cpu:0`. Since a device was +not explicitly specified for the `MatMul` operation, the TensorFlow runtime will +choose one based on the operation and available devices (`gpu:0` in this +example) and automatically copy tensors between devices if required. + +``` +Device mapping: +/job:localhost/replica:0/task:0/device:GPU:0 -> device: 0, name: Tesla K40c, pci bus +id: 0000:05:00.0 +b: /job:localhost/replica:0/task:0/cpu:0 +a: /job:localhost/replica:0/task:0/cpu:0 +MatMul: /job:localhost/replica:0/task:0/device:GPU:0 +[[ 22. 28.] + [ 49. 64.]] +``` + +## Allowing GPU memory growth + +By default, TensorFlow maps nearly all of the GPU memory of all GPUs (subject to +[`CUDA_VISIBLE_DEVICES`](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars)) +visible to the process. This is done to more efficiently use the relatively +precious GPU memory resources on the devices by reducing [memory +fragmentation](https://en.wikipedia.org/wiki/Fragmentation_\(computing\)). + +In some cases it is desirable for the process to only allocate a subset of the +available memory, or to only grow the memory usage as is needed by the process. +TensorFlow provides two Config options on the Session to control this. + +The first is the `allow_growth` option, which attempts to allocate only as much +GPU memory based on runtime allocations: it starts out allocating very little +memory, and as Sessions get run and more GPU memory is needed, we extend the GPU +memory region needed by the TensorFlow process. Note that we do not release +memory, since that can lead to even worse memory fragmentation. To turn this +option on, set the option in the ConfigProto by: + +```python +config = tf.ConfigProto() +config.gpu_options.allow_growth = True +session = tf.Session(config=config, ...) +``` + +The second method is the `per_process_gpu_memory_fraction` option, which +determines the fraction of the overall amount of memory that each visible GPU +should be allocated. For example, you can tell TensorFlow to only allocate 40% +of the total memory of each GPU by: + +```python +config = tf.ConfigProto() +config.gpu_options.per_process_gpu_memory_fraction = 0.4 +session = tf.Session(config=config, ...) +``` + +This is useful if you want to truly bound the amount of GPU memory available to +the TensorFlow process. 
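+
+The two options can also be set together; assuming they compose as described
+above (the fraction fixes the maximum pool and `allow_growth` merely delays
+allocation within it), a sketch would look like this:
+
+```python
+config = tf.ConfigProto()
+# Cap this process at 40% of each visible GPU's memory...
+config.gpu_options.per_process_gpu_memory_fraction = 0.4
+# ...but only claim that memory as the runtime actually needs it.
+config.gpu_options.allow_growth = True
+session = tf.Session(config=config)
+```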
+ +## Using a single GPU on a multi-GPU system + +If you have more than one GPU in your system, the GPU with the lowest ID will be +selected by default. If you would like to run on a different GPU, you will need +to specify the preference explicitly: + +```python +# Creates a graph. +with tf.device('/device:GPU:2'): + a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3], name='a') + b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2], name='b') + c = tf.matmul(a, b) +# Creates a session with log_device_placement set to True. +sess = tf.Session(config=tf.ConfigProto(log_device_placement=True)) +# Runs the op. +print(sess.run(c)) +``` + +If the device you have specified does not exist, you will get +`InvalidArgumentError`: + +``` +InvalidArgumentError: Invalid argument: Cannot assign a device to node 'b': +Could not satisfy explicit device specification '/device:GPU:2' + [[Node: b = Const[dtype=DT_FLOAT, value=Tensor, _device="/device:GPU:2"]()]] +``` + +If you would like TensorFlow to automatically choose an existing and supported +device to run the operations in case the specified one doesn't exist, you can +set `allow_soft_placement` to `True` in the configuration option when creating +the session. + +```python +# Creates a graph. +with tf.device('/device:GPU:2'): + a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3], name='a') + b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2], name='b') + c = tf.matmul(a, b) +# Creates a session with allow_soft_placement and log_device_placement set +# to True. +sess = tf.Session(config=tf.ConfigProto( + allow_soft_placement=True, log_device_placement=True)) +# Runs the op. +print(sess.run(c)) +``` + +## Using multiple GPUs + +If you would like to run TensorFlow on multiple GPUs, you can construct your +model in a multi-tower fashion where each tower is assigned to a different GPU. +For example: + +``` python +# Creates a graph. +c = [] +for d in ['/device:GPU:2', '/device:GPU:3']: + with tf.device(d): + a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3]) + b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2]) + c.append(tf.matmul(a, b)) +with tf.device('/cpu:0'): + sum = tf.add_n(c) +# Creates a session with log_device_placement set to True. +sess = tf.Session(config=tf.ConfigProto(log_device_placement=True)) +# Runs the op. +print(sess.run(sum)) +``` + +You will see the following output. + +``` +Device mapping: +/job:localhost/replica:0/task:0/device:GPU:0 -> device: 0, name: Tesla K20m, pci bus +id: 0000:02:00.0 +/job:localhost/replica:0/task:0/device:GPU:1 -> device: 1, name: Tesla K20m, pci bus +id: 0000:03:00.0 +/job:localhost/replica:0/task:0/device:GPU:2 -> device: 2, name: Tesla K20m, pci bus +id: 0000:83:00.0 +/job:localhost/replica:0/task:0/device:GPU:3 -> device: 3, name: Tesla K20m, pci bus +id: 0000:84:00.0 +Const_3: /job:localhost/replica:0/task:0/device:GPU:3 +Const_2: /job:localhost/replica:0/task:0/device:GPU:3 +MatMul_1: /job:localhost/replica:0/task:0/device:GPU:3 +Const_1: /job:localhost/replica:0/task:0/device:GPU:2 +Const: /job:localhost/replica:0/task:0/device:GPU:2 +MatMul: /job:localhost/replica:0/task:0/device:GPU:2 +AddN: /job:localhost/replica:0/task:0/cpu:0 +[[ 44. 56.] + [ 98. 128.]] +``` + +The @{$deep_cnn$cifar10 tutorial} is a good example +demonstrating how to do training with multiple GPUs. 
diff --git a/tensorflow/docs_src/guide/using_tpu.md b/tensorflow/docs_src/guide/using_tpu.md new file mode 100644 index 0000000000..41d80d9d60 --- /dev/null +++ b/tensorflow/docs_src/guide/using_tpu.md @@ -0,0 +1,395 @@ +# Using TPUs + +This document walks through the principal TensorFlow APIs necessary to make +effective use of a [Cloud TPU](https://cloud.google.com/tpu/), and highlights +the differences between regular TensorFlow usage, and usage on a TPU. + +This doc is aimed at users who: + +* Are familiar with TensorFlow's `Estimator` and `Dataset` APIs +* Have maybe [tried out a Cloud TPU](https://cloud.google.com/tpu/docs/quickstart) + using an existing model. +* Have, perhaps, skimmed the code of an example TPU model + [[1]](https://github.com/tensorflow/models/blob/master/official/mnist/mnist_tpu.py) + [[2]](https://github.com/tensorflow/tpu/tree/master/models). +* Are interested in porting an existing `Estimator` model to + run on Cloud TPUs + +## TPUEstimator + +@{tf.estimator.Estimator$Estimators} are TensorFlow's model-level abstraction. +Standard `Estimators` can drive models on CPU and GPUs. You must use +@{tf.contrib.tpu.TPUEstimator} to drive a model on TPUs. + +Refer to TensorFlow's Getting Started section for an introduction to the basics +of using a @{$premade_estimators$pre-made `Estimator`}, and +@{$custom_estimators$custom `Estimator`s}. + +The `TPUEstimator` class differs somewhat from the `Estimator` class. + +The simplest way to maintain a model that can be run both on CPU/GPU or on a +Cloud TPU is to define the model's inference phase (from inputs to predictions) +outside of the `model_fn`. Then maintain separate implementations of the +`Estimator` setup and `model_fn`, both wrapping this inference step. For an +example of this pattern compare the `mnist.py` and `mnist_tpu.py` implementation in +[tensorflow/models](https://github.com/tensorflow/models/tree/master/official/mnist). + +### Running a `TPUEstimator` locally + +To create a standard `Estimator` you call the constructor, and pass it a +`model_fn`, for example: + +``` +my_estimator = tf.estimator.Estimator( + model_fn=my_model_fn) +``` + +The changes required to use a @{tf.contrib.tpu.TPUEstimator} on your local +machine are relatively minor. The constructor requires two additional arguments. +You should set the `use_tpu` argument to `False`, and pass a +@{tf.contrib.tpu.RunConfig} as the `config` argument, as shown below: + +``` python +my_tpu_estimator = tf.contrib.tpu.TPUEstimator( + model_fn=my_model_fn, + config=tf.contrib.tpu.RunConfig() + use_tpu=False) +``` + +Just this simple change will allow you to run a `TPUEstimator` locally. +The majority of example TPU models can be run in this local mode, +by setting the command line flags as follows: + + +``` +$> python mnist_tpu.py --use_tpu=false --master='' +``` + +Note: This `use_tpu=False` argument is useful for trying out the `TPUEstimator` +API. It is not meant to be a complete TPU compatibility test. Successfully +running a model locally in a `TPUEstimator` does not guarantee that it will +work on a TPU. + + +### Building a `tpu.RunConfig` + +While the default `RunConfig` is sufficient for local training, these settings +cannot be ignored in real usage. 
+ +A more typical setup for a `RunConfig`, that can be switched to use a Cloud +TPU, might be as follows: + +``` python +import tempfile +import subprocess + +class FLAGS(object): + use_tpu=False + tpu_name=None + # Use a local temporary path for the `model_dir` + model_dir = tempfile.mkdtemp() + # Number of training steps to run on the Cloud TPU before returning control. + iterations = 50 + # A single Cloud TPU has 8 shards. + num_shards = 8 + +if FLAGS.use_tpu: + my_project_name = subprocess.check_output([ + 'gcloud','config','get-value','project']) + my_zone = subprocess.check_output([ + 'gcloud','config','get-value','compute/zone']) + cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver( + tpu_names=[FLAGS.tpu_name], + zone=my_zone, + project=my_project) + master = tpu_cluster_resolver.get_master() +else: + master = '' + +my_tpu_run_config = tf.contrib.tpu.RunConfig( + master=master, + evaluation_master=master, + model_dir=FLAGS.model_dir, + session_config=tf.ConfigProto( + allow_soft_placement=True, log_device_placement=True), + tpu_config=tf.contrib.tpu.TPUConfig(FLAGS.iterations, + FLAGS.num_shards), +) +``` + +Then you must pass the @{tf.contrib.tpu.RunConfig} to the constructor: + +``` python +my_tpu_estimator = tf.contrib.tpu.TPUEstimator( + model_fn=my_model_fn, + config = my_tpu_run_config, + use_tpu=FLAGS.use_tpu) +``` + +Typically the `FLAGS` would be set by command line arguments. To switch from +training locally to training on a cloud TPU you would need to: + +* Set `FLAGS.use_tpu` to `True` +* Set `FLAGS.tpu_name` so the `tf.contrib.cluster_resolver.TPUClusterResolver` can find it +* Set `FLAGS.model_dir` to a Google Cloud Storage bucket url (`gs://`). + + +## Optimizer + +When training on a cloud TPU you **must** wrap the optimizer in a +@{tf.contrib.tpu.CrossShardOptimizer}, which uses an `allreduce` to aggregate +gradients and broadcast the result to each shard (each TPU core). + +The `CrossShardOptimizer` is not compatible with local training. So, to have +the same code run both locally and on a Cloud TPU, add lines like the following: + +``` python +optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate) +if FLAGS.use_tpu: + optimizer = tf.contrib.tpu.CrossShardOptimizer(optimizer) +``` + +If you prefer to avoid a global `FLAGS` variable in your model code, one +approach is to set the optimizer as one of the `Estimator`'s params, +as follows: + +``` python +my_tpu_estimator = tf.contrib.tpu.TPUEstimator( + model_fn=my_model_fn, + config = my_tpu_run_config, + use_tpu=FLAGS.use_tpu, + params={'optimizer':optimizer}) +``` + +## Model Function + +This section details the changes you must make to the model function +(`model_fn()`) to make it `TPUEstimator` compatible. + +### Static shapes + +During regular usage TensorFlow attempts to determine the shapes of each +`tf.Tensor` during graph construction. During execution any unknown shape +dimensions are determined dynamically, +see @{$guide/tensors#shape$Tensor Shapes} for more details. + +To run on Cloud TPUs TensorFlow models are compiled using @{$xla$XLA}. +XLA uses a similar system for determining shapes at compile time. XLA requires +that all tensor dimensions be statically defined at compile time. All shapes +must evaluate to a constant, and not depend on external data, or stateful +operations like variables or a random number generator. + + +### Summaries + +Remove any use of `tf.summary` from your model. 
+ +@{$summaries_and_tensorboard$TensorBoard summaries} are a great way see inside +your model. A minimal set of basic summaries are automatically recorded by the +`TPUEstimator`, to `event` files in the `model_dir`. Custom summaries, however, +are currently unsupported when training on a Cloud TPU. So while the +`TPUEstimator` will still run locally with summaries, it will fail if used on a +TPU. + +### Metrics + +Build your evaluation metrics dictionary in a stand-alone `metric_fn`. + + + +Evaluation metrics are an essential part of training a model. These are fully +supported on Cloud TPUs, but with a slightly different syntax. + +A standard @{tf.metrics} returns two tensors. The first returns the running +average of the metric value, while the second updates the running average and +returns the value for this batch: + +``` +running_average, current_batch = tf.metrics.accuracy(labels, predictions) +``` + +In a standard `Estimator` you create a dictionary of these pairs, and return it +as part of the `EstimatorSpec`. + +```python +my_metrics = {'accuracy': tf.metrics.accuracy(labels, predictions)} + +return tf.estimator.EstimatorSpec( + ... + eval_metric_ops=my_metrics +) +``` + +In a `TPUEstimator` you instead pass a function (which returns a metrics +dictionary) and a list of argument tensors, as shown below: + +```python +def my_metric_fn(labels, predictions): + return {'accuracy': tf.metrics.accuracy(labels, predictions)} + +return tf.contrib.tpu.TPUEstimatorSpec( + ... + eval_metrics=(my_metric_fn, [labels, predictions]) +) +``` + +### Use `TPUEstimatorSpec` + +`TPUEstimatorSpec` do not support hooks, and require function wrappers for +some fields. + +An `Estimator`'s `model_fn` must return an `EstimatorSpec`. An `EstimatorSpec` +is a simple structure of named fields containing all the `tf.Tensors` of the +model that the `Estimator` may need to interact with. + +`TPUEstimators` use a @{tf.contrib.tpu.TPUEstimatorSpec}. There are a few +differences between it and a standard @{tf.estimator.EstimatorSpec}: + + +* The `eval_metric_ops` must be wrapped into a `metrics_fn`, this field is + renamed `eval_metrics` ([see above](#metrics)). +* The @{tf.train.SessionRunHook$hooks} are unsupported, so these fields are + omitted. +* The @{tf.train.Scaffold$`scaffold`}, if used, must also be wrapped in a + function. This field is renamed to `scaffold_fn`. + +`Scaffold` and `Hooks` are for advanced usage, and can typically be omitted. + +## Input functions + +Input functions work mainly unchanged as they run on the host computer, not the +Cloud TPU itself. This section explains the two necessary adjustments. + +### Params argument + + + +The `input_fn` for a standard `Estimator` _can_ include a +`params` argument; the `input_fn` for a `TPUEstimator` *must* include a +`params` argument. This is necessary to allow the estimator to set the batch +size for each replica of the input stream. So the minimum signature for an +`input_fn` for a `TPUEstimator` is: + +``` +def my_input_fn(params): + pass +``` + +Where `params['batch-size']` will contain the batch size. + +### Static shapes and batch size + +The input pipeline generated by your `input_fn` is run on CPU. So it is mostly +free from the strict static shape requirements imposed by the XLA/TPU environment. +The one requirement is that the batches of data fed from your input pipeline to +the TPU have a static shape, as determined by the standard TensorFlow shape +inference algorithm. Intermediate tensors are free to have a dynamic shapes. 
+If shape inference fails but the shape is actually known, you can impose the
+correct shape using the `tf.Tensor.set_shape` method.
+
+In the example below shape inference fails, but the shape is set correctly
+using `set_shape`:
+
+```
+>>> x = tf.zeros(tf.constant([1,2,3])+1)
+>>> x.shape
+
+TensorShape([Dimension(None), Dimension(None), Dimension(None)])
+
+>>> x.set_shape([2,3,4])
+```
+
+In many cases the batch size is the only unknown dimension.
+
+A typical input pipeline, using `tf.data`, will usually produce batches of a
+fixed size. The last batch of a finite `Dataset`, however, is typically smaller,
+containing just the remaining elements. Since a `Dataset` does not know its own
+length or finiteness, the standard @{tf.data.Dataset.batch$`batch`} method
+cannot determine on its own whether every batch will have a fixed size:
+
+```
+>>> params = {'batch_size':32}
+>>> ds = tf.data.Dataset.from_tensors([0, 1, 2])
+>>> ds = ds.repeat().batch(params['batch_size'])
+>>> ds
+
+<BatchDataset shapes: (?, 3), types: tf.int32>
+```
+
+The most straightforward fix is to
+@{tf.data.Dataset.apply$apply} @{tf.contrib.data.batch_and_drop_remainder}
+as follows:
+
+```
+>>> params = {'batch_size':32}
+>>> ds = tf.data.Dataset.from_tensors([0, 1, 2])
+>>> ds = ds.repeat().apply(
+...     tf.contrib.data.batch_and_drop_remainder(params['batch_size']))
+>>> ds
+
+<_RestructuredDataset shapes: (32, 3), types: tf.int32>
+```
+
+The one downside to this approach is that, as the name implies, this batching
+method throws out any fractional batch at the end of the dataset. This is fine
+for an infinitely repeating dataset being used for training, but could be a
+problem if you want to train for an exact number of epochs.
+
+To do an exact one-epoch _evaluation_ you can work around this by manually
+padding the batches out to the full batch size, and setting the padding entries
+to have zero weight when creating your `tf.metrics`.
+
+## Datasets
+
+Efficient use of the `tf.data.Dataset` API is critical when using a Cloud
+TPU, as it is impossible to make full use of a Cloud TPU unless you can feed it
+data quickly enough. See @{$datasets_performance} for details on dataset
+performance.
+
+For all but the simplest experimentation (using
+@{tf.data.Dataset.from_tensor_slices} or other in-graph data) you will need to
+store all data files read by the `TPUEstimator`'s `Dataset` in Google Cloud
+Storage buckets.
+
+For most use cases, we recommend converting your data into `TFRecord`
+format and using a @{tf.data.TFRecordDataset} to read it. This, however, is not
+a hard requirement and you can use other dataset readers
+(`FixedLengthRecordDataset` or `TextLineDataset`) if you prefer.
+
+Small datasets can be loaded entirely into memory using
+@{tf.data.Dataset.cache}.
+
+Regardless of the data format used, it is strongly recommended that you
+@{$performance_guide#use_large_files$use large files}, on the order of
+100MB. This is especially important in this networked setting, as the overhead
+of opening a file is significantly higher.
+
+It is also important, regardless of the type of reader used, to enable buffering
+using the `buffer_size` argument to the constructor. This argument is specified
+in bytes. A minimum of a few MB (`buffer_size=8*1024*1024`) is recommended so
+that data is available when needed.
+
+The TPU-demos repo includes
+[a script](https://github.com/tensorflow/tpu/blob/master/tools/datasets/imagenet_to_gcs.py)
+for downloading the ImageNet dataset and converting it to an appropriate format.
+This, together with the ImageNet
+[models](https://github.com/tensorflow/tpu/tree/master/models)
+included in the repo, demonstrates all of these best practices.
+
+
+## What Next
+
+For details on how to actually set up and run a Cloud TPU see:
+
+ * [Google Cloud TPU Documentation](https://cloud.google.com/tpu/docs/)
+
+This document is by no means exhaustive. The best source of more detail on how
+to make a Cloud TPU compatible model is the set of example models published in:
+
+ * The [TPU Demos Repository.](https://github.com/tensorflow/tpu)
+
+For more information about tuning TensorFlow code for performance see:
+
+ * The @{$performance$Performance Section.}
+
diff --git a/tensorflow/docs_src/guide/variables.md b/tensorflow/docs_src/guide/variables.md
new file mode 100644
index 0000000000..cd8c4b5b9a
--- /dev/null
+++ b/tensorflow/docs_src/guide/variables.md
@@ -0,0 +1,319 @@
+# Variables
+
+A TensorFlow **variable** is the best way to represent shared, persistent state
+manipulated by your program.
+
+Variables are manipulated via the `tf.Variable` class. A `tf.Variable`
+represents a tensor whose value can be changed by running ops on it. Unlike
+`tf.Tensor` objects, a `tf.Variable` exists outside the context of a single
+`session.run` call.
+
+Internally, a `tf.Variable` stores a persistent tensor. Specific ops allow you
+to read and modify the values of this tensor. These modifications are visible
+across multiple `tf.Session`s, so multiple workers can see the same values for a
+`tf.Variable`.
+
+## Creating a Variable
+
+The best way to create a variable is to call the `tf.get_variable`
+function. This function requires you to specify the variable's name. This name
+will be used by other replicas to access the same variable, as well as to name
+this variable's value when checkpointing and exporting models. `tf.get_variable`
+also allows you to reuse a previously created variable of the same name, making it
+easy to define models which reuse layers.
+
+To create a variable with `tf.get_variable`, simply provide the name and shape:
+
+``` python
+my_variable = tf.get_variable("my_variable", [1, 2, 3])
+```
+
+This creates a variable named "my_variable" which is a three-dimensional tensor
+with shape `[1, 2, 3]`. This variable will, by default, have the `dtype`
+`tf.float32` and its initial value will be randomized via
+`tf.glorot_uniform_initializer`.
+
+You may optionally specify the `dtype` and initializer to `tf.get_variable`. For
+example:
+
+``` python
+my_int_variable = tf.get_variable("my_int_variable", [1, 2, 3], dtype=tf.int32,
+                                  initializer=tf.zeros_initializer)
+```
+
+TensorFlow provides many convenient initializers. Alternatively, you may
+initialize a `tf.Variable` to have the value of a `tf.Tensor`. For example:
+
+``` python
+other_variable = tf.get_variable("other_variable", dtype=tf.int32,
+                                 initializer=tf.constant([23, 42]))
+```
+
+Note that when the initializer is a `tf.Tensor` you should not specify the
+variable's shape, as the shape of the initializer tensor will be used.
+
+### Variable collections
+
+Because disconnected parts of a TensorFlow program might want to create
+variables, it is sometimes useful to have a single way to access all of
+them. For this reason TensorFlow provides **collections**, which are named lists
+of tensors or other objects, such as `tf.Variable` instances.
+ +By default every `tf.Variable` gets placed in the following two collections: + + * `tf.GraphKeys.GLOBAL_VARIABLES` --- variables that can be shared across + multiple devices, + * `tf.GraphKeys.TRAINABLE_VARIABLES` --- variables for which TensorFlow will + calculate gradients. + +If you don't want a variable to be trainable, add it to the +`tf.GraphKeys.LOCAL_VARIABLES` collection instead. For example, the following +snippet demonstrates how to add a variable named `my_local` to this collection: + +``` python +my_local = tf.get_variable("my_local", shape=(), +collections=[tf.GraphKeys.LOCAL_VARIABLES]) +``` + +Alternatively, you can specify `trainable=False` as an argument to +`tf.get_variable`: + +``` python +my_non_trainable = tf.get_variable("my_non_trainable", + shape=(), + trainable=False) +``` + + +You can also use your own collections. Any string is a valid collection name, +and there is no need to explicitly create a collection. To add a variable (or +any other object) to a collection after creating the variable, call +`tf.add_to_collection`. For example, the following code adds an existing +variable named `my_local` to a collection named `my_collection_name`: + +``` python +tf.add_to_collection("my_collection_name", my_local) +``` + +And to retrieve a list of all the variables (or other objects) you've placed in +a collection you can use: + +``` python +tf.get_collection("my_collection_name") +``` + +### Device placement + +Just like any other TensorFlow operation, you can place variables on particular +devices. For example, the following snippet creates a variable named `v` and +places it on the second GPU device: + +``` python +with tf.device("/device:GPU:1"): + v = tf.get_variable("v", [1]) +``` + +It is particularly important for variables to be in the correct device in +distributed settings. Accidentally putting variables on workers instead of +parameter servers, for example, can severely slow down training or, in the worst +case, let each worker blithely forge ahead with its own independent copy of each +variable. For this reason we provide @{tf.train.replica_device_setter}, which +can automatically place variables in parameter servers. For example: + +``` python +cluster_spec = { + "ps": ["ps0:2222", "ps1:2222"], + "worker": ["worker0:2222", "worker1:2222", "worker2:2222"]} +with tf.device(tf.train.replica_device_setter(cluster=cluster_spec)): + v = tf.get_variable("v", shape=[20, 20]) # this variable is placed + # in the parameter server + # by the replica_device_setter +``` + +## Initializing variables + +Before you can use a variable, it must be initialized. If you are programming in +the low-level TensorFlow API (that is, you are explicitly creating your own +graphs and sessions), you must explicitly initialize the variables. Most +high-level frameworks such as `tf.contrib.slim`, `tf.estimator.Estimator` and +`Keras` automatically initialize variables for you before training a model. + +Explicit initialization is otherwise useful because it allows you not to rerun +potentially expensive initializers when reloading a model from a checkpoint as +well as allowing determinism when randomly-initialized variables are shared in a +distributed setting. + +To initialize all trainable variables in one go, before training starts, call +`tf.global_variables_initializer()`. This function returns a single operation +responsible for initializing all variables in the +`tf.GraphKeys.GLOBAL_VARIABLES` collection. Running this operation initializes +all variables. 
For example: + +``` python +session.run(tf.global_variables_initializer()) +# Now all variables are initialized. +``` + +If you do need to initialize variables yourself, you can run the variable's +initializer operation. For example: + +``` python +session.run(my_variable.initializer) +``` + + +You can also ask which variables have still not been initialized. For example, +the following code prints the names of all variables which have not yet been +initialized: + +``` python +print(session.run(tf.report_uninitialized_variables())) +``` + + +Note that by default `tf.global_variables_initializer` does not specify the +order in which variables are initialized. Therefore, if the initial value of a +variable depends on another variable's value, it's likely that you'll get an +error. Any time you use the value of a variable in a context in which not all +variables are initialized (say, if you use a variable's value while initializing +another variable), it is best to use `variable.initialized_value()` instead of +`variable`: + +``` python +v = tf.get_variable("v", shape=(), initializer=tf.zeros_initializer()) +w = tf.get_variable("w", initializer=v.initialized_value() + 1) +``` + +## Using variables + +To use the value of a `tf.Variable` in a TensorFlow graph, simply treat it like +a normal `tf.Tensor`: + +``` python +v = tf.get_variable("v", shape=(), initializer=tf.zeros_initializer()) +w = v + 1 # w is a tf.Tensor which is computed based on the value of v. + # Any time a variable is used in an expression it gets automatically + # converted to a tf.Tensor representing its value. +``` + +To assign a value to a variable, use the methods `assign`, `assign_add`, and +friends in the `tf.Variable` class. For example, here is how you can call these +methods: + +``` python +v = tf.get_variable("v", shape=(), initializer=tf.zeros_initializer()) +assignment = v.assign_add(1) +tf.global_variables_initializer().run() +sess.run(assignment) # or assignment.op.run(), or assignment.eval() +``` + +Most TensorFlow optimizers have specialized ops that efficiently update the +values of variables according to some gradient descent-like algorithm. See +@{tf.train.Optimizer} for an explanation of how to use optimizers. + +Because variables are mutable it's sometimes useful to know what version of a +variable's value is being used at any point in time. To force a re-read of the +value of a variable after something has happened, you can use +`tf.Variable.read_value`. For example: + +``` python +v = tf.get_variable("v", shape=(), initializer=tf.zeros_initializer()) +assignment = v.assign_add(1) +with tf.control_dependencies([assignment]): + w = v.read_value() # w is guaranteed to reflect v's value after the + # assign_add operation. +``` + + +## Sharing variables + +TensorFlow supports two ways of sharing variables: + + * Explicitly passing `tf.Variable` objects around. + * Implicitly wrapping `tf.Variable` objects within `tf.variable_scope` objects. + +While code which explicitly passes variables around is very clear, it is +sometimes convenient to write TensorFlow functions that implicitly use +variables in their implementations. Most of the functional layers from +`tf.layers` use this approach, as well as all `tf.metrics`, and a few other +library utilities. + +Variable scopes allow you to control variable reuse when calling functions which +implicitly create and use variables. They also allow you to name your variables +in a hierarchical and understandable way. 
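+
+As a quick, minimal sketch of this naming behavior (the scope and variable
+names here are arbitrary), nested scopes simply prefix the variable name:
+
+``` python
+with tf.variable_scope("outer"):
+  with tf.variable_scope("inner"):
+    v = tf.get_variable("v", shape=())
+
+print(v.name)  # prints "outer/inner/v:0"
+```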
+ +For example, let's say we write a function to create a convolutional / relu +layer: + +```python +def conv_relu(input, kernel_shape, bias_shape): + # Create variable named "weights". + weights = tf.get_variable("weights", kernel_shape, + initializer=tf.random_normal_initializer()) + # Create variable named "biases". + biases = tf.get_variable("biases", bias_shape, + initializer=tf.constant_initializer(0.0)) + conv = tf.nn.conv2d(input, weights, + strides=[1, 1, 1, 1], padding='SAME') + return tf.nn.relu(conv + biases) +``` + +This function uses short names `weights` and `biases`, which is good for +clarity. In a real model, however, we want many such convolutional layers, and +calling this function repeatedly would not work: + +``` python +input1 = tf.random_normal([1,10,10,32]) +input2 = tf.random_normal([1,20,20,32]) +x = conv_relu(input1, kernel_shape=[5, 5, 32, 32], bias_shape=[32]) +x = conv_relu(x, kernel_shape=[5, 5, 32, 32], bias_shape = [32]) # This fails. +``` + +Since the desired behavior is unclear (create new variables or reuse the +existing ones?) TensorFlow will fail. Calling `conv_relu` in different scopes, +however, clarifies that we want to create new variables: + +```python +def my_image_filter(input_images): + with tf.variable_scope("conv1"): + # Variables created here will be named "conv1/weights", "conv1/biases". + relu1 = conv_relu(input_images, [5, 5, 32, 32], [32]) + with tf.variable_scope("conv2"): + # Variables created here will be named "conv2/weights", "conv2/biases". + return conv_relu(relu1, [5, 5, 32, 32], [32]) +``` + +If you do want the variables to be shared, you have two options. First, you can +create a scope with the same name using `reuse=True`: + +``` python +with tf.variable_scope("model"): + output1 = my_image_filter(input1) +with tf.variable_scope("model", reuse=True): + output2 = my_image_filter(input2) + +``` + +You can also call `scope.reuse_variables()` to trigger a reuse: + +``` python +with tf.variable_scope("model") as scope: + output1 = my_image_filter(input1) + scope.reuse_variables() + output2 = my_image_filter(input2) + +``` + +Since depending on exact string names of scopes can feel dangerous, it's also +possible to initialize a variable scope based on another one: + +``` python +with tf.variable_scope("model") as scope: + output1 = my_image_filter(input1) +with tf.variable_scope(scope, reuse=True): + output2 = my_image_filter(input2) + +``` + diff --git a/tensorflow/docs_src/guide/version_compat.md b/tensorflow/docs_src/guide/version_compat.md new file mode 100644 index 0000000000..72e427c5f8 --- /dev/null +++ b/tensorflow/docs_src/guide/version_compat.md @@ -0,0 +1,319 @@ +# TensorFlow Version Compatibility + +This document is for users who need backwards compatibility across different +versions of TensorFlow (either for code or data), and for developers who want +to modify TensorFlow while preserving compatibility. + +## Semantic Versioning 2.0 + +TensorFlow follows Semantic Versioning 2.0 ([semver](http://semver.org)) for its +public API. Each release version of TensorFlow has the form `MAJOR.MINOR.PATCH`. +For example, TensorFlow version 1.2.3 has `MAJOR` version 1, `MINOR` version 2, +and `PATCH` version 3. Changes to each number have the following meaning: + +* **MAJOR**: Potentially backwards incompatible changes. Code and data that + worked with a previous major release will not necessarily work with the new + release. 
However, in some cases existing TensorFlow graphs and checkpoints + may be migratable to the newer release; see + [Compatibility of graphs and checkpoints](#compatibility_of_graphs_and_checkpoints) + for details on data compatibility. + +* **MINOR**: Backwards compatible features, speed improvements, etc. Code and + data that worked with a previous minor release *and* which depends only on the + public API will continue to work unchanged. For details on what is and is + not the public API, see [What is covered](#what_is_covered). + +* **PATCH**: Backwards compatible bug fixes. + +For example, release 1.0.0 introduced backwards *incompatible* changes from +release 0.12.1. However, release 1.1.1 was backwards *compatible* with release +1.0.0. + +## What is covered + +Only the public APIs of TensorFlow are backwards compatible across minor and +patch versions. The public APIs consist of + +* All the documented [Python](../api_docs/python) functions and classes in the + `tensorflow` module and its submodules, except for + * functions and classes in `tf.contrib` + * functions and classes whose names start with `_` (as these are private) + Note that the code in the `examples/` and `tools/` directories is not + reachable through the `tensorflow` Python module and is thus not covered by + the compatibility guarantee. + + If a symbol is available through the `tensorflow` Python module or its + submodules, but is not documented, then it is **not** considered part of the + public API. + +* The [C API](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/c/c_api.h). + +* The following protocol buffer files: + * [`attr_value`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/attr_value.proto) + * [`config`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/protobuf/config.proto) + * [`event`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/util/event.proto) + * [`graph`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/graph.proto) + * [`op_def`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/op_def.proto) + * [`reader_base`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/reader_base.proto) + * [`summary`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/summary.proto) + * [`tensor`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/tensor.proto) + * [`tensor_shape`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/tensor_shape.proto) + * [`types`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/types.proto) + + +## What is *not* covered + +Some API functions are explicitly marked as "experimental" and can change in +backward incompatible ways between minor releases. These include: + +* **Experimental APIs**: The @{tf.contrib} module and its submodules in Python + and any functions in the C API or fields in protocol buffers that are + explicitly commented as being experimental. In particular, any field in a + protocol buffer which is called "experimental" and all its fields and + submessages can change at any time. + +* **Other languages**: TensorFlow APIs in languages other than Python and C, + such as: + + - @{$cc/guide$C++} (exposed through header files in + [`tensorflow/cc`](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/cc)). 
+ - [Java](../api_docs/java/reference/org/tensorflow/package-summary), + - [Go](https://godoc.org/github.com/tensorflow/tensorflow/tensorflow/go) + +* **Details of composite ops:** Many public functions in Python expand to + several primitive ops in the graph, and these details will be part of any + graphs saved to disk as `GraphDef`s. These details may change for + minor releases. In particular, regressions tests that check for exact + matching between graphs are likely to break across minor releases, even + though the behavior of the graph should be unchanged and existing + checkpoints will still work. + +* **Floating point numerical details:** The specific floating point values + computed by ops may change at any time. Users should rely only on + approximate accuracy and numerical stability, not on the specific bits + computed. Changes to numerical formulas in minor and patch releases should + result in comparable or improved accuracy, with the caveat that in machine + learning improved accuracy of specific formulas may result in decreased + accuracy for the overall system. + +* **Random numbers:** The specific random numbers computed by the + @{$python/constant_op#Random_Tensors$random ops} may change at any time. + Users should rely only on approximately correct distributions and + statistical strength, not the specific bits computed. However, we will make + changes to random bits rarely (or perhaps never) for patch releases. We + will, of course, document all such changes. + +* **Version skew in distributed Tensorflow:** Running two different versions + of TensorFlow in a single cluster is unsupported. There are no guarantees + about backwards compatibility of the wire protocol. + +* **Bugs:** We reserve the right to make backwards incompatible behavior + (though not API) changes if the current implementation is clearly broken, + that is, if it contradicts the documentation or if a well-known and + well-defined intended behavior is not properly implemented due to a bug. + For example, if an optimizer claims to implement a well-known optimization + algorithm but does not match that algorithm due to a bug, then we will fix + the optimizer. Our fix may break code relying on the wrong behavior for + convergence. We will note such changes in the release notes. + +* **Error messages:** We reserve the right to change the text of error + messages. In addition, the type of an error may change unless the type is + specified in the documentation. For example, a function documented to + raise an `InvalidArgument` exception will continue to + raise `InvalidArgument`, but the human-readable message contents can change. + +## Compatibility of graphs and checkpoints + +You'll sometimes need to preserve graphs and checkpoints. +Graphs describe the data flow of ops to be run during training and +inference, and checkpoints contain the saved tensor values of variables in a +graph. + +Many TensorFlow users save graphs and trained models to disk for +later evaluation or additional training, but end up running their saved graphs +or models on a later release. In compliance with semver, any graph or checkpoint +written out with one version of TensorFlow can be loaded and evaluated with a +later version of TensorFlow with the same major release. However, we will +endeavor to preserve backwards compatibility even across major releases when +possible, so that the serialized files are usable over long periods of time. + + +Graphs are serialized via the `GraphDef` protocol buffer. 
To facilitate (rare) +backwards incompatible changes to graphs, each `GraphDef` has a version number +separate from the TensorFlow version. For example, `GraphDef` version 17 +deprecated the `inv` op in favor of `reciprocal`. The semantics are: + +* Each version of TensorFlow supports an interval of `GraphDef` versions. This + interval will be constant across patch releases, and will only grow across + minor releases. Dropping support for a `GraphDef` version will only occur + for a major release of TensorFlow. + +* Newly created graphs are assigned the latest `GraphDef` version number. + +* If a given version of TensorFlow supports the `GraphDef` version of a graph, + it will load and evaluate with the same behavior as the TensorFlow version + used to generate it (except for floating point numerical details and random + numbers), regardless of the major version of TensorFlow. In particular, all + checkpoint files will be compatible. + +* If the `GraphDef` *upper* bound is increased to X in a (minor) release, there + will be at least six months before the *lower* bound is increased to X. For + example (we're using hypothetical version numbers here): + * TensorFlow 1.2 might support `GraphDef` versions 4 to 7. + * TensorFlow 1.3 could add `GraphDef` version 8 and support versions 4 to 8. + * At least six months later, TensorFlow 2.0.0 could drop support for + versions 4 to 7, leaving version 8 only. + +Finally, when support for a `GraphDef` version is dropped, we will attempt to +provide tools for automatically converting graphs to a newer supported +`GraphDef` version. + +## Graph and checkpoint compatibility when extending TensorFlow + +This section is relevant only when making incompatible changes to the `GraphDef` +format, such as when adding ops, removing ops, or changing the functionality +of existing ops. The previous section should suffice for most users. + +### Backward and partial forward compatibility + +Our versioning scheme has three requirements: + +* **Backward compatibility** to support loading graphs and checkpoints + created with older versions of TensorFlow. +* **Forward compatibility** to support scenarios where the producer of a + graph or checkpoint is upgraded to a newer version of TensorFlow before + the consumer. +* Enable evolving TensorFlow in incompatible ways. For example, removing ops, + adding attributes, and removing attributes. + +Note that while the `GraphDef` version mechanism is separate from the TensorFlow +version, backwards incompatible changes to the `GraphDef` format are still +restricted by Semantic Versioning. This means functionality can only be removed +or changed between `MAJOR` versions of TensorFlow (such as `1.7` to `2.0`). +Additionally, forward compatibility is enforced within Patch releases (`1.x.1` +to `1.x.2` for example). + +To achieve backward and forward compatibility and to know when to enforce changes +in formats, graphs and checkpoints have metadata that describes when they +were produced. The sections below detail the TensorFlow implementation and +guidelines for evolving `GraphDef` versions. + +### Independent data version schemes + +There are different data versions for graphs and checkpoints. The two data +formats evolve at different rates from each other and also at different rates +from TensorFlow. Both versioning systems are defined in +[`core/public/version.h`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/public/version.h). 
+Whenever a new version is added, a note is added to the header detailing what +changed and the date. + +### Data, producers, and consumers + +We distinguish between the following kinds of data version information: +* **producers**: binaries that produce data. Producers have a version + (`producer`) and a minimum consumer version that they are compatible with + (`min_consumer`). +* **consumers**: binaries that consume data. Consumers have a version + (`consumer`) and a minimum producer version that they are compatible with + (`min_producer`). + +Each piece of versioned data has a [`VersionDef +versions`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/versions.proto) +field which records the `producer` that made the data, the `min_consumer` +that it is compatible with, and a list of `bad_consumers` versions that are +disallowed. + +By default, when a producer makes some data, the data inherits the producer's +`producer` and `min_consumer` versions. `bad_consumers` can be set if specific +consumer versions are known to contain bugs and must be avoided. A consumer can +accept a piece of data if the following are all true: + +* `consumer` >= data's `min_consumer` +* data's `producer` >= consumer's `min_producer` +* `consumer` not in data's `bad_consumers` + +Since both producers and consumers come from the same TensorFlow code base, +[`core/public/version.h`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/public/version.h) +contains a main data version which is treated as either `producer` or +`consumer` depending on context and both `min_consumer` and `min_producer` +(needed by producers and consumers, respectively). Specifically, + +* For `GraphDef` versions, we have `TF_GRAPH_DEF_VERSION`, + `TF_GRAPH_DEF_VERSION_MIN_CONSUMER`, and + `TF_GRAPH_DEF_VERSION_MIN_PRODUCER`. +* For checkpoint versions, we have `TF_CHECKPOINT_VERSION`, + `TF_CHECKPOINT_VERSION_MIN_CONSUMER`, and + `TF_CHECKPOINT_VERSION_MIN_PRODUCER`. + +### Add a new attribute with default to an existing op + +Following the guidance below gives you forward compatibility only if the set of +ops has not changed: + +1. If forward compatibility is desired, set `strip_default_attrs` to `True` + while exporting the model using either the + @{tf.saved_model.builder.SavedModelBuilder.add_meta_graph_and_variables$`add_meta_graph_and_variables`} + and @{tf.saved_model.builder.SavedModelBuilder.add_meta_graph$`add_meta_graph`} + methods of the `SavedModelBuilder` class, or + @{tf.estimator.Estimator.export_savedmodel$`Estimator.export_savedmodel`} +2. This strips off the default valued attributes at the time of + producing/exporting the models. This makes sure that the exported + @{tf.MetaGraphDef} does not contain the new op-attribute when the default + value is used. +3. Having this control could allow out-of-date consumers (for example, serving + binaries that lag behind training binaries) to continue loading the models + and prevent interruptions in model serving. + +### Evolving GraphDef versions + +This section explains how to use this versioning mechanism to make different +types of changes to the `GraphDef` format. + +#### Add an op + +Add the new op to both consumers and producers at the same time, and do not +change any `GraphDef` versions. This type of change is automatically +backward compatible, and does not impact forward compatibility plan since +existing producer scripts will not suddenly use the new functionality. 
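+
+As an aside, the acceptance rules from the "Data, producers, and consumers"
+section above can be restated as a small illustrative sketch; this is plain
+Python written for exposition, not a TensorFlow API:
+
+``` python
+def consumer_accepts(consumer, min_producer, data_versions):
+  """Returns True if a consumer binary can load a piece of versioned data.
+
+  `data_versions` mirrors the `VersionDef` fields: `producer`,
+  `min_consumer`, and `bad_consumers`.
+  """
+  return (consumer >= data_versions['min_consumer'] and
+          data_versions['producer'] >= min_producer and
+          consumer not in data_versions['bad_consumers'])
+```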
+ +#### Add an op and switch existing Python wrappers to use it + +1. Implement new consumer functionality and increment the `GraphDef` version. +2. If it is possible to make the wrappers use the new functionality only in + cases that did not work before, the wrappers can be updated now. +3. Change Python wrappers to use the new functionality. Do not increment + `min_consumer`, since models that do not use this op should not break. + +#### Remove or restrict an op's functionality + +1. Fix all producer scripts (not TensorFlow itself) to not use the banned op or + functionality. +2. Increment the `GraphDef` version and implement new consumer functionality + that bans the removed op or functionality for GraphDefs at the new version + and above. If possible, make TensorFlow stop producing `GraphDefs` with the + banned functionality. To do so, add the + [`REGISTER_OP(...).Deprecated(deprecated_at_version, + message)`](https://github.com/tensorflow/tensorflow/blob/b289bc7a50fc0254970c60aaeba01c33de61a728/tensorflow/core/ops/array_ops.cc#L1009). +3. Wait for a major release for backward compatibility purposes. +4. Increase `min_producer` to the GraphDef version from (2) and remove the + functionality entirely. + +#### Change an op's functionality + +1. Add a new similar op named `SomethingV2` or similar and go through the + process of adding it and switching existing Python wrappers to use it, which + may take three weeks if forward compatibility is desired. +2. Remove the old op (Can only take place with a major version change due to + backward compatibility). +3. Increase `min_consumer` to rule out consumers with the old op, add back the + old op as an alias for `SomethingV2`, and go through the process to switch + existing Python wrappers to use it. +4. Go through the process to remove `SomethingV2`. + +#### Ban a single unsafe consumer version + +1. Bump the `GraphDef` version and add the bad version to `bad_consumers` for + all new GraphDefs. If possible, add to `bad_consumers` only for GraphDefs + which contain a certain op or similar. +2. If existing consumers have the bad version, push them out as soon as + possible. diff --git a/tensorflow/docs_src/install/install_go.md b/tensorflow/docs_src/install/install_go.md index 1c03dd223e..5451e1b319 100644 --- a/tensorflow/docs_src/install/install_go.md +++ b/tensorflow/docs_src/install/install_go.md @@ -6,7 +6,7 @@ a Go application. This guide explains how to install and set up the [TensorFlow Go package](https://godoc.org/github.com/tensorflow/tensorflow/tensorflow/go). Warning: The TensorFlow Go API is *not* covered by the TensorFlow -[API stability guarantees](https://www.tensorflow.org/programmers_guide/version_semantics). +[API stability guarantees](../guide/version_semantics.md). ## Supported Platforms diff --git a/tensorflow/docs_src/install/install_java.md b/tensorflow/docs_src/install/install_java.md index c73e2f4281..ad3544b595 100644 --- a/tensorflow/docs_src/install/install_java.md +++ b/tensorflow/docs_src/install/install_java.md @@ -7,7 +7,7 @@ Java application. This guide explains how to install and use it in a Java application. Warning: The TensorFlow Java API is *not* covered by the TensorFlow -[API stability guarantees](https://www.tensorflow.org/programmers_guide/version_semantics). +[API stability guarantees](../guide/version_semantics.md). 
## Supported Platforms diff --git a/tensorflow/docs_src/programmers_guide/checkpoints.md b/tensorflow/docs_src/programmers_guide/checkpoints.md deleted file mode 100644 index 8dfd91e3c8..0000000000 --- a/tensorflow/docs_src/programmers_guide/checkpoints.md +++ /dev/null @@ -1,240 +0,0 @@ -# Checkpoints - -This document examines how to save and restore TensorFlow models built with -Estimators. TensorFlow provides two model formats: - -* checkpoints, which is a format dependent on the code that created - the model. -* SavedModel, which is a format independent of the code that created - the model. - -This document focuses on checkpoints. For details on SavedModel, see the -@{$saved_model$Saving and Restoring} chapter of the -*TensorFlow Programmer's Guide*. - - -## Sample code - -This document relies on the same -[Iris classification example](https://github.com/tensorflow/models/blob/master/samples/core/get_started/premade_estimator.py) detailed in @{$premade_estimators$Getting Started with TensorFlow}. -To download and access the example, invoke the following two commands: - -```shell -git clone https://github.com/tensorflow/models/ -cd models/samples/core/get_started -``` - -Most of the code snippets in this document are minor variations -on `premade_estimator.py`. - - -## Saving partially-trained models - -Estimators automatically write the following to disk: - -* **checkpoints**, which are versions of the model created during training. -* **event files**, which contain information that - [TensorBoard](https://developers.google.com/machine-learning/glossary/#TensorBoard) - uses to create visualizations. - -To specify the top-level directory in which the Estimator stores its -information, assign a value to the optional `model_dir` argument of *any* -`Estimator`'s constructor. -Taking `DNNClassifier` as an example, -the following code sets the `model_dir` -argument to the `models/iris` directory: - -```python -classifier = tf.estimator.DNNClassifier( - feature_columns=my_feature_columns, - hidden_units=[10, 10], - n_classes=3, - model_dir='models/iris') -``` - -Suppose you call the Estimator's `train` method. For example: - - -```python -classifier.train( - input_fn=lambda:train_input_fn(train_x, train_y, batch_size=100), - steps=200) -``` - -As suggested by the following diagrams, the first call to `train` -adds checkpoints and other files to the `model_dir` directory: - -
-[Figure: The first call to train().]
- - -To see the objects in the created `model_dir` directory on a -UNIX-based system, just call `ls` as follows: - -```none -$ ls -1 models/iris -checkpoint -events.out.tfevents.timestamp.hostname -graph.pbtxt -model.ckpt-1.data-00000-of-00001 -model.ckpt-1.index -model.ckpt-1.meta -model.ckpt-200.data-00000-of-00001 -model.ckpt-200.index -model.ckpt-200.meta -``` - -The preceding `ls` command shows that the Estimator created checkpoints -at steps 1 (the start of training) and 200 (the end of training). - - -### Default checkpoint directory - -If you don't specify `model_dir` in an Estimator's constructor, the Estimator -writes checkpoint files to a temporary directory chosen by Python's -[tempfile.mkdtemp](https://docs.python.org/3/library/tempfile.html#tempfile.mkdtemp) -function. For example, the following Estimator constructor does *not* specify -the `model_dir` argument: - -```python -classifier = tf.estimator.DNNClassifier( - feature_columns=my_feature_columns, - hidden_units=[10, 10], - n_classes=3) - -print(classifier.model_dir) -``` - -The `tempfile.mkdtemp` function picks a secure, temporary directory -appropriate for your operating system. For example, a typical temporary -directory on macOS might be something like the following: - -```None -/var/folders/0s/5q9kfzfj3gx2knj0vj8p68yc00dhcr/T/tmpYm1Rwa -``` - -### Checkpointing Frequency - -By default, the Estimator saves -[checkpoints](https://developers.google.com/machine-learning/glossary/#checkpoint) -in the `model_dir` according to the following schedule: - -* Writes a checkpoint every 10 minutes (600 seconds). -* Writes a checkpoint when the `train` method starts (first iteration) - and completes (final iteration). -* Retains only the 5 most recent checkpoints in the directory. - -You may alter the default schedule by taking the following steps: - -1. Create a @{tf.estimator.RunConfig$`RunConfig`} object that defines the - desired schedule. -2. When instantiating the Estimator, pass that `RunConfig` object to the - Estimator's `config` argument. - -For example, the following code changes the checkpointing schedule to every -20 minutes and retains the 10 most recent checkpoints: - -```python -my_checkpointing_config = tf.estimator.RunConfig( - save_checkpoints_secs = 20*60, # Save checkpoints every 20 minutes. - keep_checkpoint_max = 10, # Retain the 10 most recent checkpoints. -) - -classifier = tf.estimator.DNNClassifier( - feature_columns=my_feature_columns, - hidden_units=[10, 10], - n_classes=3, - model_dir='models/iris', - config=my_checkpointing_config) -``` - -## Restoring your model - -The first time you call an Estimator's `train` method, TensorFlow saves a -checkpoint to the `model_dir`. Each subsequent call to the Estimator's -`train`, `evaluate`, or `predict` method causes the following: - -1. The Estimator builds the model's - [graph](https://developers.google.com/machine-learning/glossary/#graph) - by running the `model_fn()`. (For details on the `model_fn()`, see - @{$custom_estimators$Creating Custom Estimators.}) -2. The Estimator initializes the weights of the new model from the data - stored in the most recent checkpoint. - -In other words, as the following illustration suggests, once checkpoints -exist, TensorFlow rebuilds the model each time you call `train()`, -`evaluate()`, or `predict()`. - -
-[Figure: Subsequent calls to train(), evaluate(), or predict().]
- - -### Avoiding a bad restoration - -Restoring a model's state from a checkpoint only works if the model -and checkpoint are compatible. For example, suppose you trained a -`DNNClassifier` Estimator containing two hidden layers, -each having 10 nodes: - -```python -classifier = tf.estimator.DNNClassifier( - feature_columns=feature_columns, - hidden_units=[10, 10], - n_classes=3, - model_dir='models/iris') - -classifier.train( - input_fn=lambda:train_input_fn(train_x, train_y, batch_size=100), - steps=200) -``` - -After training (and, therefore, after creating checkpoints in `models/iris`), -imagine that you changed the number of neurons in each hidden layer from 10 to -20 and then attempted to retrain the model: - -``` python -classifier2 = tf.estimator.DNNClassifier( - feature_columns=my_feature_columns, - hidden_units=[20, 20], # Change the number of neurons in the model. - n_classes=3, - model_dir='models/iris') - -classifier.train( - input_fn=lambda:train_input_fn(train_x, train_y, batch_size=100), - steps=200) -``` - -Since the state in the checkpoint is incompatible with the model described -in `classifier2`, retraining fails with the following error: - -```None -... -InvalidArgumentError (see above for traceback): tensor_name = -dnn/hiddenlayer_1/bias/t_0/Adagrad; shape in shape_and_slice spec [10] -does not match the shape stored in checkpoint: [20] -``` - -To run experiments in which you train and compare slightly different -versions of a model, save a copy of the code that created each -`model_dir`, possibly by creating a separate git branch for each version. -This separation will keep your checkpoints recoverable. - -## Summary - -Checkpoints provide an easy automatic mechanism for saving and restoring -models created by Estimators. - -See the @{$saved_model$Saving and Restoring} -chapter of the *TensorFlow Programmer's Guide* for details on: - -* Saving and restoring models using low-level TensorFlow APIs. -* Exporting and importing models in the SavedModel format, which is a - language-neutral, recoverable, serialization format. diff --git a/tensorflow/docs_src/programmers_guide/custom_estimators.md b/tensorflow/docs_src/programmers_guide/custom_estimators.md deleted file mode 100644 index fb20b35c12..0000000000 --- a/tensorflow/docs_src/programmers_guide/custom_estimators.md +++ /dev/null @@ -1,602 +0,0 @@ - -# Creating Custom Estimators - -This document introduces custom Estimators. In particular, this document -demonstrates how to create a custom @{tf.estimator.Estimator$Estimator} that -mimics the behavior of the pre-made Estimator -@{tf.estimator.DNNClassifier$`DNNClassifier`} in solving the Iris problem. See -the @{$premade_estimators$Pre-Made Estimators chapter} for details -on the Iris problem. - -To download and access the example code invoke the following two commands: - -```shell -git clone https://github.com/tensorflow/models/ -cd models/samples/core/get_started -``` - -In this document we will be looking at -[`custom_estimator.py`](https://github.com/tensorflow/models/blob/master/samples/core/get_started/custom_estimator.py). -You can run it with the following command: - -```bsh -python custom_estimator.py -``` - -If you are feeling impatient, feel free to compare and contrast -[`custom_estimator.py`](https://github.com/tensorflow/models/blob/master/samples/core/get_started/custom_estimator.py) -with -[`premade_estimator.py`](https://github.com/tensorflow/models/blob/master/samples/core/get_started/premade_estimator.py). -(which is in the same directory). 
- - - -## Pre-made vs. custom - -As the following figure shows, pre-made Estimators are subclasses of the -@{tf.estimator.Estimator} base class, while custom Estimators are an instance -of tf.estimator.Estimator: - -
-[Figure: Pre-made and custom Estimators are all Estimators. Pre-made Estimators are subclasses of `Estimator`; custom Estimators are usually direct instances of `Estimator`.]
- -Pre-made Estimators are fully baked. Sometimes though, you need more control -over an Estimator's behavior. That's where custom Estimators come in. You can -create a custom Estimator to do just about anything. If you want hidden layers -connected in some unusual fashion, write a custom Estimator. If you want to -calculate a unique -[metric](https://developers.google.com/machine-learning/glossary/#metric) -for your model, write a custom Estimator. Basically, if you want an Estimator -optimized for your specific problem, write a custom Estimator. - -A model function (or `model_fn`) implements the ML algorithm. The -only difference between working with pre-made Estimators and custom Estimators -is: - -* With pre-made Estimators, someone already wrote the model function for you. -* With custom Estimators, you must write the model function. - -Your model function could implement a wide range of algorithms, defining all -sorts of hidden layers and metrics. Like input functions, all model functions -must accept a standard group of input parameters and return a standard group of -output values. Just as input functions can leverage the Dataset API, model -functions can leverage the Layers API and the Metrics API. - -Let's see how to solve the Iris problem with a custom Estimator. A quick -reminder--here's the organization of the Iris model that we're trying to mimic: - -
-[Figure: The network architecture. Our implementation of Iris contains four features, two hidden layers, and a logits output layer.]
- -## Write an Input function - -Our custom Estimator implementation uses the same input function as our -@{$premade_estimators$pre-made Estimator implementation}, from -[`iris_data.py`](https://github.com/tensorflow/models/blob/master/samples/core/get_started/iris_data.py). -Namely: - -```python -def train_input_fn(features, labels, batch_size): - """An input function for training""" - # Convert the inputs to a Dataset. - dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels)) - - # Shuffle, repeat, and batch the examples. - dataset = dataset.shuffle(1000).repeat().batch(batch_size) - - # Return the read end of the pipeline. - return dataset.make_one_shot_iterator().get_next() -``` - -This input function builds an input pipeline that yields batches of -`(features, labels)` pairs, where `features` is a dictionary features. - -## Create feature columns - -As detailed in the @{$premade_estimators$Premade Estimators} and -@{$feature_columns$Feature Columns} chapters, you must define -your model's feature columns to specify how the model should use each feature. -Whether working with pre-made Estimators or custom Estimators, you define -feature columns in the same fashion. - -The following code creates a simple `numeric_column` for each input feature, -indicating that the value of the input feature should be used directly as an -input to the model: - -```python -# Feature columns describe how to use the input. -my_feature_columns = [] -for key in train_x.keys(): - my_feature_columns.append(tf.feature_column.numeric_column(key=key)) -``` - -## Write a model function - -The model function we'll use has the following call signature: - -```python -def my_model_fn( - features, # This is batch_features from input_fn - labels, # This is batch_labels from input_fn - mode, # An instance of tf.estimator.ModeKeys - params): # Additional configuration -``` - -The first two arguments are the batches of features and labels returned from -the input function; that is, `features` and `labels` are the handles to the -data your model will use. The `mode` argument indicates whether the caller is -requesting training, predicting, or evaluation. - -The caller may pass `params` to an Estimator's constructor. Any `params` passed -to the constructor are in turn passed on to the `model_fn`. In -[`custom_estimator.py`](https://github.com/tensorflow/models/blob/master/samples/core/get_started/custom_estimator.py) -the following lines create the estimator and set the params to configure the -model. This configuration step is similar to how we configured the @{tf.estimator.DNNClassifier} in -@{$premade_estimators}. - -```python -classifier = tf.estimator.Estimator( - model_fn=my_model, - params={ - 'feature_columns': my_feature_columns, - # Two hidden layers of 10 nodes each. - 'hidden_units': [10, 10], - # The model must choose between 3 classes. - 'n_classes': 3, - }) -``` - -To implement a typical model function, you must do the following: - -* [Define the model](#define_the_model). 
-* Specify additional calculations for each of - the [three different modes](#modes): - * [Predict](#predict) - * [Evaluate](#evaluate) - * [Train](#train) - -## Define the model - -The basic deep neural network model must define the following three sections: - -* An [input layer](https://developers.google.com/machine-learning/glossary/#input_layer) -* One or more [hidden layers](https://developers.google.com/machine-learning/glossary/#hidden_layer) -* An [output layer](https://developers.google.com/machine-learning/glossary/#output_layer) - -### Define the input layer - -The first line of the `model_fn` calls @{tf.feature_column.input_layer} to -convert the feature dictionary and `feature_columns` into input for your model, -as follows: - -```python - # Use `input_layer` to apply the feature columns. - net = tf.feature_column.input_layer(features, params['feature_columns']) -``` - -The preceding line applies the transformations defined by your feature columns, -creating the model's input layer. - -
-[Figure: The input layer, a 1:1 mapping from raw inputs to features.]
- - -### Hidden Layers - -If you are creating a deep neural network, you must define one or more hidden -layers. The Layers API provides a rich set of functions to define all types of -hidden layers, including convolutional, pooling, and dropout layers. For Iris, -we're simply going to call @{tf.layers.dense} to create hidden layers, with -dimensions defined by `params['hidden_layers']`. In a `dense` layer each node -is connected to every node in the preceding layer. Here's the relevant code: - -``` python - # Build the hidden layers, sized according to the 'hidden_units' param. - for units in params['hidden_units']: - net = tf.layers.dense(net, units=units, activation=tf.nn.relu) -``` - -* The `units` parameter defines the number of output neurons in a given layer. -* The `activation` parameter defines the [activation function](https://developers.google.com/machine-learning/glossary/#activation_function) — - [Relu](https://developers.google.com/machine-learning/glossary/#ReLU) in this - case. - -The variable `net` here signifies the current top layer of the network. During -the first iteration, `net` signifies the input layer. On each loop iteration -`tf.layers.dense` creates a new layer, which takes the previous layer's output -as its input, using the variable `net`. - -After creating two hidden layers, our network looks as follows. For -simplicity, the figure does not show all the units in each layer. - -
-[Figure: The input layer with two hidden layers added.]
- -Note that @{tf.layers.dense} provides many additional capabilities, including -the ability to set a multitude of regularization parameters. For the sake of -simplicity, though, we're going to simply accept the default values of the -other parameters. - -### Output Layer - -We'll define the output layer by calling @{tf.layers.dense} yet again, this -time without an activation function: - -```python - # Compute logits (1 per class). - logits = tf.layers.dense(net, params['n_classes'], activation=None) -``` - -Here, `net` signifies the final hidden layer. Therefore, the full set of layers -is now connected as follows: - -
-[Figure: A logit output layer connected to the top hidden layer; the final hidden layer feeds into the output layer.]
- -When defining an output layer, the `units` parameter specifies the number of -outputs. So, by setting `units` to `params['n_classes']`, the model produces -one output value per class. Each element of the output vector will contain the -score, or "logit", calculated for the associated class of Iris: Setosa, -Versicolor, or Virginica, respectively. - -Later on, these logits will be transformed into probabilities by the -@{tf.nn.softmax} function. - -## Implement training, evaluation, and prediction {#modes} - -The final step in creating a model function is to write branching code that -implements prediction, evaluation, and training. - -The model function gets invoked whenever someone calls the Estimator's `train`, -`evaluate`, or `predict` methods. Recall that the signature for the model -function looks like this: - -``` python -def my_model_fn( - features, # This is batch_features from input_fn - labels, # This is batch_labels from input_fn - mode, # An instance of tf.estimator.ModeKeys, see below - params): # Additional configuration -``` - -Focus on that third argument, mode. As the following table shows, when someone -calls `train`, `evaluate`, or `predict`, the Estimator framework invokes your model -function with the mode parameter set as follows: - -| Estimator method | Estimator Mode | -|:---------------------------------|:------------------| -|@{tf.estimator.Estimator.train$`train()`} |@{tf.estimator.ModeKeys.TRAIN$`ModeKeys.TRAIN`} | -|@{tf.estimator.Estimator.evaluate$`evaluate()`} |@{tf.estimator.ModeKeys.EVAL$`ModeKeys.EVAL`} | -|@{tf.estimator.Estimator.predict$`predict()`}|@{tf.estimator.ModeKeys.PREDICT$`ModeKeys.PREDICT`} | - -For example, suppose you instantiate a custom Estimator to generate an object -named `classifier`. Then, you make the following call: - -``` python -classifier = tf.estimator.Estimator(...) -classifier.train(input_fn=lambda: my_input_fn(FILE_TRAIN, True, 500)) -``` -The Estimator framework then calls your model function with mode set to -`ModeKeys.TRAIN`. - -Your model function must provide code to handle all three of the mode values. -For each mode value, your code must return an instance of -`tf.estimator.EstimatorSpec`, which contains the information the caller -requires. Let's examine each mode. - -### Predict - -When the Estimator's `predict` method is called, the `model_fn` receives -`mode = ModeKeys.PREDICT`. In this case, the model function must return a -`tf.estimator.EstimatorSpec` containing the prediction. - -The model must have been trained prior to making a prediction. The trained model -is stored on disk in the `model_dir` directory established when you -instantiated the Estimator. - -The code to generate the prediction for this model looks as follows: - -```python -# Compute predictions. -predicted_classes = tf.argmax(logits, 1) -if mode == tf.estimator.ModeKeys.PREDICT: - predictions = { - 'class_ids': predicted_classes[:, tf.newaxis], - 'probabilities': tf.nn.softmax(logits), - 'logits': logits, - } - return tf.estimator.EstimatorSpec(mode, predictions=predictions) -``` -The prediction dictionary contains everything that your model returns when run -in prediction mode. - -
-[Figure: Additional outputs added to the output layer.]
- -The `predictions` holds the following three key/value pairs: - -* `class_ids` holds the class id (0, 1, or 2) representing the model's - prediction of the most likely species for this example. -* `probabilities` holds the three probabilities (in this example, 0.02, 0.95, - and 0.03) -* `logit` holds the raw logit values (in this example, -1.3, 2.6, and -0.9) - -We return that dictionary to the caller via the `predictions` parameter of the -@{tf.estimator.EstimatorSpec}. The Estimator's -@{tf.estimator.Estimator.predict$`predict`} method will yield these -dictionaries. - -### Calculate the loss - -For both [training](#train) and [evaluation](#evaluate) we need to calculate the -model's loss. This is the -[objective](https://developers.google.com/machine-learning/glossary/#objective) -that will be optimized. - -We can calculate the loss by calling @{tf.losses.sparse_softmax_cross_entropy}. -The value returned by this function will be lowest, approximately 0, -probability of the correct class (at index `label`) is near 1.0. The loss value -returned is progressively larger as the probability of the correct class -decreases. - -This function returns the average over the whole batch. - -```python -# Compute loss. -loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits) -``` - -### Evaluate - -When the Estimator's `evaluate` method is called, the `model_fn` receives -`mode = ModeKeys.EVAL`. In this case, the model function must return a -`tf.estimator.EstimatorSpec` containing the model's loss and optionally one -or more metrics. - -Although returning metrics is optional, most custom Estimators do return at -least one metric. TensorFlow provides a Metrics module @{tf.metrics} to -calculate common metrics. For brevity's sake, we'll only return accuracy. The -@{tf.metrics.accuracy} function compares our predictions against the -true values, that is, against the labels provided by the input function. The -@{tf.metrics.accuracy} function requires the labels and predictions to have the -same shape. Here's the call to @{tf.metrics.accuracy}: - -``` python -# Compute evaluation metrics. -accuracy = tf.metrics.accuracy(labels=labels, - predictions=predicted_classes, - name='acc_op') -``` - -The @{tf.estimator.EstimatorSpec$`EstimatorSpec`} returned for evaluation -typically contains the following information: - -* `loss`, which is the model's loss -* `eval_metric_ops`, which is an optional dictionary of metrics. - -So, we'll create a dictionary containing our sole metric. If we had calculated -other metrics, we would have added them as additional key/value pairs to that -same dictionary. Then, we'll pass that dictionary in the `eval_metric_ops` -argument of `tf.estimator.EstimatorSpec`. Here's the code: - -```python -metrics = {'accuracy': accuracy} -tf.summary.scalar('accuracy', accuracy[1]) - -if mode == tf.estimator.ModeKeys.EVAL: - return tf.estimator.EstimatorSpec( - mode, loss=loss, eval_metric_ops=metrics) -``` - -The @{tf.summary.scalar} will make accuracy available to TensorBoard -in both `TRAIN` and `EVAL` modes. (More on this later). - -### Train - -When the Estimator's `train` method is called, the `model_fn` is called -with `mode = ModeKeys.TRAIN`. In this case, the model function must return an -`EstimatorSpec` that contains the loss and a training operation. - -Building the training operation will require an optimizer. We will use -@{tf.train.AdagradOptimizer} because we're mimicking the `DNNClassifier`, which -also uses `Adagrad` by default. 
The `tf.train` package provides many other -optimizers—feel free to experiment with them. - -Here is the code that builds the optimizer: - -``` python -optimizer = tf.train.AdagradOptimizer(learning_rate=0.1) -``` - -Next, we build the training operation using the optimizer's -@{tf.train.Optimizer.minimize$`minimize`} method on the loss we calculated -earlier. - -The `minimize` method also takes a `global_step` parameter. TensorFlow uses this -parameter to count the number of training steps that have been processed -(to know when to end a training run). Furthermore, the `global_step` is -essential for TensorBoard graphs to work correctly. Simply call -@{tf.train.get_global_step} and pass the result to the `global_step` -argument of `minimize`. - -Here's the code to train the model: - -``` python -train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step()) -``` - -The @{tf.estimator.EstimatorSpec$`EstimatorSpec`} returned for training -must have the following fields set: - -* `loss`, which contains the value of the loss function. -* `train_op`, which executes a training step. - -Here's our code to call `EstimatorSpec`: - -```python -return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op) -``` - -The model function is now complete. - -## The custom Estimator - -Instantiate the custom Estimator through the Estimator base class as follows: - -```python - # Build 2 hidden layer DNN with 10, 10 units respectively. - classifier = tf.estimator.Estimator( - model_fn=my_model, - params={ - 'feature_columns': my_feature_columns, - # Two hidden layers of 10 nodes each. - 'hidden_units': [10, 10], - # The model must choose between 3 classes. - 'n_classes': 3, - }) -``` -Here the `params` dictionary serves the same purpose as the key-word -arguments of `DNNClassifier`; that is, the `params` dictionary lets you -configure your Estimator without modifying the code in the `model_fn`. - -The rest of the code to train, evaluate, and generate predictions using our -Estimator is the same as in the -@{$premade_estimators$Premade Estimators} chapter. For -example, the following line will train the model: - -```python -# Train the Model. -classifier.train( - input_fn=lambda:iris_data.train_input_fn(train_x, train_y, args.batch_size), - steps=args.train_steps) -``` - -## TensorBoard - -You can view training results for your custom Estimator in TensorBoard. To see -this reporting, start TensorBoard from your command line as follows: - -```bsh -# Replace PATH with the actual path passed as model_dir -tensorboard --logdir=PATH -``` - -Then, open TensorBoard by browsing to: [http://localhost:6006](http://localhost:6006) - -All the pre-made Estimators automatically log a lot of information to -TensorBoard. With custom Estimators, however, TensorBoard only provides one -default log (a graph of the loss) plus the information you explicitly tell -TensorBoard to log. For the custom Estimator you just created, TensorBoard -generates the following: - -
-[Figure: TensorBoard displays three 'scalar' graphs: accuracy, loss, and steps/second]
-
-
-In brief, here's what the three graphs tell you:
-
-* global_step/sec: A performance indicator showing how many batches (gradient
-  updates) we processed per second as the model trains.
-
-* loss: The loss reported.
-
-* accuracy: The accuracy is recorded by the following two lines:
-
-  * `eval_metric_ops={'my_accuracy': accuracy}`, during evaluation.
-  * `tf.summary.scalar('accuracy', accuracy[1])`, during training.
-
-These TensorBoard graphs are one of the main reasons it's important to pass a
-`global_step` to your optimizer's `minimize` method. Without it, the model
-can't record the x-coordinate for these graphs.
-
-Note the following in the `my_accuracy` and `loss` graphs:
-
-* The orange line represents training.
-* The blue dot represents evaluation.
-
-During training, summaries (the orange line) are recorded periodically as
-batches are processed, which is why the line spans the full x-axis range.
-
-By contrast, evaluation produces only a single point on the graph for each call
-to `evaluate`. This point contains the average over the entire evaluation call,
-and it has no width on the graph because it is computed entirely from the model
-state at a particular training step (from a single checkpoint).
-
-As the following figure suggests, you can selectively enable or disable this
-reporting for each run using the controls on the left side.
-[Figure: check-boxes on the left side let you select which runs are shown, enabling or disabling the reporting]
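Besides the default loss graph, you can surface additional values in TensorBoard by logging them explicitly inside the model function. The following is a minimal sketch assuming the `my_model` function from this guide; the extra `mean_logit` summary is purely illustrative and not part of the original example:

```python
# Inside my_model(features, labels, mode, params), after `logits`, `loss`, and
# `accuracy` have been computed:
tf.summary.scalar('accuracy', accuracy[1])               # shown earlier in this guide
tf.summary.scalar('mean_logit', tf.reduce_mean(logits))  # hypothetical extra scalar
```

Each such summary appears as one more 'scalar' graph during training.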
- - -## Summary - -Although pre-made Estimators can be an effective way to quickly create new -models, you will often need the additional flexibility that custom Estimators -provide. Fortunately, pre-made and custom Estimators follow the same -programming model. The only practical difference is that you must write a model -function for custom Estimators; everything else is the same. - -For more details, be sure to check out: - -* The - [official TensorFlow implementation of MNIST](https://github.com/tensorflow/models/tree/master/official/mnist), - which uses a custom estimator. -* The TensorFlow - [official models repository](https://github.com/tensorflow/models/tree/master/official), - which contains more curated examples using custom estimators. -* This [TensorBoard video](https://youtu.be/eBbEDRsCmv4), which introduces - TensorBoard. -* The @{$low_level_intro$Low Level Introduction}, which demonstrates - how to experiment directly with TensorFlow's low level APIs, making debugging - easier. diff --git a/tensorflow/docs_src/programmers_guide/datasets.md b/tensorflow/docs_src/programmers_guide/datasets.md deleted file mode 100644 index 8b69860a68..0000000000 --- a/tensorflow/docs_src/programmers_guide/datasets.md +++ /dev/null @@ -1,823 +0,0 @@ -# Importing Data - -The @{tf.data} API enables you to build complex input pipelines from -simple, reusable pieces. For example, the pipeline for an image model might -aggregate data from files in a distributed file system, apply random -perturbations to each image, and merge randomly selected images into a batch -for training. The pipeline for a text model might involve extracting symbols -from raw text data, converting them to embedding identifiers with a lookup -table, and batching together sequences of different lengths. The `tf.data` API -makes it easy to deal with large amounts of data, different data formats, and -complicated transformations. - -The `tf.data` API introduces two new abstractions to TensorFlow: - -* A `tf.data.Dataset` represents a sequence of elements, in which - each element contains one or more `Tensor` objects. For example, in an image - pipeline, an element might be a single training example, with a pair of - tensors representing the image data and a label. There are two distinct - ways to create a dataset: - - * Creating a **source** (e.g. `Dataset.from_tensor_slices()`) constructs a - dataset from - one or more `tf.Tensor` objects. - - * Applying a **transformation** (e.g. `Dataset.batch()`) constructs a dataset - from one or more `tf.data.Dataset` objects. - -* A `tf.data.Iterator` provides the main way to extract elements from a - dataset. The operation returned by `Iterator.get_next()` yields the next - element of a `Dataset` when executed, and typically acts as the interface - between input pipeline code and your model. The simplest iterator is a - "one-shot iterator", which is associated with a particular `Dataset` and - iterates through it once. For more sophisticated uses, the - `Iterator.initializer` operation enables you to reinitialize and parameterize - an iterator with different datasets, so that you can, for example, iterate - over training and validation data multiple times in the same program. - -## Basic mechanics - -This section of the guide describes the fundamentals of creating different kinds -of `Dataset` and `Iterator` objects, and how to extract data from them. - -To start an input pipeline, you must define a *source*. 
For example, to -construct a `Dataset` from some tensors in memory, you can use -`tf.data.Dataset.from_tensors()` or -`tf.data.Dataset.from_tensor_slices()`. Alternatively, if your input -data are on disk in the recommended TFRecord format, you can construct a -`tf.data.TFRecordDataset`. - -Once you have a `Dataset` object, you can *transform* it into a new `Dataset` by -chaining method calls on the `tf.data.Dataset` object. For example, you -can apply per-element transformations such as `Dataset.map()` (to apply a -function to each element), and multi-element transformations such as -`Dataset.batch()`. See the documentation for @{tf.data.Dataset} -for a complete list of transformations. - -The most common way to consume values from a `Dataset` is to make an -**iterator** object that provides access to one element of the dataset at a time -(for example, by calling `Dataset.make_one_shot_iterator()`). A -`tf.data.Iterator` provides two operations: `Iterator.initializer`, -which enables you to (re)initialize the iterator's state; and -`Iterator.get_next()`, which returns `tf.Tensor` objects that correspond to the -symbolic next element. Depending on your use case, you might choose a different -type of iterator, and the options are outlined below. - -### Dataset structure - -A dataset comprises elements that each have the same structure. An element -contains one or more `tf.Tensor` objects, called *components*. Each component -has a `tf.DType` representing the type of elements in the tensor, and a -`tf.TensorShape` representing the (possibly partially specified) static shape of -each element. The `Dataset.output_types` and `Dataset.output_shapes` properties -allow you to inspect the inferred types and shapes of each component of a -dataset element. The *nested structure* of these properties map to the structure -of an element, which may be a single tensor, a tuple of tensors, or a nested -tuple of tensors. For example: - -```python -dataset1 = tf.data.Dataset.from_tensor_slices(tf.random_uniform([4, 10])) -print(dataset1.output_types) # ==> "tf.float32" -print(dataset1.output_shapes) # ==> "(10,)" - -dataset2 = tf.data.Dataset.from_tensor_slices( - (tf.random_uniform([4]), - tf.random_uniform([4, 100], maxval=100, dtype=tf.int32))) -print(dataset2.output_types) # ==> "(tf.float32, tf.int32)" -print(dataset2.output_shapes) # ==> "((), (100,))" - -dataset3 = tf.data.Dataset.zip((dataset1, dataset2)) -print(dataset3.output_types) # ==> (tf.float32, (tf.float32, tf.int32)) -print(dataset3.output_shapes) # ==> "(10, ((), (100,)))" -``` - -It is often convenient to give names to each component of an element, for -example if they represent different features of a training example. In addition -to tuples, you can use `collections.namedtuple` or a dictionary mapping strings -to tensors to represent a single element of a `Dataset`. - -```python -dataset = tf.data.Dataset.from_tensor_slices( - {"a": tf.random_uniform([4]), - "b": tf.random_uniform([4, 100], maxval=100, dtype=tf.int32)}) -print(dataset.output_types) # ==> "{'a': tf.float32, 'b': tf.int32}" -print(dataset.output_shapes) # ==> "{'a': (), 'b': (100,)}" -``` - -The `Dataset` transformations support datasets of any structure. When using the -`Dataset.map()`, `Dataset.flat_map()`, and `Dataset.filter()` transformations, -which apply a function to each element, the element structure determines the -arguments of the function: - -```python -dataset1 = dataset1.map(lambda x: ...) - -dataset2 = dataset2.flat_map(lambda x, y: ...) 
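# `dataset2` elements are (scalar, vector) pairs, so the function passed to
# `flat_map()` receives two arguments, one per component.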
- -# Note: Argument destructuring is not available in Python 3. -dataset3 = dataset3.filter(lambda x, (y, z): ...) -``` - -### Creating an iterator - -Once you have built a `Dataset` to represent your input data, the next step is to -create an `Iterator` to access elements from that dataset. The `tf.data` API -currently supports the following iterators, in increasing level of -sophistication: - -* **one-shot**, -* **initializable**, -* **reinitializable**, and -* **feedable**. - -A **one-shot** iterator is the simplest form of iterator, which only supports -iterating once through a dataset, with no need for explicit initialization. -One-shot iterators handle almost all of the cases that the existing queue-based -input pipelines support, but they do not support parameterization. Using the -example of `Dataset.range()`: - -```python -dataset = tf.data.Dataset.range(100) -iterator = dataset.make_one_shot_iterator() -next_element = iterator.get_next() - -for i in range(100): - value = sess.run(next_element) - assert i == value -``` - -Note: Currently, one-shot iterators are the only type that is easily usable -with an `Estimator`. - -An **initializable** iterator requires you to run an explicit -`iterator.initializer` operation before using it. In exchange for this -inconvenience, it enables you to *parameterize* the definition of the dataset, -using one or more `tf.placeholder()` tensors that can be fed when you -initialize the iterator. Continuing the `Dataset.range()` example: - -```python -max_value = tf.placeholder(tf.int64, shape=[]) -dataset = tf.data.Dataset.range(max_value) -iterator = dataset.make_initializable_iterator() -next_element = iterator.get_next() - -# Initialize an iterator over a dataset with 10 elements. -sess.run(iterator.initializer, feed_dict={max_value: 10}) -for i in range(10): - value = sess.run(next_element) - assert i == value - -# Initialize the same iterator over a dataset with 100 elements. -sess.run(iterator.initializer, feed_dict={max_value: 100}) -for i in range(100): - value = sess.run(next_element) - assert i == value -``` - -A **reinitializable** iterator can be initialized from multiple different -`Dataset` objects. For example, you might have a training input pipeline that -uses random perturbations to the input images to improve generalization, and -a validation input pipeline that evaluates predictions on unmodified data. These -pipelines will typically use different `Dataset` objects that have the same -structure (i.e. the same types and compatible shapes for each component). - -```python -# Define training and validation datasets with the same structure. -training_dataset = tf.data.Dataset.range(100).map( - lambda x: x + tf.random_uniform([], -10, 10, tf.int64)) -validation_dataset = tf.data.Dataset.range(50) - -# A reinitializable iterator is defined by its structure. We could use the -# `output_types` and `output_shapes` properties of either `training_dataset` -# or `validation_dataset` here, because they are compatible. -iterator = tf.data.Iterator.from_structure(training_dataset.output_types, - training_dataset.output_shapes) -next_element = iterator.get_next() - -training_init_op = iterator.make_initializer(training_dataset) -validation_init_op = iterator.make_initializer(validation_dataset) - -# Run 20 epochs in which the training dataset is traversed, followed by the -# validation dataset. -for _ in range(20): - # Initialize an iterator over the training dataset. 
- sess.run(training_init_op) - for _ in range(100): - sess.run(next_element) - - # Initialize an iterator over the validation dataset. - sess.run(validation_init_op) - for _ in range(50): - sess.run(next_element) -``` - -A **feedable** iterator can be used together with @{tf.placeholder} to select -what `Iterator` to use in each call to @{tf.Session.run}, via the familiar -`feed_dict` mechanism. It offers the same functionality as a reinitializable -iterator, but it does not require you to initialize the iterator from the start -of a dataset when you switch between iterators. For example, using the same -training and validation example from above, you can use -@{tf.data.Iterator.from_string_handle} to define a feedable iterator -that allows you to switch between the two datasets: - -```python -# Define training and validation datasets with the same structure. -training_dataset = tf.data.Dataset.range(100).map( - lambda x: x + tf.random_uniform([], -10, 10, tf.int64)).repeat() -validation_dataset = tf.data.Dataset.range(50) - -# A feedable iterator is defined by a handle placeholder and its structure. We -# could use the `output_types` and `output_shapes` properties of either -# `training_dataset` or `validation_dataset` here, because they have -# identical structure. -handle = tf.placeholder(tf.string, shape=[]) -iterator = tf.data.Iterator.from_string_handle( - handle, training_dataset.output_types, training_dataset.output_shapes) -next_element = iterator.get_next() - -# You can use feedable iterators with a variety of different kinds of iterator -# (such as one-shot and initializable iterators). -training_iterator = training_dataset.make_one_shot_iterator() -validation_iterator = validation_dataset.make_initializable_iterator() - -# The `Iterator.string_handle()` method returns a tensor that can be evaluated -# and used to feed the `handle` placeholder. -training_handle = sess.run(training_iterator.string_handle()) -validation_handle = sess.run(validation_iterator.string_handle()) - -# Loop forever, alternating between training and validation. -while True: - # Run 200 steps using the training dataset. Note that the training dataset is - # infinite, and we resume from where we left off in the previous `while` loop - # iteration. - for _ in range(200): - sess.run(next_element, feed_dict={handle: training_handle}) - - # Run one pass over the validation dataset. - sess.run(validation_iterator.initializer) - for _ in range(50): - sess.run(next_element, feed_dict={handle: validation_handle}) -``` - -### Consuming values from an iterator - -The `Iterator.get_next()` method returns one or more `tf.Tensor` objects that -correspond to the symbolic next element of an iterator. Each time these tensors -are evaluated, they take the value of the next element in the underlying -dataset. (Note that, like other stateful objects in TensorFlow, calling -`Iterator.get_next()` does not immediately advance the iterator. Instead you -must use the returned `tf.Tensor` objects in a TensorFlow expression, and pass -the result of that expression to `tf.Session.run()` to get the next elements and -advance the iterator.) - -If the iterator reaches the end of the dataset, executing -the `Iterator.get_next()` operation will raise a `tf.errors.OutOfRangeError`. -After this point the iterator will be in an unusable state, and you must -initialize it again if you want to use it further. 
- -```python -dataset = tf.data.Dataset.range(5) -iterator = dataset.make_initializable_iterator() -next_element = iterator.get_next() - -# Typically `result` will be the output of a model, or an optimizer's -# training operation. -result = tf.add(next_element, next_element) - -sess.run(iterator.initializer) -print(sess.run(result)) # ==> "0" -print(sess.run(result)) # ==> "2" -print(sess.run(result)) # ==> "4" -print(sess.run(result)) # ==> "6" -print(sess.run(result)) # ==> "8" -try: - sess.run(result) -except tf.errors.OutOfRangeError: - print("End of dataset") # ==> "End of dataset" -``` - -A common pattern is to wrap the "training loop" in a `try`-`except` block: - -```python -sess.run(iterator.initializer) -while True: - try: - sess.run(result) - except tf.errors.OutOfRangeError: - break -``` - -If each element of the dataset has a nested structure, the return value of -`Iterator.get_next()` will be one or more `tf.Tensor` objects in the same -nested structure: - -```python -dataset1 = tf.data.Dataset.from_tensor_slices(tf.random_uniform([4, 10])) -dataset2 = tf.data.Dataset.from_tensor_slices((tf.random_uniform([4]), tf.random_uniform([4, 100]))) -dataset3 = tf.data.Dataset.zip((dataset1, dataset2)) - -iterator = dataset3.make_initializable_iterator() - -sess.run(iterator.initializer) -next1, (next2, next3) = iterator.get_next() -``` - -Note that `next1`, `next2`, and `next3` are tensors produced by the -same op/node (created by `Iterator.get_next()`). Therefore, evaluating *any* of -these tensors will advance the iterator for all components. A typical consumer -of an iterator will include all components in a single expression. - -### Saving iterator state - -The @{tf.contrib.data.make_saveable_from_iterator} function creates a -`SaveableObject` from an iterator, which can be used to save and -restore the current state of the iterator (and, effectively, the whole input -pipeline). A saveable object thus created can be added to @{tf.train.Saver} -variables list or the `tf.GraphKeys.SAVEABLE_OBJECTS` collection for saving and -restoring in the same manner as a @{tf.Variable}. Refer to -@{$saved_model$Saving and Restoring} for details on how to save and restore -variables. - -```python -# Create saveable object from iterator. -saveable = tf.contrib.data.make_saveable_from_iterator(iterator) - -# Save the iterator state by adding it to the saveable objects collection. -tf.add_to_collection(tf.GraphKeys.SAVEABLE_OBJECTS, saveable) -saver = tf.train.Saver() - -with tf.Session() as sess: - - if should_checkpoint: - saver.save(path_to_checkpoint) - -# Restore the iterator state. -with tf.Session() as sess: - saver.restore(sess, path_to_checkpoint) -``` - -## Reading input data - -### Consuming NumPy arrays - -If all of your input data fit in memory, the simplest way to create a `Dataset` -from them is to convert them to `tf.Tensor` objects and use -`Dataset.from_tensor_slices()`. - -```python -# Load the training data into two NumPy arrays, for example using `np.load()`. -with np.load("/var/data/training_data.npy") as data: - features = data["features"] - labels = data["labels"] - -# Assume that each row of `features` corresponds to the same row as `labels`. -assert features.shape[0] == labels.shape[0] - -dataset = tf.data.Dataset.from_tensor_slices((features, labels)) -``` - -Note that the above code snippet will embed the `features` and `labels` arrays -in your TensorFlow graph as `tf.constant()` operations. 
This works well for a -small dataset, but wastes memory---because the contents of the array will be -copied multiple times---and can run into the 2GB limit for the `tf.GraphDef` -protocol buffer. - -As an alternative, you can define the `Dataset` in terms of `tf.placeholder()` -tensors, and *feed* the NumPy arrays when you initialize an `Iterator` over the -dataset. - -```python -# Load the training data into two NumPy arrays, for example using `np.load()`. -with np.load("/var/data/training_data.npy") as data: - features = data["features"] - labels = data["labels"] - -# Assume that each row of `features` corresponds to the same row as `labels`. -assert features.shape[0] == labels.shape[0] - -features_placeholder = tf.placeholder(features.dtype, features.shape) -labels_placeholder = tf.placeholder(labels.dtype, labels.shape) - -dataset = tf.data.Dataset.from_tensor_slices((features_placeholder, labels_placeholder)) -# [Other transformations on `dataset`...] -dataset = ... -iterator = dataset.make_initializable_iterator() - -sess.run(iterator.initializer, feed_dict={features_placeholder: features, - labels_placeholder: labels}) -``` - -### Consuming TFRecord data - -The `tf.data` API supports a variety of file formats so that you can process -large datasets that do not fit in memory. For example, the TFRecord file format -is a simple record-oriented binary format that many TensorFlow applications use -for training data. The `tf.data.TFRecordDataset` class enables you to -stream over the contents of one or more TFRecord files as part of an input -pipeline. - -```python -# Creates a dataset that reads all of the examples from two files. -filenames = ["/var/data/file1.tfrecord", "/var/data/file2.tfrecord"] -dataset = tf.data.TFRecordDataset(filenames) -``` - -The `filenames` argument to the `TFRecordDataset` initializer can either be a -string, a list of strings, or a `tf.Tensor` of strings. Therefore if you have -two sets of files for training and validation purposes, you can use a -`tf.placeholder(tf.string)` to represent the filenames, and initialize an -iterator from the appropriate filenames: - -```python -filenames = tf.placeholder(tf.string, shape=[None]) -dataset = tf.data.TFRecordDataset(filenames) -dataset = dataset.map(...) # Parse the record into tensors. -dataset = dataset.repeat() # Repeat the input indefinitely. -dataset = dataset.batch(32) -iterator = dataset.make_initializable_iterator() - -# You can feed the initializer with the appropriate filenames for the current -# phase of execution, e.g. training vs. validation. - -# Initialize `iterator` with training data. -training_filenames = ["/var/data/file1.tfrecord", "/var/data/file2.tfrecord"] -sess.run(iterator.initializer, feed_dict={filenames: training_filenames}) - -# Initialize `iterator` with validation data. -validation_filenames = ["/var/data/validation1.tfrecord", ...] -sess.run(iterator.initializer, feed_dict={filenames: validation_filenames}) -``` - -### Consuming text data - -Many datasets are distributed as one or more text files. The -`tf.data.TextLineDataset` provides an easy way to extract lines from -one or more text files. Given one or more filenames, a `TextLineDataset` will -produce one string-valued element per line of those files. Like a -`TFRecordDataset`, `TextLineDataset` accepts `filenames` as a `tf.Tensor`, so -you can parameterize it by passing a `tf.placeholder(tf.string)`. 
- -```python -filenames = ["/var/data/file1.txt", "/var/data/file2.txt"] -dataset = tf.data.TextLineDataset(filenames) -``` - -By default, a `TextLineDataset` yields *every* line of each file, which may -not be desirable, for example if the file starts with a header line, or contains -comments. These lines can be removed using the `Dataset.skip()` and -`Dataset.filter()` transformations. To apply these transformations to each -file separately, we use `Dataset.flat_map()` to create a nested `Dataset` for -each file. - -```python -filenames = ["/var/data/file1.txt", "/var/data/file2.txt"] - -dataset = tf.data.Dataset.from_tensor_slices(filenames) - -# Use `Dataset.flat_map()` to transform each file as a separate nested dataset, -# and then concatenate their contents sequentially into a single "flat" dataset. -# * Skip the first line (header row). -# * Filter out lines beginning with "#" (comments). -dataset = dataset.flat_map( - lambda filename: ( - tf.data.TextLineDataset(filename) - .skip(1) - .filter(lambda line: tf.not_equal(tf.substr(line, 0, 1), "#")))) -``` - -### Consuming CSV data - -The CSV file format is a popular format for storing tabular data in plain text. -The @{tf.contrib.data.CsvDataset} class provides a way to extract records from -one or more CSV files that comply with [RFC 4180](https://tools.ietf.org/html/rfc4180). -Given one or more filenames and a list of defaults, a `CsvDataset` will produce -a tuple of elements whose types correspond to the types of the defaults -provided, per CSV record. Like `TFRecordDataset` and `TextLineDataset`, -`CsvDataset` accepts `filenames` as a `tf.Tensor`, so you can parameterize it -by passing a `tf.placeholder(tf.string)`. - -``` -# Creates a dataset that reads all of the records from two CSV files, each with -# eight float columns -filenames = ["/var/data/file1.csv", "/var/data/file2.csv"] -record_defaults = [tf.float32] * 8 # Eight required float columns -dataset = tf.contrib.data.CsvDataset(filenames, record_defaults) -``` - -If some columns are empty, you can provide defaults instead of types. - -``` -# Creates a dataset that reads all of the records from two CSV files, each with -# four float columns which may have missing values -record_defaults = [[0.0]] * 8 -dataset = tf.contrib.data.CsvDataset(filenames, record_defaults) -``` - -By default, a `CsvDataset` yields *every* column of *every* line of the file, -which may not be desirable, for example if the file starts with a header line -that should be ignored, or if some columns are not required in the input. -These lines and fields can be removed with the `header` and `select_cols` -arguments respectively. - -``` -# Creates a dataset that reads all of the records from two CSV files with -# headers, extracting float data from columns 2 and 4. -record_defaults = [[0.0]] * 2 # Only provide defaults for the selected columns -dataset = tf.contrib.data.CsvDataset(filenames, record_defaults, header=True, select_cols=[2,4]) -``` - - -## Preprocessing data with `Dataset.map()` - -The `Dataset.map(f)` transformation produces a new dataset by applying a given -function `f` to each element of the input dataset. It is based on -the -[`map()` function](https://en.wikipedia.org/wiki/Map_(higher-order_function)) -that is commonly applied to lists (and other structures) in functional -programming languages. The function `f` takes the `tf.Tensor` objects that -represent a single element in the input, and returns the `tf.Tensor` objects -that will represent a single element in the new dataset. 
Its implementation uses -standard TensorFlow operations to transform one element into another. - -This section covers common examples of how to use `Dataset.map()`. - -### Parsing `tf.Example` protocol buffer messages - -Many input pipelines extract `tf.train.Example` protocol buffer messages from a -TFRecord-format file (written, for example, using -`tf.python_io.TFRecordWriter`). Each `tf.train.Example` record contains one or -more "features", and the input pipeline typically converts these features into -tensors. - -```python -# Transforms a scalar string `example_proto` into a pair of a scalar string and -# a scalar integer, representing an image and its label, respectively. -def _parse_function(example_proto): - features = {"image": tf.FixedLenFeature((), tf.string, default_value=""), - "label": tf.FixedLenFeature((), tf.int32, default_value=0)} - parsed_features = tf.parse_single_example(example_proto, features) - return parsed_features["image"], parsed_features["label"] - -# Creates a dataset that reads all of the examples from two files, and extracts -# the image and label features. -filenames = ["/var/data/file1.tfrecord", "/var/data/file2.tfrecord"] -dataset = tf.data.TFRecordDataset(filenames) -dataset = dataset.map(_parse_function) -``` - -### Decoding image data and resizing it - -When training a neural network on real-world image data, it is often necessary -to convert images of different sizes to a common size, so that they may be -batched into a fixed size. - -```python -# Reads an image from a file, decodes it into a dense tensor, and resizes it -# to a fixed shape. -def _parse_function(filename, label): - image_string = tf.read_file(filename) - image_decoded = tf.image.decode_jpeg(image_string) - image_resized = tf.image.resize_images(image_decoded, [28, 28]) - return image_resized, label - -# A vector of filenames. -filenames = tf.constant(["/var/data/image1.jpg", "/var/data/image2.jpg", ...]) - -# `labels[i]` is the label for the image in `filenames[i]. -labels = tf.constant([0, 37, ...]) - -dataset = tf.data.Dataset.from_tensor_slices((filenames, labels)) -dataset = dataset.map(_parse_function) -``` - -### Applying arbitrary Python logic with `tf.py_func()` - -For performance reasons, we encourage you to use TensorFlow operations for -preprocessing your data whenever possible. However, it is sometimes useful to -call upon external Python libraries when parsing your input data. To do so, -invoke, the `tf.py_func()` operation in a `Dataset.map()` transformation. - -```python -import cv2 - -# Use a custom OpenCV function to read the image, instead of the standard -# TensorFlow `tf.read_file()` operation. -def _read_py_function(filename, label): - image_decoded = cv2.imread(filename.decode(), cv2.IMREAD_GRAYSCALE) - return image_decoded, label - -# Use standard TensorFlow operations to resize the image to a fixed shape. -def _resize_function(image_decoded, label): - image_decoded.set_shape([None, None, None]) - image_resized = tf.image.resize_images(image_decoded, [28, 28]) - return image_resized, label - -filenames = ["/var/data/image1.jpg", "/var/data/image2.jpg", ...] -labels = [0, 37, 29, 1, ...] 
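# `tf.py_func()` (used below) wraps the plain-Python `_read_py_function` so it
# runs as a graph op; its output shapes are unknown to TensorFlow, which is why
# `_resize_function` restores them with `set_shape()` before resizing.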
- -dataset = tf.data.Dataset.from_tensor_slices((filenames, labels)) -dataset = dataset.map( - lambda filename, label: tuple(tf.py_func( - _read_py_function, [filename, label], [tf.uint8, label.dtype]))) -dataset = dataset.map(_resize_function) -``` - - - -## Batching dataset elements - -### Simple batching - -The simplest form of batching stacks `n` consecutive elements of a dataset into -a single element. The `Dataset.batch()` transformation does exactly this, with -the same constraints as the `tf.stack()` operator, applied to each component -of the elements: i.e. for each component *i*, all elements must have a tensor -of the exact same shape. - -```python -inc_dataset = tf.data.Dataset.range(100) -dec_dataset = tf.data.Dataset.range(0, -100, -1) -dataset = tf.data.Dataset.zip((inc_dataset, dec_dataset)) -batched_dataset = dataset.batch(4) - -iterator = batched_dataset.make_one_shot_iterator() -next_element = iterator.get_next() - -print(sess.run(next_element)) # ==> ([0, 1, 2, 3], [ 0, -1, -2, -3]) -print(sess.run(next_element)) # ==> ([4, 5, 6, 7], [-4, -5, -6, -7]) -print(sess.run(next_element)) # ==> ([8, 9, 10, 11], [-8, -9, -10, -11]) -``` - -### Batching tensors with padding - -The above recipe works for tensors that all have the same size. However, many -models (e.g. sequence models) work with input data that can have varying size -(e.g. sequences of different lengths). To handle this case, the -`Dataset.padded_batch()` transformation enables you to batch tensors of -different shape by specifying one or more dimensions in which they may be -padded. - -```python -dataset = tf.data.Dataset.range(100) -dataset = dataset.map(lambda x: tf.fill([tf.cast(x, tf.int32)], x)) -dataset = dataset.padded_batch(4, padded_shapes=[None]) - -iterator = dataset.make_one_shot_iterator() -next_element = iterator.get_next() - -print(sess.run(next_element)) # ==> [[0, 0, 0], [1, 0, 0], [2, 2, 0], [3, 3, 3]] -print(sess.run(next_element)) # ==> [[4, 4, 4, 4, 0, 0, 0], - # [5, 5, 5, 5, 5, 0, 0], - # [6, 6, 6, 6, 6, 6, 0], - # [7, 7, 7, 7, 7, 7, 7]] -``` - -The `Dataset.padded_batch()` transformation allows you to set different padding -for each dimension of each component, and it may be variable-length (signified -by `None` in the example above) or constant-length. It is also possible to -override the padding value, which defaults to 0. - - - -## Training workflows - -### Processing multiple epochs - -The `tf.data` API offers two main ways to process multiple epochs of the same -data. - -The simplest way to iterate over a dataset in multiple epochs is to use the -`Dataset.repeat()` transformation. For example, to create a dataset that repeats -its input for 10 epochs: - -```python -filenames = ["/var/data/file1.tfrecord", "/var/data/file2.tfrecord"] -dataset = tf.data.TFRecordDataset(filenames) -dataset = dataset.map(...) -dataset = dataset.repeat(10) -dataset = dataset.batch(32) -``` - -Applying the `Dataset.repeat()` transformation with no arguments will repeat -the input indefinitely. The `Dataset.repeat()` transformation concatenates its -arguments without signaling the end of one epoch and the beginning of the next -epoch. - -If you want to receive a signal at the end of each epoch, you can write a -training loop that catches the `tf.errors.OutOfRangeError` at the end of a -dataset. At that point you might collect some statistics (e.g. the validation -error) for the epoch. 
- -```python -filenames = ["/var/data/file1.tfrecord", "/var/data/file2.tfrecord"] -dataset = tf.data.TFRecordDataset(filenames) -dataset = dataset.map(...) -dataset = dataset.batch(32) -iterator = dataset.make_initializable_iterator() -next_element = iterator.get_next() - -# Compute for 100 epochs. -for _ in range(100): - sess.run(iterator.initializer) - while True: - try: - sess.run(next_element) - except tf.errors.OutOfRangeError: - break - - # [Perform end-of-epoch calculations here.] -``` - -### Randomly shuffling input data - -The `Dataset.shuffle()` transformation randomly shuffles the input dataset -using a similar algorithm to `tf.RandomShuffleQueue`: it maintains a fixed-size -buffer and chooses the next element uniformly at random from that buffer. - -```python -filenames = ["/var/data/file1.tfrecord", "/var/data/file2.tfrecord"] -dataset = tf.data.TFRecordDataset(filenames) -dataset = dataset.map(...) -dataset = dataset.shuffle(buffer_size=10000) -dataset = dataset.batch(32) -dataset = dataset.repeat() -``` - -### Using high-level APIs - -The @{tf.train.MonitoredTrainingSession} API simplifies many aspects of running -TensorFlow in a distributed setting. `MonitoredTrainingSession` uses the -@{tf.errors.OutOfRangeError} to signal that training has completed, so to use it -with the `tf.data` API, we recommend using -`Dataset.make_one_shot_iterator()`. For example: - -```python -filenames = ["/var/data/file1.tfrecord", "/var/data/file2.tfrecord"] -dataset = tf.data.TFRecordDataset(filenames) -dataset = dataset.map(...) -dataset = dataset.shuffle(buffer_size=10000) -dataset = dataset.batch(32) -dataset = dataset.repeat(num_epochs) -iterator = dataset.make_one_shot_iterator() - -next_example, next_label = iterator.get_next() -loss = model_function(next_example, next_label) - -training_op = tf.train.AdagradOptimizer(...).minimize(loss) - -with tf.train.MonitoredTrainingSession(...) as sess: - while not sess.should_stop(): - sess.run(training_op) -``` - -To use a `Dataset` in the `input_fn` of a @{tf.estimator.Estimator}, we also -recommend using `Dataset.make_one_shot_iterator()`. For example: - -```python -def dataset_input_fn(): - filenames = ["/var/data/file1.tfrecord", "/var/data/file2.tfrecord"] - dataset = tf.data.TFRecordDataset(filenames) - - # Use `tf.parse_single_example()` to extract data from a `tf.Example` - # protocol buffer, and perform any additional per-record preprocessing. - def parser(record): - keys_to_features = { - "image_data": tf.FixedLenFeature((), tf.string, default_value=""), - "date_time": tf.FixedLenFeature((), tf.int64, default_value=""), - "label": tf.FixedLenFeature((), tf.int64, - default_value=tf.zeros([], dtype=tf.int64)), - } - parsed = tf.parse_single_example(record, keys_to_features) - - # Perform additional preprocessing on the parsed data. - image = tf.image.decode_jpeg(parsed["image_data"]) - image = tf.reshape(image, [299, 299, 1]) - label = tf.cast(parsed["label"], tf.int32) - - return {"image_data": image, "date_time": parsed["date_time"]}, label - - # Use `Dataset.map()` to build a pair of a feature dictionary and a label - # tensor for each example. - dataset = dataset.map(parser) - dataset = dataset.shuffle(buffer_size=10000) - dataset = dataset.batch(32) - dataset = dataset.repeat(num_epochs) - iterator = dataset.make_one_shot_iterator() - - # `features` is a dictionary in which each value is a batch of values for - # that feature; `labels` is a batch of labels. 
- features, labels = iterator.get_next() - return features, labels -``` diff --git a/tensorflow/docs_src/programmers_guide/datasets_for_estimators.md b/tensorflow/docs_src/programmers_guide/datasets_for_estimators.md deleted file mode 100644 index 345a31b985..0000000000 --- a/tensorflow/docs_src/programmers_guide/datasets_for_estimators.md +++ /dev/null @@ -1,387 +0,0 @@ -# Datasets for Estimators - -The @{tf.data} module contains a collection of classes that allows you to -easily load data, manipulate it, and pipe it into your model. This document -introduces the API by walking through two simple examples: - -* Reading in-memory data from numpy arrays. -* Reading lines from a csv file. - - - -## Basic input - -Taking slices from an array is the simplest way to get started with `tf.data`. - -The @{$premade_estimators$Premade Estimators} chapter describes -the following `train_input_fn`, from -[`iris_data.py`](https://github.com/tensorflow/models/blob/master/samples/core/get_started/iris_data.py), -to pipe the data into the Estimator: - -``` python -def train_input_fn(features, labels, batch_size): - """An input function for training""" - # Convert the inputs to a Dataset. - dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels)) - - # Shuffle, repeat, and batch the examples. - dataset = dataset.shuffle(1000).repeat().batch(batch_size) - - # Return the dataset. - return dataset -``` - -Let's look at this more closely. - -### Arguments - -This function expects three arguments. Arguments expecting an "array" can -accept nearly anything that can be converted to an array with `numpy.array`. -One exception is -[`tuple`](https://docs.python.org/3/tutorial/datastructures.html#tuples-and-sequences) -which, as we will see, has special meaning for `Datasets`. - -* `features`: A `{'feature_name':array}` dictionary (or - [`DataFrame`](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.html)) - containing the raw input features. -* `labels` : An array containing the - [label](https://developers.google.com/machine-learning/glossary/#label) - for each example. -* `batch_size` : An integer indicating the desired batch size. - -In [`premade_estimator.py`](https://github.com/tensorflow/models/blob/master/samples/core/get_started/premade_estimator.py) -we retrieved the Iris data using the `iris_data.load_data()` function. -You can run it, and unpack the results as follows: - -``` python -import iris_data - -# Fetch the data -train, test = iris_data.load_data() -features, labels = train -``` - -Then we passed this data to the input function, with a line similar to this: - -``` python -batch_size=100 -iris_data.train_input_fn(features, labels, batch_size) -``` - -Let's walk through the `train_input_fn()`. - -### Slices - -The function starts by using the @{tf.data.Dataset.from_tensor_slices} function -to create a @{tf.data.Dataset} representing slices of the array. The array is -sliced across the first dimension. For example, an array containing the -@{$tutorials/layers$mnist training data} has a shape of `(60000, 28, 28)`. -Passing this to `from_tensor_slices` returns a `Dataset` object containing -60000 slices, each one a 28x28 image. 
- -The code that returns this `Dataset` is as follows: - -``` python -train, test = tf.keras.datasets.mnist.load_data() -mnist_x, mnist_y = train - -mnist_ds = tf.data.Dataset.from_tensor_slices(mnist_x) -print(mnist_ds) -``` - -This will print the following line, showing the -@{$programmers_guide/tensors#shapes$shapes} and -@{$programmers_guide/tensors#data_types$types} of the items in -the dataset. Note that a `Dataset` does not know how many items it contains. - -``` None - -``` - -The `Dataset` above represents a simple collection of arrays, but datasets are -much more powerful than this. A `Dataset` can transparently handle any nested -combination of dictionaries or tuples (or -[`namedtuple`](https://docs.python.org/2/library/collections.html#collections.namedtuple) -). - -For example after converting the iris `features` -to a standard python dictionary, you can then convert the dictionary of arrays -to a `Dataset` of dictionaries as follows: - -``` python -dataset = tf.data.Dataset.from_tensor_slices(dict(features)) -print(dataset) -``` -``` None - -``` - -Here we see that when a `Dataset` contains structured elements, the `shapes` -and `types` of the `Dataset` take on the same structure. This dataset contains -dictionaries of @{$programmers_guide/tensors#rank$scalars}, all of type -`tf.float64`. - -The first line of the iris `train_input_fn` uses the same functionality, but -adds another level of structure. It creates a dataset containing -`(features_dict, label)` pairs. - -The following code shows that the label is a scalar with type `int64`: - -``` python -# Convert the inputs to a Dataset. -dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels)) -print(dataset) -``` -``` - -``` - -### Manipulation - -Currently the `Dataset` would iterate over the data once, in a fixed order, and -only produce a single element at a time. It needs further processing before it -can be used for training. Fortunately, the `tf.data.Dataset` class provides -methods to better prepare the data for training. The next line of the input -function takes advantage of several of these methods: - -``` python -# Shuffle, repeat, and batch the examples. -dataset = dataset.shuffle(1000).repeat().batch(batch_size) -``` - -The @{tf.data.Dataset.shuffle$`shuffle`} method uses a fixed-size buffer to -shuffle the items as they pass through. In this case the `buffer_size` is -greater than the number of examples in the `Dataset`, ensuring that the data is -completely shuffled (The Iris data set only contains 150 examples). - -The @{tf.data.Dataset.repeat$`repeat`} method restarts the `Dataset` when -it reaches the end. To limit the number of epochs, set the `count` argument. - -The @{tf.data.Dataset.batch$`batch`} method collects a number of examples and -stacks them, to create batches. This adds a dimension to their shape. The new -dimension is added as the first dimension. The following code uses -the `batch` method on the MNIST `Dataset`, from earlier. This results in a -`Dataset` containing 3D arrays representing stacks of `(28,28)` images: - -``` python -print(mnist_ds.batch(100)) -``` - -``` none - -``` -Note that the dataset has an unknown batch size because the last batch will -have fewer elements. - -In `train_input_fn`, after batching the `Dataset` contains 1D vectors of -elements where each scalar was previously: - -```python -print(dataset) -``` -``` - -``` - - -### Return - -At this point the `Dataset` contains `(features_dict, labels)` pairs. 
-This is the format expected by the `train` and `evaluate` methods, so the -`input_fn` returns the dataset. - -The `labels` can/should be omitted when using the `predict` method. - - - - -## Reading a CSV File - -The most common real-world use case for the `Dataset` class is to stream data -from files on disk. The @{tf.data} module includes a variety of -file readers. Let's see how parsing the Iris dataset from the csv file looks -using a `Dataset`. - -The following call to the `iris_data.maybe_download` function downloads the -data if necessary, and returns the pathnames of the resulting files: - -``` python -import iris_data -train_path, test_path = iris_data.maybe_download() -``` - -The [`iris_data.csv_input_fn`](https://github.com/tensorflow/models/blob/master/samples/core/get_started/iris_data.py) -function contains an alternative implementation that parses the csv files using -a `Dataset`. - -Let's look at how to build an Estimator-compatible input function that reads -from the local files. - -### Build the `Dataset` - -We start by building a @{tf.data.TextLineDataset$`TextLineDataset`} object to -read the file one line at a time. Then, we call the -@{tf.data.Dataset.skip$`skip`} method to skip over the first line of the file, which contains a header, not an example: - -``` python -ds = tf.data.TextLineDataset(train_path).skip(1) -``` - -### Build a csv line parser - -We will start by building a function to parse a single line. - -The following `iris_data.parse_line` function accomplishes this task using the -@{tf.decode_csv} function, and some simple python code: - -We must parse each of the lines in the dataset in order to generate the -necessary `(features, label)` pairs. The following `_parse_line` function -calls @{tf.decode_csv} to parse a single line into its features -and the label. Since Estimators require that features be represented as a -dictionary, we rely on Python's built-in `dict` and `zip` functions to build -that dictionary. The feature names are the keys of that dictionary. -We then call the dictionary's `pop` method to remove the label field from -the features dictionary: - -``` python -# Metadata describing the text columns -COLUMNS = ['SepalLength', 'SepalWidth', - 'PetalLength', 'PetalWidth', - 'label'] -FIELD_DEFAULTS = [[0.0], [0.0], [0.0], [0.0], [0]] -def _parse_line(line): - # Decode the line into its fields - fields = tf.decode_csv(line, FIELD_DEFAULTS) - - # Pack the result into a dictionary - features = dict(zip(COLUMNS,fields)) - - # Separate the label from the features - label = features.pop('label') - - return features, label -``` - -### Parse the lines - -Datasets have many methods for manipulating the data while it is being piped -to a model. The most heavily-used method is @{tf.data.Dataset.map$`map`}, which -applies a transformation to each element of the `Dataset`. - -The `map` method takes a `map_func` argument that describes how each item in the -`Dataset` should be transformed. - -
-[Figure: the @{tf.data.Dataset.map$`map`} method applies the `map_func` to transform each item in the `Dataset`]
- -So to parse the lines as they are streamed out of the csv file, we pass our -`_parse_line` function to the `map` method: - -``` python -ds = ds.map(_parse_line) -print(ds) -``` -``` None - -``` - -Now instead of simple scalar strings, the dataset contains `(features, label)` -pairs. - -the remainder of the `iris_data.csv_input_fn` function is identical -to `iris_data.train_input_fn` which was covered in the in the -[Basic input](#basic_input) section. - -### Try it out - -This function can be used as a replacement for -`iris_data.train_input_fn`. It can be used to feed an estimator as follows: - -``` python -train_path, test_path = iris_data.maybe_download() - -# All the inputs are numeric -feature_columns = [ - tf.feature_column.numeric_column(name) - for name in iris_data.CSV_COLUMN_NAMES[:-1]] - -# Build the estimator -est = tf.estimator.LinearClassifier(feature_columns, - n_classes=3) -# Train the estimator -batch_size = 100 -est.train( - steps=1000, - input_fn=lambda : iris_data.csv_input_fn(train_path, batch_size)) -``` - -Estimators expect an `input_fn` to take no arguments. To work around this -restriction, we use `lambda` to capture the arguments and provide the expected -interface. - -## Summary - -The `tf.data` module provides a collection of classes and functions for easily -reading data from a variety of sources. Furthermore, `tf.data` has simple -powerful methods for applying a wide variety of standard and custom -transformations. - -Now you have the basic idea of how to efficiently load data into an -Estimator. Consider the following documents next: - - -* @{$custom_estimators}, which demonstrates how to build your own - custom `Estimator` model. -* The @{$low_level_intro#datasets$Low Level Introduction}, which demonstrates - how to experiment directly with `tf.data.Datasets` using TensorFlow's low - level APIs. -* @{$programmers_guide/datasets} which goes into great detail about additional - functionality of `Datasets`. - diff --git a/tensorflow/docs_src/programmers_guide/debugger.md b/tensorflow/docs_src/programmers_guide/debugger.md deleted file mode 100644 index 6bd941886d..0000000000 --- a/tensorflow/docs_src/programmers_guide/debugger.md +++ /dev/null @@ -1,804 +0,0 @@ -# TensorFlow Debugger - - - -[TOC] - -`tfdbg` is a specialized debugger for TensorFlow. It lets you view the internal -structure and states of running TensorFlow graphs during training and inference, -which is difficult to debug with general-purpose debuggers such as Python's `pdb` -due to TensorFlow's computation-graph paradigm. - -This guide focuses on the command-line interface (CLI) of `tfdbg`. For guide on -how to use the graphical user interface (GUI) of tfdbg, i.e., the -**TensorBoard Debugger Plugin**, please visit -[its README](https://github.com/tensorflow/tensorboard/blob/master/tensorboard/plugins/debugger/README.md). - -Note: The TensorFlow debugger uses a -[curses](https://en.wikipedia.org/wiki/Curses_\(programming_library\))-based text -user interface. On Mac OS X, the `ncurses` library is required and can be -installed with `brew install homebrew/dupes/ncurses`. On Windows, curses isn't as -well supported, so a [readline](https://en.wikipedia.org/wiki/GNU_Readline)-based -interface can be used with tfdbg by installing `pyreadline` with `pip`. If you -use Anaconda3, you can install it with a command such as -`"C:\Program Files\Anaconda3\Scripts\pip.exe" install pyreadline`. 
Unofficial -Windows curses packages can be downloaded -[here](https://www.lfd.uci.edu/~gohlke/pythonlibs/#curses), then subsequently -installed using `pip install .whl`, however curses on Windows may -not work as reliably as curses on Linux or Mac. - -This tutorial demonstrates how to use the **tfdbg** CLI to debug the appearance -of [`nan`s](https://en.wikipedia.org/wiki/NaN) -and [`inf`s](https://en.wikipedia.org/wiki/Infinity), a frequently-encountered -type of bug in TensorFlow model development. -The following example is for users who use the low-level -[`Session`](https://www.tensorflow.org/api_docs/python/tf/Session) API of -TensorFlow. A later section of this document describes how to use **tfdbg** -with a higher-level API, namely `Estimator`s. -To *observe* such an issue, run the following command without the debugger (the -source code can be found -[here](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/debug/examples/debug_mnist.py)): - -```none -python -m tensorflow.python.debug.examples.debug_mnist -``` - -This code trains a simple neural network for MNIST digit image recognition. -Notice that the accuracy increases slightly after the first training step, but -then gets stuck at a low (near-chance) level: - -```none -Accuracy at step 0: 0.1113 -Accuracy at step 1: 0.3183 -Accuracy at step 2: 0.098 -Accuracy at step 3: 0.098 -Accuracy at step 4: 0.098 -``` - -Wondering what might have gone wrong, you suspect that certain nodes in the -training graph generated bad numeric values such as `inf`s and `nan`s, because -this is a common cause of this type of training failure. -Let's use tfdbg to debug this issue and pinpoint the exact graph node where this -numeric problem first surfaced. - -## Wrapping TensorFlow Sessions with tfdbg - -To add support for tfdbg in our example, all that is needed is to add the -following lines of code and wrap the Session object with a debugger wrapper. -This code is already added in -[debug_mnist.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/debug/examples/debug_mnist.py), -so you can activate tfdbg CLI with the `--debug` flag at the command line. - -```python -# Let your BUILD target depend on "//tensorflow/python/debug:debug_py" -# (You don't need to worry about the BUILD dependency if you are using a pip -# install of open-source TensorFlow.) -from tensorflow.python import debug as tf_debug - -sess = tf_debug.LocalCLIDebugWrapperSession(sess) -``` - -This wrapper has the same interface as Session, so enabling debugging requires -no other changes to the code. The wrapper provides additional features, -including: - -* Bringing up a CLI before and after `Session.run()` calls, to let you -control the execution and inspect the graph's internal state. -* Allowing you to register special `filters` for tensor values, to facilitate -the diagnosis of issues. - -In this example, we have already registered a tensor filter called -@{tfdbg.has_inf_or_nan}, -which simply determines if there are any `nan` or `inf` values in any -intermediate tensors (tensors that are neither inputs or outputs of the -`Session.run()` call, but are in the path leading from the inputs to the -outputs). This filter is for `nan`s and `inf`s is a common enough use case that -we ship it with the -@{$python/tfdbg#Classes_for_debug_dump_data_and_directories$`debug_data`} -module. - -Note: You can also write your own custom filters. See -the @{tfdbg.DebugDumpDir.find$API documentation} -of `DebugDumpDir.find()` for additional information. 
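For illustration, here is a rough sketch of what a custom filter can look like. A tensor filter is a Python function that takes a debug datum and the tensor's value and returns a bool; the `has_large_values` name and the 1e3 threshold below are made up for this example:

```python
import numpy as np

def has_large_values(datum, tensor):
  """Flags any numeric intermediate tensor with an absolute value above 1e3."""
  return (isinstance(tensor, np.ndarray) and
          np.issubdtype(tensor.dtype, np.number) and
          np.any(np.abs(tensor) > 1e3))

# `sess` is the LocalCLIDebugWrapperSession created above.
sess.add_tensor_filter("has_large_values", has_large_values)
```

At the `tfdbg>` prompt, such a filter can then be used in the same way as `has_inf_or_nan`, for example with `lt -f has_large_values` or `run -f has_large_values`.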
- -## Debugging Model Training with tfdbg - - -Let's try training the model again, but with the `--debug` flag added this time: - -```none -python -m tensorflow.python.debug.examples.debug_mnist --debug -``` - -The debug wrapper session will prompt you when it is about to execute the first -`Session.run()` call, with information regarding the fetched tensor and feed -dictionaries displayed on the screen. - -![tfdbg run-start UI](https://www.tensorflow.org/images/tfdbg_screenshot_run_start.png) - -This is what we refer to as the *run-start CLI*. It lists the feeds and fetches -to the current `Session.run` call, before executing anything. - -If the screen size is too small to display the content of the message in its -entirety, you can resize it. - -Use the **PageUp** / **PageDown** / **Home** / **End** keys to navigate the -screen output. On most keyboards lacking those keys **Fn + Up** / -**Fn + Down** / **Fn + Right** / **Fn + Left** will work. - -Enter the `run` command (or just `r`) at the command prompt: - -``` -tfdbg> run -``` - -The `run` command causes tfdbg to execute until the end of the next -`Session.run()` call, which calculates the model's accuracy using a test data -set. tfdbg augments the runtime Graph to dump all intermediate tensors. -After the run ends, tfdbg displays all the dumped tensors values in the -*run-end CLI*. For example: - -![tfdbg run-end UI: accuracy](https://www.tensorflow.org/images/tfdbg_screenshot_run_end_accuracy.png) - -This list of tensors can also be obtained by running the command `lt` after you -executed `run`. - -### tfdbg CLI Frequently-Used Commands - -Try the following commands at the `tfdbg>` prompt (referencing the code at -`tensorflow/python/debug/examples/debug_mnist.py`): - -| Command | Syntax or Option | Explanation | Example | -|:-------------------|:---------------- |:------------ |:------------------------- | -| **`lt`** | | **List dumped tensors.** | `lt` | -| | `-n ` | List dumped tensors with names matching given regular-expression pattern. | `lt -n Softmax.*` | -| | `-t ` | List dumped tensors with op types matching given regular-expression pattern. | `lt -t MatMul` | -| | `-f ` | List only the tensors that pass a registered tensor filter. | `lt -f has_inf_or_nan` | -| | `-f -fenn ` | List only the tensors that pass a registered tensor filter, excluding nodes with names matching the regular expression. | `lt -f has_inf_or_nan` `-fenn .*Sqrt.*` | -| | `-s ` | Sort the output by given `sort_key`, whose possible values are `timestamp` (default), `dump_size`, `op_type` and `tensor_name`. | `lt -s dump_size` | -| | `-r` | Sort in reverse order. | `lt -r -s dump_size` | -| **`pt`** | | **Print value of a dumped tensor.** | | -| | `pt ` | Print tensor value. | `pt hidden/Relu:0` | -| | `pt [slicing]` | Print a subarray of tensor, using [numpy](http://www.numpy.org/)-style array slicing. | `pt hidden/Relu:0[0:50,:]` | -| | `-a` | Print the entirety of a large tensor, without using ellipses. (May take a long time for large tensors.) | `pt -a hidden/Relu:0[0:50,:]` | -| | `-r ` | Highlight elements falling into specified numerical range. Multiple ranges can be used in conjunction. | `pt hidden/Relu:0 -a -r [[-inf,-1],[1,inf]]` | -| | `-n ` | Print dump corresponding to specified 0-based dump number. Required for tensors with multiple dumps. | `pt -n 0 hidden/Relu:0` | -| | `-s` | Include a summary of the numeric values of the tensor (applicable only to non-empty tensors with Boolean and numeric types such as `int*` and `float*`.) 
| `pt -s hidden/Relu:0[0:50,:]` | -| | `-w` | Write the value of the tensor (possibly sliced) to a Numpy file using [`numpy.save()`](https://docs.scipy.org/doc/numpy-1.13.0/reference/generated/numpy.save.html) | `pt -s hidden/Relu:0 -w /tmp/relu.npy` | -| **`@[coordinates]`** | | Navigate to specified element in `pt` output. | `@[10,0]` or `@10,0` | -| **`/regex`** | | [less](https://linux.die.net/man/1/less)-style search for given regular expression. | `/inf` | -| **`/`** | | Scroll to the next line with matches to the searched regex (if any). | `/` | -| **`pf`** | | **Print a value in the feed_dict to `Session.run`.** | | -| | `pf ` | Print the value of the feed. Also note that the `pf` command has the `-a`, `-r` and `-s` flags (not listed below), which have the same syntax and semantics as the identically-named flags of `pt`. | `pf input_xs:0` | -| **eval** | | **Evaluate arbitrary Python and numpy expression.** | | -| | `eval ` | Evaluate a Python / numpy expression, with numpy available as `np` and debug tensor names enclosed in backticks. | ``eval "np.matmul((`output/Identity:0` / `Softmax:0`).T, `Softmax:0`)"`` | -| | `-a` | Print a large-sized evaluation result in its entirety, i.e., without using ellipses. | ``eval -a 'np.sum(`Softmax:0`, axis=1)'`` | -| | `-w` | Write the result of the evaluation to a Numpy file using [`numpy.save()`](https://docs.scipy.org/doc/numpy-1.13.0/reference/generated/numpy.save.html) | ``eval -a 'np.sum(`Softmax:0`, axis=1)' -w /tmp/softmax_sum.npy`` | -| **`ni`** | | **Display node information.** | | -| | `-a` | Include node attributes in the output. | `ni -a hidden/Relu` | -| | `-d` | List the debug dumps available from the node. | `ni -d hidden/Relu` | -| | `-t` | Display the Python stack trace of the node's creation. | `ni -t hidden/Relu` | -| **`li`** | | **List inputs to node** | | -| | `-r` | List the inputs to node, recursively (the input tree.) | `li -r hidden/Relu:0` | -| | `-d ` | Limit recursion depth under the `-r` mode. | `li -r -d 3 hidden/Relu:0` | -| | `-c` | Include control inputs. | `li -c -r hidden/Relu:0` | -| | `-t` | Show op types of input nodes. | `li -t -r hidden/Relu:0` | -| **`lo`** | | **List output recipients of node** | | -| | `-r` | List the output recipients of node, recursively (the output tree.) | `lo -r hidden/Relu:0` | -| | `-d ` | Limit recursion depth under the `-r` mode. | `lo -r -d 3 hidden/Relu:0` | -| | `-c` | Include recipients via control edges. | `lo -c -r hidden/Relu:0` | -| | `-t` | Show op types of recipient nodes. | `lo -t -r hidden/Relu:0` | -| **`ls`** | | **List Python source files involved in node creation.** | | -| | `-p ` | Limit output to source files matching given regular-expression path pattern. | `ls -p .*debug_mnist.*` | -| | `-n` | Limit output to node names matching given regular-expression pattern. | `ls -n Softmax.*` | -| **`ps`** | | **Print Python source file.** | | -| | `ps ` | Print given Python source file source.py, with the lines annotated with the nodes created at each of them (if any). | `ps /path/to/source.py` | -| | `-t` | Perform annotation with respect to Tensors, instead of the default, nodes. | `ps -t /path/to/source.py` | -| | `-b ` | Annotate source.py beginning at given line. | `ps -b 30 /path/to/source.py` | -| | `-m ` | Limit the number of elements in the annotation for each line. 
| `ps -m 100 /path/to/source.py` | -| **`run`** | | **Proceed to the next Session.run()** | `run` | -| | `-n` | Execute through the next `Session.run` without debugging, and drop to CLI right before the run after that. | `run -n` | -| | `-t ` | Execute `Session.run` `T - 1` times without debugging, followed by a run with debugging. Then drop to CLI right after the debugged run. | `run -t 10` | -| | `-f ` | Continue executing `Session.run` until any intermediate tensor triggers the specified Tensor filter (causes the filter to return `True`). | `run -f has_inf_or_nan` | -| | `-f -fenn ` | Continue executing `Session.run` until any intermediate tensor whose node names doesn't match the regular expression triggers the specified Tensor filter (causes the filter to return `True`). | `run -f has_inf_or_nan -fenn .*Sqrt.*` | -| | `--node_name_filter ` | Execute the next `Session.run`, watching only nodes with names matching the given regular-expression pattern. | `run --node_name_filter Softmax.*` | -| | `--op_type_filter ` | Execute the next `Session.run`, watching only nodes with op types matching the given regular-expression pattern. | `run --op_type_filter Variable.*` | -| | `--tensor_dtype_filter ` | Execute the next `Session.run`, dumping only Tensors with data types (`dtype`s) matching the given regular-expression pattern. | `run --tensor_dtype_filter int.*` | -| | `-p` | Execute the next `Session.run` call in profiling mode. | `run -p` | -| **`ri`** | | **Display information about the run the current run, including fetches and feeds.** | `ri` | -| **`config`** | | **Set or show persistent TFDBG UI configuration.** | | -| | `set` | Set the value of a config item: {`graph_recursion_depth`, `mouse_mode`}. | `config set graph_recursion_depth 3` | -| | `show` | Show current persistent UI configuration. | `config show` | -| **`help`** | | **Print general help information** | `help` | -| | `help ` | Print help for given command. | `help lt` | - -Note that each time you enter a command, a new screen output -will appear. This is somewhat analogous to web pages in a browser. You can -navigate between these screens by clicking the `<--` and -`-->` text arrows near the top-left corner of the CLI. - -### Other Features of the tfdbg CLI - -In addition to the commands listed above, the tfdbg CLI provides the following -additional features: - -* To navigate through previous tfdbg commands, type in a few characters - followed by the Up or Down arrow keys. tfdbg will show you the history of - commands that started with those characters. -* To navigate through the history of screen outputs, do either of the - following: - * Use the `prev` and `next` commands. - * Click underlined `<--` and `-->` links near the top left corner of the - screen. -* Tab completion of commands and some command arguments. -* To redirect the screen output to a file instead of the screen, end the - command with bash-style redirection. For example, the following command - redirects the output of the pt command to the `/tmp/xent_value_slices.txt` - file: - - ```none - tfdbg> pt cross_entropy/Log:0[:, 0:10] > /tmp/xent_value_slices.txt - ``` - -### Finding `nan`s and `inf`s - -In this first `Session.run()` call, there happen to be no problematic numerical -values. You can move on to the next run by using the command `run` or its -shorthand `r`. - -> TIP: If you enter `run` or `r` repeatedly, you will be able to move through -> the `Session.run()` calls in a sequential manner. 
-> -> You can also use the `-t` flag to move ahead a number of `Session.run()` calls -> at a time, for example: -> -> ``` -> tfdbg> run -t 10 -> ``` - -Instead of entering `run` repeatedly and manually searching for `nan`s and -`inf`s in the run-end UI after every `Session.run()` call (for example, by using -the `pt` command shown in the table above) , you can use the following -command to let the debugger repeatedly execute `Session.run()` calls without -stopping at the run-start or run-end prompt, until the first `nan` or `inf` -value shows up in the graph. This is analogous to *conditional breakpoints* in -some procedural-language debuggers: - -```none -tfdbg> run -f has_inf_or_nan -``` - -> NOTE: The preceding command works properly because a tensor filter called -> `has_inf_or_nan` has been registered for you when the wrapped session is -> created. This filter detects `nan`s and `inf`s (as explained previously). -> If you have registered any other filters, you can -> use "run -f" to have tfdbg run until any tensor triggers that filter (cause -> the filter to return True). -> -> ``` python -> def my_filter_callable(datum, tensor): -> # A filter that detects zero-valued scalars. -> return len(tensor.shape) == 0 and tensor == 0.0 -> -> sess.add_tensor_filter('my_filter', my_filter_callable) -> ``` -> -> Then at the tfdbg run-start prompt run until your filter is triggered: -> -> ``` -> tfdbg> run -f my_filter -> ``` - -See [this API document](https://www.tensorflow.org/api_docs/python/tfdbg/DebugDumpDir#find) -for more information on the expected signature and return value of the predicate -`Callable` used with `add_tensor_filter()`. - -![tfdbg run-end UI: infs and nans](https://www.tensorflow.org/images/tfdbg_screenshot_run_end_inf_nan.png) - -As the screen display indicates on the first line, the `has_inf_or_nan` filter is first triggered -during the fourth `Session.run()` call: an -[Adam optimizer](https://www.tensorflow.org/api_docs/python/tf/train/AdamOptimizer) -forward-backward training pass on the graph. In this run, 36 (out of the total -95) intermediate tensors contain `nan` or `inf` values. These tensors are listed -in chronological order, with their timestamps displayed on the left. At the top -of the list, you can see the first tensor in which the bad numerical values -first surfaced: `cross_entropy/Log:0`. - -To view the value of the tensor, click the underlined tensor name -`cross_entropy/Log:0` or enter the equivalent command: - -```none -tfdbg> pt cross_entropy/Log:0 -``` - -Scroll down a little and you will notice some scattered `inf` values. If the -instances of `inf` and `nan` are difficult to spot by eye, you can use the -following command to perform a regex search and highlight the output: - -```none -tfdbg> /inf -``` - -Or, alternatively: - -```none -tfdbg> /(inf|nan) -``` - -You can also use the `-s` or `--numeric_summary` command to get a quick summary -of the types of numeric values in the tensor: - -``` none -tfdbg> pt -s cross_entropy/Log:0 -``` - -From the summary, you can see that several of the 1000 elements of the -`cross_entropy/Log:0` tensor are `-inf`s (negative infinities). - -Why did these infinities appear? 
To further debug, display more information -about the node `cross_entropy/Log` by clicking the underlined `node_info` menu -item on the top or entering the equivalent node_info (`ni`) command: - -```none -tfdbg> ni cross_entropy/Log -``` - -![tfdbg run-end UI: infs and nans](https://www.tensorflow.org/images/tfdbg_screenshot_run_end_node_info.png) - -You can see that this node has the op type `Log` -and that its input is the node `Softmax`. Run the following command to -take a closer look at the input tensor: - -```none -tfdbg> pt Softmax:0 -``` - -Examine the values in the input tensor, searching for zeros: - -```none -tfdbg> /0\.000 -``` - -Indeed, there are zeros. Now it is clear that the origin of the bad numerical -values is the node `cross_entropy/Log` taking logs of zeros. To find out the -culprit line in the Python source code, use the `-t` flag of the `ni` command -to show the traceback of the node's construction: - -```none -tfdbg> ni -t cross_entropy/Log -``` - -If you click "node_info" at the top of the screen, tfdbg automatically shows the -traceback of the node's construction. - -From the traceback, you can see that the op is constructed at the following -line: -[`debug_mnist.py`](https://www.tensorflow.org/code/tensorflow/python/debug/examples/debug_mnist.py): - -```python -diff = y_ * tf.log(y) -``` - -**tfdbg** has a feature that makes it easy to trace Tensors and ops back to -lines in Python source files. It can annotate lines of a Python file with -the ops or Tensors created by them. To use this feature, -simply click the underlined line numbers in the stack trace output of the -`ni -t ` commands, or use the `ps` (or `print_source`) command such as: -`ps /path/to/source.py`. For example, the following screenshot shows the output -of a `ps` command. - -![tfdbg run-end UI: annotated Python source file](https://www.tensorflow.org/images/tfdbg_screenshot_run_end_annotated_source.png) - -### Fixing the problem - -To fix the problem, edit `debug_mnist.py`, changing the original line: - -```python -diff = -(y_ * tf.log(y)) -``` - -to the built-in, numerically-stable implementation of softmax cross-entropy: - -```python -diff = tf.losses.softmax_cross_entropy(labels=y_, logits=logits) -``` - -Rerun with the `--debug` flag as follows: - -```none -python -m tensorflow.python.debug.examples.debug_mnist --debug -``` - -At the `tfdbg>` prompt, enter the following command: - -```none -run -f has_inf_or_nan` -``` - -Confirm that no tensors are flagged as containing `nan` or `inf` values, and -accuracy now continues to rise rather than getting stuck. Success! - -## Debugging TensorFlow Estimators - -This section explains how to debug TensorFlow programs that use the `Estimator` -APIs. Part of the convenience provided by these APIs is that -they manage `Session`s internally. This makes the `LocalCLIDebugWrapperSession` -described in the preceding sections inapplicable. Fortunately, you can still -debug them by using special `hook`s provided by `tfdbg`. - -`tfdbg` can debug the -@{tf.estimator.Estimator.train$`train()`}, -@{tf.estimator.Estimator.evaluate$`evaluate()`} and -@{tf.estimator.Estimator.predict$`predict()`} -methods of tf-learn `Estimator`s. To debug `Estimator.train()`, -create a `LocalCLIDebugHook` and supply it in the `hooks` argument. For example: - -```python -# First, let your BUILD target depend on "//tensorflow/python/debug:debug_py" -# (You don't need to worry about the BUILD dependency if you are using a pip -# install of open-source TensorFlow.) 
-from tensorflow.python import debug as tf_debug - -# Create a LocalCLIDebugHook and use it as a monitor when calling fit(). -hooks = [tf_debug.LocalCLIDebugHook()] - -# To debug `train`: -classifier.train(input_fn, - steps=1000, - hooks=hooks) -``` - -Similarly, to debug `Estimator.evaluate()` and `Estimator.predict()`, assign -hooks to the `hooks` parameter, as in the following example: - -```python -# To debug `evaluate`: -accuracy_score = classifier.evaluate(eval_input_fn, - hooks=hooks)["accuracy"] - -# To debug `predict`: -predict_results = classifier.predict(predict_input_fn, hooks=hooks) -``` - -[debug_tflearn_iris.py](https://www.tensorflow.org/code/tensorflow/python/debug/examples/debug_tflearn_iris.py), -based on [tf-learn's iris tutorial](https://www.tensorflow.org/versions/r1.8/get_started/tflearn), -contains a full example of how to use the tfdbg with `Estimator`s. -To run this example, do: - -```none -python -m tensorflow.python.debug.examples.debug_tflearn_iris --debug -``` - -The `LocalCLIDebugHook` also allows you to configure a `watch_fn` that can be -used to flexibly specify what `Tensor`s to watch on different `Session.run()` -calls, as a function of the `fetches` and `feed_dict` and other states. See -@{tfdbg.DumpingDebugWrapperSession.__init__$this API doc} -for more details. - -## Debugging Keras Models with TFDBG - -To use TFDBG with [Keras](https://keras.io/), let the Keras backend use -a TFDBG-wrapped Session object. For example, to use the CLI wrapper: - -``` python -import tensorflow as tf -from keras import backend as keras_backend -from tensorflow.python import debug as tf_debug - -keras_backend.set_session(tf_debug.LocalCLIDebugWrapperSession(tf.Session())) - -# Define your keras model, called "model". -model.fit(...) # This will break into the TFDBG CLI. -``` - -## Debugging tf-slim with TFDBG - -TFDBG supports debugging of training and evaluation with -[tf-slim](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/slim). -As detailed below, training and evaluation require slightly different debugging -workflows. - -### Debugging training in tf-slim -To debug the training process, provide `LocalCLIDebugWrapperSession` to the -`session_wrapper` argument of `slim.learning.train()`. For example: - -``` python -import tensorflow as tf -from tensorflow.python import debug as tf_debug - -# ... Code that creates the graph and the train_op ... -tf.contrib.slim.learning.train( - train_op, - logdir, - number_of_steps=10, - session_wrapper=tf_debug.LocalCLIDebugWrapperSession) -``` - -### Debugging evaluation in tf-slim -To debug the evaluation process, provide `LocalCLIDebugHook` to the -`hooks` argument of `slim.evaluation.evaluate_once()`. For example: - -``` python -import tensorflow as tf -from tensorflow.python import debug as tf_debug - -# ... Code that creates the graph and the eval and final ops ... -tf.contrib.slim.evaluation.evaluate_once( - '', - checkpoint_path, - logdir, - eval_op=my_eval_op, - final_op=my_value_op, - hooks=[tf_debug.LocalCLIDebugHook()]) -``` - -## Offline Debugging of Remotely-Running Sessions - -Often, your model is running on a remote machine or a process that you don't -have terminal access to. To perform model debugging in such cases, you can use -the `offline_analyzer` binary of `tfdbg` (described below). It operates on -dumped data directories. This can be done to both the lower-level `Session` API -and the higher-level `Estimator` API. 
- -### Debugging Remote tf.Sessions - -If you interact directly with the `tf.Session` API in `python`, you can -configure the `RunOptions` proto that you call your `Session.run()` method -with, by using the method @{tfdbg.watch_graph}. -This will cause the intermediate tensors and runtime graphs to be dumped to a -shared storage location of your choice when the `Session.run()` call occurs -(at the cost of slower performance). For example: - -```python -from tensorflow.python import debug as tf_debug - -# ... Code where your session and graph are set up... - -run_options = tf.RunOptions() -tf_debug.watch_graph( - run_options, - session.graph, - debug_urls=["file:///shared/storage/location/tfdbg_dumps_1"]) -# Be sure to specify different directories for different run() calls. - -session.run(fetches, feed_dict=feeds, options=run_options) -``` - -Later, in an environment that you have terminal access to (for example, a local -computer that can access the shared storage location specified in the code -above), you can load and inspect the data in the dump directory on the shared -storage by using the `offline_analyzer` binary of `tfdbg`. For example: - -```none -python -m tensorflow.python.debug.cli.offline_analyzer \ - --dump_dir=/shared/storage/location/tfdbg_dumps_1 -``` - -The `Session` wrapper `DumpingDebugWrapperSession` offers an easier and more -flexible way to generate file-system dumps that can be analyzed offline. -To use it, simply wrap your session in a `tf_debug.DumpingDebugWrapperSession`. -For example: - -```python -# Let your BUILD target depend on "//tensorflow/python/debug:debug_py -# (You don't need to worry about the BUILD dependency if you are using a pip -# install of open-source TensorFlow.) -from tensorflow.python import debug as tf_debug - -sess = tf_debug.DumpingDebugWrapperSession( - sess, "/shared/storage/location/tfdbg_dumps_1/", watch_fn=my_watch_fn) -``` - -The `watch_fn` argument accepts a `Callable` that allows you to configure what -`tensor`s to watch on different `Session.run()` calls, as a function of the -`fetches` and `feed_dict` to the `run()` call and other states. - -### C++ and other languages - -If your model code is written in C++ or other languages, you can also -modify the `debug_options` field of `RunOptions` to generate debug dumps that -can be inspected offline. See -[the proto definition](https://www.tensorflow.org/code/tensorflow/core/protobuf/debug.proto) -for more details. - -### Debugging Remotely-Running Estimators - -If your remote TensorFlow server runs `Estimator`s, -you can use the non-interactive `DumpingDebugHook`. For example: - -```python -# Let your BUILD target depend on "//tensorflow/python/debug:debug_py -# (You don't need to worry about the BUILD dependency if you are using a pip -# install of open-source TensorFlow.) -from tensorflow.python import debug as tf_debug - -hooks = [tf_debug.DumpingDebugHook("/shared/storage/location/tfdbg_dumps_1")] -``` - -Then this `hook` can be used in the same way as the `LocalCLIDebugHook` examples -described earlier in this document. -As the training, evalution or prediction happens with `Estimator`, -tfdbg creates directories having the following name pattern: -`/shared/storage/location/tfdbg_dumps_1/run__`. -Each directory corresponds to a `Session.run()` call that underlies -the `fit()` or `evaluate()` call. You can load these directories and inspect -them in a command-line interface in an offline manner using the -`offline_analyzer` offered by tfdbg. 
For example: - -```bash -python -m tensorflow.python.debug.cli.offline_analyzer \ - --dump_dir="/shared/storage/location/tfdbg_dumps_1/run__" -``` - -## Frequently Asked Questions - -**Q**: _Do the timestamps on the left side of the `lt` output reflect actual - performance in a non-debugging session?_ - -**A**: No. The debugger inserts additional special-purpose debug nodes to the - graph to record the values of intermediate tensors. These nodes - slow down the graph execution. If you are interested in profiling your - model, check out - - 1. The profiling mode of tfdbg: `tfdbg> run -p`. - 2. [tfprof](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/core/profiler) - and other profiling tools for TensorFlow. - -**Q**: _How do I link tfdbg against my `Session` in Bazel? Why do I see an - error such as "ImportError: cannot import name debug"?_ - -**A**: In your BUILD rule, declare dependencies: - `"//tensorflow:tensorflow_py"` and `"//tensorflow/python/debug:debug_py"`. - The first is the dependency that you include to use TensorFlow even - without debugger support; the second enables the debugger. - Then, In your Python file, add: - -```python -from tensorflow.python import debug as tf_debug - -# Then wrap your TensorFlow Session with the local-CLI wrapper. -sess = tf_debug.LocalCLIDebugWrapperSession(sess) -``` - -**Q**: _Does tfdbg help debug runtime errors such as shape mismatches?_ - -**A**: Yes. tfdbg intercepts errors generated by ops during runtime and presents - the errors with some debug instructions to the user in the CLI. - See examples: - -```none -# Debugging shape mismatch during matrix multiplication. -python -m tensorflow.python.debug.examples.debug_errors \ - --error shape_mismatch --debug - -# Debugging uninitialized variable. -python -m tensorflow.python.debug.examples.debug_errors \ - --error uninitialized_variable --debug -``` - -**Q**: _How can I let my tfdbg-wrapped Sessions or Hooks run the debug mode -only from the main thread?_ - -**A**: -This is a common use case, in which the `Session` object is used from multiple -threads concurrently. Typically, the child threads take care of background tasks -such as running enqueue operations. Often, you want to debug only the main -thread (or less frequently, only one of the child threads). You can use the -`thread_name_filter` keyword argument of `LocalCLIDebugWrapperSession` to -achieve this type of thread-selective debugging. For example, to debug from the -main thread only, construct a wrapped `Session` as follows: - -```python -sess = tf_debug.LocalCLIDebugWrapperSession(sess, thread_name_filter="MainThread$") -``` - -The above example relies on the fact that main threads in Python have the -default name `MainThread`. - -**Q**: _The model I am debugging is very large. The data dumped by tfdbg -fills up the free space of my disk. What can I do?_ - -**A**: -You might encounter this problem in any of the following situations: - -* models with many intermediate tensors -* very large intermediate tensors -* many @{tf.while_loop} iterations - -There are three possible workarounds or solutions: - -* The constructors of `LocalCLIDebugWrapperSession` and `LocalCLIDebugHook` - provide a keyword argument, `dump_root`, to specify the path - to which tfdbg dumps the debug data. You can use it to let tfdbg dump the - debug data on a disk with larger free space. 
For example:
-
-```python
-# For LocalCLIDebugWrapperSession
-sess = tf_debug.LocalCLIDebugWrapperSession(sess, dump_root="/with/lots/of/space")
-
-# For LocalCLIDebugHook
-hooks = [tf_debug.LocalCLIDebugHook(dump_root="/with/lots/of/space")]
-```
-  Make sure that the directory pointed to by `dump_root` is empty or
-  nonexistent. `tfdbg` cleans up the dump directories before exiting.
-
-* Reduce the batch size used during the runs.
-* Use the filtering options of tfdbg's `run` command to watch only specific
-  nodes in the graph. For example:
-
-  ```
-  tfdbg> run --node_name_filter .*hidden.*
-  tfdbg> run --op_type_filter Variable.*
-  tfdbg> run --tensor_dtype_filter int.*
-  ```
-
-  The first command above watches only nodes whose names match the
-  regular-expression pattern `.*hidden.*`. The second command watches only
-  operations whose op types match the pattern `Variable.*`. The third one
-  watches only the tensors whose dtypes match the pattern `int.*` (e.g.,
-  `int32`).
-
-
-**Q**: _Why can't I select text in the tfdbg CLI?_
-
-**A**: This is because the tfdbg CLI enables mouse events in the terminal by
-       default. This [mouse-mask](https://linux.die.net/man/3/mousemask) mode
-       overrides default terminal interactions, including text selection. You
-       can re-enable text selection by using the command `mouse off` or
-       `m off`.
-
-**Q**: _Why does the tfdbg CLI show no dumped tensors when I debug code like the following?_
-
-``` python
-a = tf.ones([10], name="a")
-b = tf.add(a, a, name="b")
-sess = tf.Session()
-sess = tf_debug.LocalCLIDebugWrapperSession(sess)
-sess.run(b)
-```
-
-**A**: The reason you see no dumped data is that every node in the
-       executed TensorFlow graph is constant-folded by the TensorFlow runtime.
-       In this example, `a` is a constant tensor; therefore, the fetched
-       tensor `b` is effectively also a constant tensor. TensorFlow's graph
-       optimization folds the graph that contains `a` and `b` into a single
-       node to speed up future runs of the graph, which is why `tfdbg` does
-       not generate any intermediate tensor dumps. However, if `a` were a
-       @{tf.Variable}, as in the following example:
-
-``` python
-import numpy as np
-
-a = tf.Variable(np.ones([10]), name="a")
-b = tf.add(a, a, name="b")
-sess = tf.Session()
-sess.run(tf.global_variables_initializer())
-sess = tf_debug.LocalCLIDebugWrapperSession(sess)
-sess.run(b)
-```
-
-the constant-folding would not occur and `tfdbg` should show the intermediate
-tensor dumps.
-
-
-**Q**: I am debugging a model that generates unwanted infinities or NaNs. But
-       there are some nodes in my model that are known to generate infinities
-       or NaNs in their output tensors even under completely normal conditions.
-       How can I skip those nodes during my `run -f has_inf_or_nan` actions?
-
-**A**: Use the `--filter_exclude_node_names` (`-fenn` for short) flag. For
-       example, if you know you have a node whose name matches the regular
-       expression `.*Sqrt.*` and that generates infinities or NaNs regardless
-       of whether the model is behaving correctly, you can exclude those nodes
-       from the infinity/NaN-finding runs with the command
-       `run -f has_inf_or_nan -fenn .*Sqrt.*`.
-
-
-**Q**: Is there a GUI for tfdbg?
-
-**A**: Yes, the **TensorBoard Debugger Plugin** is the GUI of tfdbg.
-       It offers features such as inspection of the computation graph,
-       real-time visualization of tensor values, continuation to tensor
-       breakpoints and conditional breakpoints, and tying tensors to their
-       graph-construction source code, all in the browser environment.
- To get started, please visit - [its README](https://github.com/tensorflow/tensorboard/blob/master/tensorboard/plugins/debugger/README.md). diff --git a/tensorflow/docs_src/programmers_guide/eager.md b/tensorflow/docs_src/programmers_guide/eager.md deleted file mode 100644 index 00d02b4455..0000000000 --- a/tensorflow/docs_src/programmers_guide/eager.md +++ /dev/null @@ -1,849 +0,0 @@ -# Eager Execution - -TensorFlow's eager execution is an imperative programming environment that -evaluates operations immediately, without building graphs: operations return -concrete values instead of constructing a computational graph to run later. This -makes it easy to get started with TensorFlow and debug models, and it -reduces boilerplate as well. To follow along with this guide, run the code -samples below in an interactive `python` interpreter. - -Eager execution is a flexible machine learning platform for research and -experimentation, providing: - -* *An intuitive interface*—Structure your code naturally and use Python data - structures. Quickly iterate on small models and small data. -* *Easier debugging*—Call ops directly to inspect running models and test - changes. Use standard Python debugging tools for immediate error reporting. -* *Natural control flow*—Use Python control flow instead of graph control - flow, simplifying the specification of dynamic models. - -Eager execution supports most TensorFlow operations and GPU acceleration. For a -collection of examples running in eager execution, see: -[tensorflow/contrib/eager/python/examples](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/eager/python/examples). - -Note: Some models may experience increased overhead with eager execution -enabled. Performance improvements are ongoing, but please -[file a bug](https://github.com/tensorflow/tensorflow/issues) if you find a -problem and share your benchmarks. - -## Setup and basic usage - -Upgrade to the latest version of TensorFlow: - -``` -$ pip install --upgrade tensorflow -``` - -To start eager execution, add `tf.enable_eager_execution()` to the beginning of -the program or console session. Do not add this operation to other modules that -the program calls. - -```py -from __future__ import absolute_import, division, print_function - -import tensorflow as tf - -tf.enable_eager_execution() -``` - -Now you can run TensorFlow operations and the results will return immediately: - -```py -tf.executing_eagerly() # => True - -x = [[2.]] -m = tf.matmul(x, x) -print("hello, {}".format(m)) # => "hello, [[4.]]" -``` - -Enabling eager execution changes how TensorFlow operations behave—now they -immediately evaluate and return their values to Python. `tf.Tensor` objects -reference concrete values instead of symbolic handles to nodes in a computational -graph. Since there isn't a computational graph to build and run later in a -session, it's easy to inspect results using `print()` or a debugger. Evaluating, -printing, and checking tensor values does not break the flow for computing -gradients. - -Eager execution works nicely with [NumPy](http://www.numpy.org/). NumPy -operations accept `tf.Tensor` arguments. TensorFlow -[math operations](https://www.tensorflow.org/api_guides/python/math_ops) convert -Python objects and NumPy arrays to `tf.Tensor` objects. The -`tf.Tensor.numpy` method returns the object's value as a NumPy `ndarray`. 
- -```py -a = tf.constant([[1, 2], - [3, 4]]) -print(a) -# => tf.Tensor([[1 2] -# [3 4]], shape=(2, 2), dtype=int32) - -# Broadcasting support -b = tf.add(a, 1) -print(b) -# => tf.Tensor([[2 3] -# [4 5]], shape=(2, 2), dtype=int32) - -# Operator overloading is supported -print(a * b) -# => tf.Tensor([[ 2 6] -# [12 20]], shape=(2, 2), dtype=int32) - -# Use NumPy values -import numpy as np - -c = np.multiply(a, b) -print(c) -# => [[ 2 6] -# [12 20]] - -# Obtain numpy value from a tensor: -print(a.numpy()) -# => [[1 2] -# [3 4]] -``` - -The `tf.contrib.eager` module contains symbols available to both eager and graph execution -environments and is useful for writing code to [work with graphs](#work_with_graphs): - -```py -tfe = tf.contrib.eager -``` - -## Dynamic control flow - -A major benefit of eager execution is that all the functionality of the host -language is available while your model is executing. So, for example, -it is easy to write [fizzbuzz](https://en.wikipedia.org/wiki/Fizz_buzz): - -```py -def fizzbuzz(max_num): - counter = tf.constant(0) - max_num = tf.convert_to_tensor(max_num) - for num in range(max_num.numpy()): - num = tf.constant(num) - if int(num % 3) == 0 and int(num % 5) == 0: - print('FizzBuzz') - elif int(num % 3) == 0: - print('Fizz') - elif int(num % 5) == 0: - print('Buzz') - else: - print(num) - counter += 1 - return counter -``` - -This has conditionals that depend on tensor values and it prints these values -at runtime. - -## Build a model - -Many machine learning models are represented by composing layers. When -using TensorFlow with eager execution you can either write your own layers or -use a layer provided in the `tf.keras.layers` package. - -While you can use any Python object to represent a layer, -TensorFlow has `tf.keras.layers.Layer` as a convenient base class. Inherit from -it to implement your own layer: - -```py -class MySimpleLayer(tf.keras.layers.Layer): - def __init__(self, output_units): - self.output_units = output_units - - def build(self, input): - # The build method gets called the first time your layer is used. - # Creating variables on build() allows you to make their shape depend - # on the input shape and hence remove the need for the user to specify - # full shapes. It is possible to create variables during __init__() if - # you already know their full shapes. - self.kernel = self.add_variable( - "kernel", [input.shape[-1], self.output_units]) - - def call(self, input): - # Override call() instead of __call__ so we can perform some bookkeeping. - return tf.matmul(input, self.kernel) -``` - -Use `tf.keras.layers.Dense` layer instead of `MySimpleLayer` above as it has -a superset of its functionality (it can also add a bias). - -When composing layers into models you can use `tf.keras.Sequential` to represent -models which are a linear stack of layers. It is easy to use for basic models: - -```py -model = tf.keras.Sequential([ - tf.keras.layers.Dense(10, input_shape=(784,)), # must declare input shape - tf.keras.layers.Dense(10) -]) -``` - -Alternatively, organize models in classes by inheriting from `tf.keras.Model`. -This is a container for layers that is a layer itself, allowing `tf.keras.Model` -objects to contain other `tf.keras.Model` objects. 
- -```py -class MNISTModel(tf.keras.Model): - def __init__(self): - super(MNISTModel, self).__init__() - self.dense1 = tf.keras.layers.Dense(units=10) - self.dense2 = tf.keras.layers.Dense(units=10) - - def call(self, input): - """Run the model.""" - result = self.dense1(input) - result = self.dense2(result) - result = self.dense2(result) # reuse variables from dense2 layer - return result - -model = MNISTModel() -``` - -It's not required to set an input shape for the `tf.keras.Model` class since -the parameters are set the first time input is passed to the layer. - -`tf.keras.layers` classes create and contain their own model variables that -are tied to the lifetime of their layer objects. To share layer variables, share -their objects. - - -## Eager training - -### Computing gradients - -[Automatic differentiation](https://en.wikipedia.org/wiki/Automatic_differentiation) -is useful for implementing machine learning algorithms such as -[backpropagation](https://en.wikipedia.org/wiki/Backpropagation) for training -neural networks. During eager execution, use `tf.GradientTape` to trace -operations for computing gradients later. - -`tf.GradientTape` is an opt-in feature to provide maximal performance when -not tracing. Since different operations can occur during each call, all -forward-pass operations get recorded to a "tape". To compute the gradient, play -the tape backwards and then discard. A particular `tf.GradientTape` can only -compute one gradient; subsequent calls throw a runtime error. - -```py -w = tfe.Variable([[1.0]]) -with tf.GradientTape() as tape: - loss = w * w - -grad = tape.gradient(loss, w) -print(grad) # => tf.Tensor([[ 2.]], shape=(1, 1), dtype=float32) -``` - -Here's an example of `tf.GradientTape` that records forward-pass operations -to train a simple model: - -```py -# A toy dataset of points around 3 * x + 2 -NUM_EXAMPLES = 1000 -training_inputs = tf.random_normal([NUM_EXAMPLES]) -noise = tf.random_normal([NUM_EXAMPLES]) -training_outputs = training_inputs * 3 + 2 + noise - -def prediction(input, weight, bias): - return input * weight + bias - -# A loss function using mean-squared error -def loss(weights, biases): - error = prediction(training_inputs, weights, biases) - training_outputs - return tf.reduce_mean(tf.square(error)) - -# Return the derivative of loss with respect to weight and bias -def grad(weights, biases): - with tf.GradientTape() as tape: - loss_value = loss(weights, biases) - return tape.gradient(loss_value, [weights, biases]) - -train_steps = 200 -learning_rate = 0.01 -# Start with arbitrary values for W and B on the same batch of data -W = tfe.Variable(5.) -B = tfe.Variable(10.) - -print("Initial loss: {:.3f}".format(loss(W, B))) - -for i in range(train_steps): - dW, dB = grad(W, B) - W.assign_sub(dW * learning_rate) - B.assign_sub(dB * learning_rate) - if i % 20 == 0: - print("Loss at step {:03d}: {:.3f}".format(i, loss(W, B))) - -print("Final loss: {:.3f}".format(loss(W, B))) -print("W = {}, B = {}".format(W.numpy(), B.numpy())) -``` - -Output (exact numbers may vary): - -``` -Initial loss: 71.204 -Loss at step 000: 68.333 -Loss at step 020: 30.222 -Loss at step 040: 13.691 -Loss at step 060: 6.508 -Loss at step 080: 3.382 -Loss at step 100: 2.018 -Loss at step 120: 1.422 -Loss at step 140: 1.161 -Loss at step 160: 1.046 -Loss at step 180: 0.996 -Final loss: 0.974 -W = 3.01582956314, B = 2.1191945076 -``` - -Replay the `tf.GradientTape` to compute the gradients and apply them in a -training loop. 
This is demonstrated in an excerpt from the -[mnist_eager.py](https://github.com/tensorflow/models/blob/master/official/mnist/mnist_eager.py) -example: - -```py -dataset = tf.data.Dataset.from_tensor_slices((data.train.images, - data.train.labels)) -... -for (batch, (images, labels)) in enumerate(dataset): - ... - with tf.GradientTape() as tape: - logits = model(images, training=True) - loss_value = loss(logits, labels) - ... - grads = tape.gradient(loss_value, model.variables) - optimizer.apply_gradients(zip(grads, model.variables), - global_step=tf.train.get_or_create_global_step()) -``` - - -The following example creates a multi-layer model that classifies the standard -[MNIST handwritten digits](https://www.tensorflow.org/tutorials/layers). It -demonstrates the optimizer and layer APIs to build trainable graphs in an eager -execution environment. - -### Train a model - -Even without training, call the model and inspect the output in eager execution: - -```py -# Create a tensor representing a blank image -batch = tf.zeros([1, 1, 784]) -print(batch.shape) # => (1, 1, 784) - -result = model(batch) -# => tf.Tensor([[[ 0. 0., ..., 0.]]], shape=(1, 1, 10), dtype=float32) -``` - -This example uses the -[dataset.py module](https://github.com/tensorflow/models/blob/master/official/mnist/dataset.py) -from the -[TensorFlow MNIST example](https://github.com/tensorflow/models/tree/master/official/mnist); -download this file to your local directory. Run the following to download the -MNIST data files to your working directory and prepare a `tf.data.Dataset` -for training: - -```py -import dataset # download dataset.py file -dataset_train = dataset.train('./datasets').shuffle(60000).repeat(4).batch(32) -``` - -To train a model, define a loss function to optimize and then calculate -gradients. Use an optimizer to update the variables: - -```py -def loss(model, x, y): - prediction = model(x) - return tf.losses.sparse_softmax_cross_entropy(labels=y, logits=prediction) - -def grad(model, inputs, targets): - with tf.GradientTape() as tape: - loss_value = loss(model, inputs, targets) - return tape.gradient(loss_value, model.variables) - -optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001) - -x, y = iter(dataset_train).next() -print("Initial loss: {:.3f}".format(loss(model, x, y))) - -# Training loop -for (i, (x, y)) in enumerate(dataset_train): - # Calculate derivatives of the input function with respect to its parameters. - grads = grad(model, x, y) - # Apply the gradient to the model - optimizer.apply_gradients(zip(grads, model.variables), - global_step=tf.train.get_or_create_global_step()) - if i % 200 == 0: - print("Loss at step {:04d}: {:.3f}".format(i, loss(model, x, y))) - -print("Final loss: {:.3f}".format(loss(model, x, y))) -``` - -Output (exact numbers may vary): - -``` -Initial loss: 2.674 -Loss at step 0000: 2.593 -Loss at step 0200: 2.143 -Loss at step 0400: 2.009 -Loss at step 0600: 2.103 -Loss at step 0800: 1.621 -Loss at step 1000: 1.695 -... -Loss at step 6600: 0.602 -Loss at step 6800: 0.557 -Loss at step 7000: 0.499 -Loss at step 7200: 0.744 -Loss at step 7400: 0.681 -Final loss: 0.670 -``` - -And for faster training, move the computation to a GPU: - -```py -with tf.device("/gpu:0"): - for (i, (x, y)) in enumerate(dataset_train): - # minimize() is equivalent to the grad() and apply_gradients() calls. 
- optimizer.minimize(lambda: loss(model, x, y), - global_step=tf.train.get_or_create_global_step()) -``` - -### Variables and optimizers - -`tfe.Variable` objects store mutable `tf.Tensor` values accessed during -training to make automatic differentiation easier. The parameters of a model can -be encapsulated in classes as variables. - -Better encapsulate model parameters by using `tfe.Variable` with -`tf.GradientTape`. For example, the automatic differentiation example above -can be rewritten: - -```py -class Model(tf.keras.Model): - def __init__(self): - super(Model, self).__init__() - self.W = tfe.Variable(5., name='weight') - self.B = tfe.Variable(10., name='bias') - def predict(self, inputs): - return inputs * self.W + self.B - -# A toy dataset of points around 3 * x + 2 -NUM_EXAMPLES = 2000 -training_inputs = tf.random_normal([NUM_EXAMPLES]) -noise = tf.random_normal([NUM_EXAMPLES]) -training_outputs = training_inputs * 3 + 2 + noise - -# The loss function to be optimized -def loss(model, inputs, targets): - error = model.predict(inputs) - targets - return tf.reduce_mean(tf.square(error)) - -def grad(model, inputs, targets): - with tf.GradientTape() as tape: - loss_value = loss(model, inputs, targets) - return tape.gradient(loss_value, [model.W, model.B]) - -# Define: -# 1. A model. -# 2. Derivatives of a loss function with respect to model parameters. -# 3. A strategy for updating the variables based on the derivatives. -model = Model() -optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01) - -print("Initial loss: {:.3f}".format(loss(model, training_inputs, training_outputs))) - -# Training loop -for i in range(300): - grads = grad(model, training_inputs, training_outputs) - optimizer.apply_gradients(zip(grads, [model.W, model.B]), - global_step=tf.train.get_or_create_global_step()) - if i % 20 == 0: - print("Loss at step {:03d}: {:.3f}".format(i, loss(model, training_inputs, training_outputs))) - -print("Final loss: {:.3f}".format(loss(model, training_inputs, training_outputs))) -print("W = {}, B = {}".format(model.W.numpy(), model.B.numpy())) -``` - -Output (exact numbers may vary): - -``` -Initial loss: 69.066 -Loss at step 000: 66.368 -Loss at step 020: 30.107 -Loss at step 040: 13.959 -Loss at step 060: 6.769 -Loss at step 080: 3.567 -Loss at step 100: 2.141 -Loss at step 120: 1.506 -Loss at step 140: 1.223 -Loss at step 160: 1.097 -Loss at step 180: 1.041 -Loss at step 200: 1.016 -Loss at step 220: 1.005 -Loss at step 240: 1.000 -Loss at step 260: 0.998 -Loss at step 280: 0.997 -Final loss: 0.996 -W = 2.99431324005, B = 2.02129220963 -``` - -## Use objects for state during eager execution - -With graph execution, program state (such as the variables) is stored in global -collections and their lifetime is managed by the `tf.Session` object. In -contrast, during eager execution the lifetime of state objects is determined by -the lifetime of their corresponding Python object. - -### Variables are objects - -During eager execution, variables persist until the last reference to the object -is removed, and is then deleted. - -```py -with tf.device("gpu:0"): - v = tfe.Variable(tf.random_normal([1000, 1000])) - v = None # v no longer takes up GPU memory -``` - -### Object-based saving - -`tfe.Checkpoint` can save and restore `tfe.Variable`s to and from -checkpoints: - -```py -x = tfe.Variable(10.) - -checkpoint = tfe.Checkpoint(x=x) # save as "x" - -x.assign(2.) # Assign a new value to the variables and save. -save_path = checkpoint.save('./ckpt/') - -x.assign(11.) 
# Change the variable after saving. - -# Restore values from the checkpoint -checkpoint.restore(save_path) - -print(x) # => 2.0 -``` - -To save and load models, `tfe.Checkpoint` stores the internal state of objects, -without requiring hidden variables. To record the state of a `model`, -an `optimizer`, and a global step, pass them to a `tfe.Checkpoint`: - -```py -model = MyModel() -optimizer = tf.train.AdamOptimizer(learning_rate=0.001) -checkpoint_dir = ‘/path/to/model_dir’ -checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt") -root = tfe.Checkpoint(optimizer=optimizer, - model=model, - optimizer_step=tf.train.get_or_create_global_step()) - -root.save(file_prefix=checkpoint_prefix) -# or -root.restore(tf.train.latest_checkpoint(checkpoint_dir)) -``` - -### Object-oriented metrics - -`tfe.metrics` are stored as objects. Update a metric by passing the new data to -the callable, and retrieve the result using the `tfe.metrics.result` method, -for example: - -```py -m = tfe.metrics.Mean("loss") -m(0) -m(5) -m.result() # => 2.5 -m([8, 9]) -m.result() # => 5.5 -``` - -#### Summaries and TensorBoard - -@{$summaries_and_tensorboard$TensorBoard} is a visualization tool for -understanding, debugging and optimizing the model training process. It uses -summary events that are written while executing the program. - -`tf.contrib.summary` is compatible with both eager and graph execution -environments. Summary operations, such as `tf.contrib.summary.scalar`, are -inserted during model construction. For example, to record summaries once every -100 global steps: - -```py -writer = tf.contrib.summary.create_file_writer(logdir) -global_step=tf.train.get_or_create_global_step() # return global step var - -writer.set_as_default() - -for _ in range(iterations): - global_step.assign_add(1) - # Must include a record_summaries method - with tf.contrib.summary.record_summaries_every_n_global_steps(100): - # your model code goes here - tf.contrib.summary.scalar('loss', loss) - ... -``` - -## Advanced automatic differentiation topics - -### Dynamic models - -`tf.GradientTape` can also be used in dynamic models. This example for a -[backtracking line search](https://wikipedia.org/wiki/Backtracking_line_search) -algorithm looks like normal NumPy code, except there are gradients and is -differentiable, despite the complex control flow: - -```py -def line_search_step(fn, init_x, rate=1.0): - with tf.GradientTape() as tape: - # Variables are automatically recorded, but manually watch a tensor - tape.watch(init_x) - value = fn(init_x) - grad = tape.gradient(value, init_x) - grad_norm = tf.reduce_sum(grad * grad) - init_value = value - while value > init_value - rate * grad_norm: - x = init_x - rate * grad - value = fn(x) - rate /= 2.0 - return x, value -``` - -### Additional functions to compute gradients - -`tf.GradientTape` is a powerful interface for computing gradients, but there -is another [Autograd](https://github.com/HIPS/autograd)-style API available for -automatic differentiation. These functions are useful if writing math code with -only tensors and gradient functions, and without `tfe.Variables`: - -* `tfe.gradients_function` —Returns a function that computes the derivatives - of its input function parameter with respect to its arguments. The input - function parameter must return a scalar value. When the returned function is - invoked, it returns a list of `tf.Tensor` objects: one element for each - argument of the input function. 
Since anything of interest must be passed as a - function parameter, this becomes unwieldy if there's a dependency on many - trainable parameters. -* `tfe.value_and_gradients_function` —Similar to - `tfe.gradients_function`, but when the returned function is invoked, it - returns the value from the input function in addition to the list of - derivatives of the input function with respect to its arguments. - -In the following example, `tfe.gradients_function` takes the `square` -function as an argument and returns a function that computes the partial -derivatives of `square` with respect to its inputs. To calculate the derivative -of `square` at `3`, `grad(3.0)` returns `6`. - -```py -def square(x): - return tf.multiply(x, x) - -grad = tfe.gradients_function(square) - -square(3.) # => 9.0 -grad(3.) # => [6.0] - -# The second-order derivative of square: -gradgrad = tfe.gradients_function(lambda x: grad(x)[0]) -gradgrad(3.) # => [2.0] - -# The third-order derivative is None: -gradgradgrad = tfe.gradients_function(lambda x: gradgrad(x)[0]) -gradgradgrad(3.) # => [None] - - -# With flow control: -def abs(x): - return x if x > 0. else -x - -grad = tfe.gradients_function(abs) - -grad(3.) # => [1.0] -grad(-3.) # => [-1.0] -``` - -### Custom gradients - -Custom gradients are an easy way to override gradients in eager and graph -execution. Within the forward function, define the gradient with respect to the -inputs, outputs, or intermediate results. For example, here's an easy way to clip -the norm of the gradients in the backward pass: - -```py -@tf.custom_gradient -def clip_gradient_by_norm(x, norm): - y = tf.identity(x) - def grad_fn(dresult): - return [tf.clip_by_norm(dresult, norm), None] - return y, grad_fn -``` - -Custom gradients are commonly used to provide a numerically stable gradient for a -sequence of operations: - -```py -def log1pexp(x): - return tf.log(1 + tf.exp(x)) -grad_log1pexp = tfe.gradients_function(log1pexp) - -# The gradient computation works fine at x = 0. -grad_log1pexp(0.) # => [0.5] - -# However, x = 100 fails because of numerical instability. -grad_log1pexp(100.) # => [nan] -``` - -Here, the `log1pexp` function can be analytically simplified with a custom -gradient. The implementation below reuses the value for `tf.exp(x)` that is -computed during the forward pass—making it more efficient by eliminating -redundant calculations: - -```py -@tf.custom_gradient -def log1pexp(x): - e = tf.exp(x) - def grad(dy): - return dy * (1 - 1 / (1 + e)) - return tf.log(1 + e), grad - -grad_log1pexp = tfe.gradients_function(log1pexp) - -# As before, the gradient computation works fine at x = 0. -grad_log1pexp(0.) # => [0.5] - -# And the gradient computation also works at x = 100. -grad_log1pexp(100.) # => [1.0] -``` - -## Performance - -Computation is automatically offloaded to GPUs during eager execution. If you -want control over where a computation runs you can enclose it in a -`tf.device('/gpu:0')` block (or the CPU equivalent): - -```py -import time - -def measure(x, steps): - # TensorFlow initializes a GPU the first time it's used, exclude from timing. 
- tf.matmul(x, x) - start = time.time() - for i in range(steps): - x = tf.matmul(x, x) - _ = x.numpy() # Make sure to execute op and not just enqueue it - end = time.time() - return end - start - -shape = (1000, 1000) -steps = 200 -print("Time to multiply a {} matrix by itself {} times:".format(shape, steps)) - -# Run on CPU: -with tf.device("/cpu:0"): - print("CPU: {} secs".format(measure(tf.random_normal(shape), steps))) - -# Run on GPU, if available: -if tfe.num_gpus() > 0: - with tf.device("/gpu:0"): - print("GPU: {} secs".format(measure(tf.random_normal(shape), steps))) -else: - print("GPU: not found") -``` - -Output (exact numbers depend on hardware): - -``` -Time to multiply a (1000, 1000) matrix by itself 200 times: -CPU: 4.614904403686523 secs -GPU: 0.5581181049346924 secs -``` - -A `tf.Tensor` object can be copied to a different device to execute its -operations: - -```py -x = tf.random_normal([10, 10]) - -x_gpu0 = x.gpu() -x_cpu = x.cpu() - -_ = tf.matmul(x_cpu, x_cpu) # Runs on CPU -_ = tf.matmul(x_gpu0, x_gpu0) # Runs on GPU:0 - -if tfe.num_gpus() > 1: - x_gpu1 = x.gpu(1) - _ = tf.matmul(x_gpu1, x_gpu1) # Runs on GPU:1 -``` - -### Benchmarks - -For compute-heavy models, such as -[ResNet50](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/eager/python/examples/resnet50) -training on a GPU, eager execution performance is comparable to graph execution. -But this gap grows larger for models with less computation and there is work to -be done for optimizing hot code paths for models with lots of small operations. - - -## Work with graphs - -While eager execution makes development and debugging more interactive, -TensorFlow graph execution has advantages for distributed training, performance -optimizations, and production deployment. However, writing graph code can feel -different than writing regular Python code and more difficult to debug. - -For building and training graph-constructed models, the Python program first -builds a graph representing the computation, then invokes `Session.run` to send -the graph for execution on the C++-based runtime. This provides: - -* Automatic differentiation using static autodiff. -* Simple deployment to a platform independent server. -* Graph-based optimizations (common subexpression elimination, constant-folding, etc.). -* Compilation and kernel fusion. -* Automatic distribution and replication (placing nodes on the distributed system). - -Deploying code written for eager execution is more difficult: either generate a -graph from the model, or run the Python runtime and code directly on the server. - -### Write compatible code - -The same code written for eager execution will also build a graph during graph -execution. Do this by simply running the same code in a new Python session where -eager execution is not enabled. - -Most TensorFlow operations work during eager execution, but there are some things -to keep in mind: - -* Use `tf.data` for input processing instead of queues. It's faster and easier. -* Use object-oriented layer APIs—like `tf.keras.layers` and - `tf.keras.Model`—since they have explicit storage for variables. -* Most model code works the same during eager and graph execution, but there are - exceptions. (For example, dynamic models using Python control flow to change the - computation based on inputs.) -* Once eager execution is enabled with `tf.enable_eager_execution`, it - cannot be turned off. Start a new Python session to return to graph execution. 
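
To make the compatibility notes above concrete, here is a minimal sketch
(assuming TensorFlow 1.x; the helper names `build_model` and `compute_loss` are
illustrative, not from the examples repository) of code that runs unchanged
whether or not `tf.enable_eager_execution()` was called at the start of the
program:

```py
import numpy as np
import tensorflow as tf

def build_model():
  # Object-oriented layers hold their own variables in both modes.
  return tf.keras.Sequential([
      tf.keras.layers.Dense(10, activation="relu", input_shape=(784,)),
      tf.keras.layers.Dense(10)])

def compute_loss(model, images, labels):
  logits = model(images)
  return tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

model = build_model()

if tf.executing_eagerly():
  # Eager: tensors are concrete values, so just call the functions.
  images = tf.zeros([32, 784])
  labels = tf.zeros([32], dtype=tf.int64)
  print(compute_loss(model, images, labels).numpy())
else:
  # Graph: the same calls build graph nodes that a Session executes later.
  images = tf.placeholder(tf.float32, [None, 784])
  labels = tf.placeholder(tf.int64, [None])
  loss = compute_loss(model, images, labels)
  with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(loss, feed_dict={images: np.zeros([32, 784]),
                                    labels: np.zeros([32], dtype=np.int64)}))
```

Keeping the model and loss definitions free of mode-specific code is what lets
you iterate eagerly during development and then reuse the same functions when
building a graph for deployment.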
- -It's best to write code for both eager execution *and* graph execution. This -gives you eager's interactive experimentation and debuggability with the -distributed performance benefits of graph execution. - -Write, debug, and iterate in eager execution, then import the model graph for -production deployment. Use `tfe.Checkpoint` to save and restore model -variables, this allows movement between eager and graph execution environments. -See the examples in: -[tensorflow/contrib/eager/python/examples](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/eager/python/examples). - -### Use eager execution in a graph environment - -Selectively enable eager execution in a TensorFlow graph environment using -`tfe.py_func`. This is used when `tf.enable_eager_execution()` has *not* -been called. - -```py -def my_py_func(x): - x = tf.matmul(x, x) # You can use tf ops - print(x) # but it's eager! - return x - -with tf.Session() as sess: - x = tf.placeholder(dtype=tf.float32) - # Call eager function in graph! - pf = tfe.py_func(my_py_func, [x], tf.float32) - sess.run(pf, feed_dict={x: [[2.0]]}) # [[4.0]] -``` diff --git a/tensorflow/docs_src/programmers_guide/embedding.md b/tensorflow/docs_src/programmers_guide/embedding.md deleted file mode 100644 index 8a98367dfb..0000000000 --- a/tensorflow/docs_src/programmers_guide/embedding.md +++ /dev/null @@ -1,262 +0,0 @@ -# Embeddings - -This document introduces the concept of embeddings, gives a simple example of -how to train an embedding in TensorFlow, and explains how to view embeddings -with the TensorBoard Embedding Projector -([live example](http://projector.tensorflow.org)). The first two parts target -newcomers to machine learning or TensorFlow, and the Embedding Projector how-to -is for users at all levels. - -An alternative tutorial on these concepts is available in the -[Embeddings section of Machine Learning Crash Course](https://developers.google.com/machine-learning/crash-course/embeddings/video-lecture). - -[TOC] - -An **embedding** is a mapping from discrete objects, such as words, to vectors -of real numbers. For example, a 300-dimensional embedding for English words -could include: - -``` -blue: (0.01359, 0.00075997, 0.24608, ..., -0.2524, 1.0048, 0.06259) -blues: (0.01396, 0.11887, -0.48963, ..., 0.033483, -0.10007, 0.1158) -orange: (-0.24776, -0.12359, 0.20986, ..., 0.079717, 0.23865, -0.014213) -oranges: (-0.35609, 0.21854, 0.080944, ..., -0.35413, 0.38511, -0.070976) -``` - -The individual dimensions in these vectors typically have no inherent meaning. -Instead, it's the overall patterns of location and distance between vectors -that machine learning takes advantage of. - -Embeddings are important for input to machine learning. Classifiers, and neural -networks more generally, work on vectors of real numbers. They train best on -dense vectors, where all values contribute to define an object. However, many -important inputs to machine learning, such as words of text, do not have a -natural vector representation. Embedding functions are the standard and -effective way to transform such discrete input objects into useful -continuous vectors. - -Embeddings are also valuable as outputs of machine learning. Because embeddings -map objects to vectors, applications can use similarity in vector space (for -instance, Euclidean distance or the angle between vectors) as a robust and -flexible measure of object similarity. One common use is to find nearest -neighbors. 
Using the same word embeddings as above, for instance, here are the -three nearest neighbors for each word and the corresponding angles: - -``` -blue: (red, 47.6°), (yellow, 51.9°), (purple, 52.4°) -blues: (jazz, 53.3°), (folk, 59.1°), (bluegrass, 60.6°) -orange: (yellow, 53.5°), (colored, 58.0°), (bright, 59.9°) -oranges: (apples, 45.3°), (lemons, 48.3°), (mangoes, 50.4°) -``` - -This would tell an application that apples and oranges are in some way more -similar (45.3° apart) than lemons and oranges (48.3° apart). - -## Embeddings in TensorFlow - -To create word embeddings in TensorFlow, we first split the text into words -and then assign an integer to every word in the vocabulary. Let us assume that -this has already been done, and that `word_ids` is a vector of these integers. -For example, the sentence “I have a cat.” could be split into -`[“I”, “have”, “a”, “cat”, “.”]` and then the corresponding `word_ids` tensor -would have shape `[5]` and consist of 5 integers. To map these word ids -to vectors, we need to create the embedding variable and use the -`tf.nn.embedding_lookup` function as follows: - -``` -word_embeddings = tf.get_variable(“word_embeddings”, - [vocabulary_size, embedding_size]) -embedded_word_ids = tf.nn.embedding_lookup(word_embeddings, word_ids) -``` - -After this, the tensor `embedded_word_ids` will have shape `[5, embedding_size]` -in our example and contain the embeddings (dense vectors) for each of the 5 -words. At the end of training, `word_embeddings` will contain the embeddings -for all words in the vocabulary. - -Embeddings can be trained in many network types, and with various loss -functions and data sets. For example, one could use a recurrent neural network -to predict the next word from the previous one given a large corpus of -sentences, or one could train two networks to do multi-lingual translation. -These methods are described in the @{$word2vec$Vector Representations of Words} -tutorial. - -## Visualizing Embeddings - -TensorBoard includes the **Embedding Projector**, a tool that lets you -interactively visualize embeddings. This tool can read embeddings from your -model and render them in two or three dimensions. - -The Embedding Projector has three panels: - -- *Data panel* on the top left, where you can choose the run, the embedding - variable and data columns to color and label points by. -- *Projections panel* on the bottom left, where you can choose the type of - projection. -- *Inspector panel* on the right side, where you can search for particular - points and see a list of nearest neighbors. - -### Projections -The Embedding Projector provides three ways to reduce the dimensionality of a -data set. - -- *[t-SNE](https://en.wikipedia.org/wiki/T-distributed_stochastic_neighbor_embedding)*: - a nonlinear nondeterministic algorithm (T-distributed stochastic neighbor - embedding) that tries to preserve local neighborhoods in the data, often at - the expense of distorting global structure. You can choose whether to compute - two- or three-dimensional projections. - -- *[PCA](https://en.wikipedia.org/wiki/Principal_component_analysis)*: - a linear deterministic algorithm (principal component analysis) that tries to - capture as much of the data variability in as few dimensions as possible. PCA - tends to highlight large-scale structure in the data, but can distort local - neighborhoods. The Embedding Projector computes the top 10 principal - components, from which you can choose two or three to view. 
- -- *Custom*: a linear projection onto horizontal and vertical axes that you - specify using labels in the data. You define the horizontal axis, for - instance, by giving text patterns for "Left" and "Right". The Embedding - Projector finds all points whose label matches the "Left" pattern and - computes the centroid of that set; similarly for "Right". The line passing - through these two centroids defines the horizontal axis. The vertical axis is - likewise computed from the centroids for points matching the "Up" and "Down" - text patterns. - -Further useful articles are -[How to Use t-SNE Effectively](https://distill.pub/2016/misread-tsne/) and -[Principal Component Analysis Explained Visually](http://setosa.io/ev/principal-component-analysis/). - -### Exploration - -You can explore visually by zooming, rotating, and panning using natural -click-and-drag gestures. Hovering your mouse over a point will show any -[metadata](#metadata) for that point. You can also inspect nearest-neighbor -subsets. Clicking on a point causes the right pane to list the nearest -neighbors, along with distances to the current point. The nearest-neighbor -points are also highlighted in the projection. - -It is sometimes useful to restrict the view to a subset of points and perform -projections only on those points. To do so, you can select points in multiple -ways: - -- After clicking on a point, its nearest neighbors are also selected. -- After a search, the points matching the query are selected. -- Enabling selection, clicking on a point and dragging defines a selection - sphere. - -Then click the "Isolate *nnn* points" button at the top of the Inspector pane -on the right hand side. The following image shows 101 points selected and ready -for the user to click "Isolate 101 points": - -![Selection of nearest neighbors](https://www.tensorflow.org/images/embedding-nearest-points.png "Selection of nearest neighbors") - -*Selection of the nearest neighbors of “important” in a word embedding dataset.* - -Advanced tip: filtering with custom projection can be powerful. Below, we -filtered the 100 nearest neighbors of “politics” and projected them onto the -“worst” - “best” vector as an x axis. The y axis is random. As a result, one -finds on the right side “ideas”, “science”, “perspective”, “journalism” but on -the left “crisis”, “violence” and “conflict”. - - - - - - - - - - -
-Custom projection controls (left) and the resulting custom projection of the
-neighbors of "politics" onto the "best" - "worst" vector (right).
- -To share your findings, you can use the bookmark panel in the bottom right -corner and save the current state (including computed coordinates of any -projection) as a small file. The Projector can then be pointed to a set of one -or more of these files, producing the panel below. Other users can then walk -through a sequence of bookmarks. - -Bookmark panel - -### Metadata - -If you are working with an embedding, you'll probably want to attach -labels/images to the data points. You can do this by generating a metadata file -containing the labels for each point and clicking "Load data" in the data panel -of the Embedding Projector. - -The metadata can be either labels or images, which are -stored in a separate file. For labels, the format should -be a [TSV file](https://en.wikipedia.org/wiki/Tab-separated_values) -(tab characters shown in red) whose first line contains column headers -(shown in bold) and subsequent lines contain the metadata values. For example: - - -Word\tFrequency
- Airplane\t345
- Car\t241
- ... -
- -The order of lines in the metadata file is assumed to match the order of -vectors in the embedding variable, except for the header. Consequently, the -(i+1)-th line in the metadata file corresponds to the i-th row of the embedding -variable. If the TSV metadata file has only a single column, then we don’t -expect a header row, and assume each row is the label of the embedding. We -include this exception because it matches the commonly-used "vocab file" -format. - -To use images as metadata, you must produce a single -[sprite image](https://www.google.com/webhp#q=what+is+a+sprite+image), -consisting of small thumbnails, one for each vector in the embedding. The -sprite should store thumbnails in row-first order: the first data point placed -in the top left and the last data point in the bottom right, though the last -row doesn't have to be filled, as shown below. - - - - - - - - - - - - - - - - - -
-    0  1  2
-    3  4  5
-    6  7
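-
-As a rough sketch (not taken from this guide), packing an
-`[n, height, width, channels]` batch of thumbnails into such a row-first
-sprite could be done with NumPy along these lines:
-
-```
-import numpy as np
-
-def images_to_sprite(images):
-  # images: array of shape [n, h, w, c]; returns a single square sprite
-  # with the thumbnails laid out in row-first order.
-  n, h, w, c = images.shape
-  grid = int(np.ceil(np.sqrt(n)))
-  sprite = np.zeros((grid * h, grid * w, c), dtype=images.dtype)
-  for i in range(n):
-    row, col = divmod(i, grid)
-    sprite[row * h:(row + 1) * h, col * w:(col + 1) * w] = images[i]
-  return sprite
-```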
- -Follow [this link](https://www.tensorflow.org/images/embedding-mnist.mp4) -to see a fun example of thumbnail images in the Embedding Projector. - - -## Mini-FAQ - -**Is "embedding" an action or a thing?** -Both. People talk about embedding words in a vector space (action) and about -producing word embeddings (things). Common to both is the notion of embedding -as a mapping from discrete objects to vectors. Creating or applying that -mapping is an action, but the mapping itself is a thing. - -**Are embeddings high-dimensional or low-dimensional?** -It depends. A 300-dimensional vector space of words and phrases, for instance, -is often called low-dimensional (and dense) when compared to the millions of -words and phrases it can contain. But mathematically it is high-dimensional, -displaying many properties that are dramatically different from what our human -intuition has learned about 2- and 3-dimensional spaces. - -**Is an embedding the same as an embedding layer?** -No. An *embedding layer* is a part of neural network, but an *embedding* is a more -general concept. diff --git a/tensorflow/docs_src/programmers_guide/estimators.md b/tensorflow/docs_src/programmers_guide/estimators.md deleted file mode 100644 index b13b47184d..0000000000 --- a/tensorflow/docs_src/programmers_guide/estimators.md +++ /dev/null @@ -1,193 +0,0 @@ -# Estimators - -This document introduces @{tf.estimator$**Estimators**}--a high-level TensorFlow -API that greatly simplifies machine learning programming. Estimators encapsulate -the following actions: - -* training -* evaluation -* prediction -* export for serving - -You may either use the pre-made Estimators we provide or write your -own custom Estimators. All Estimators--whether pre-made or custom--are -classes based on the @{tf.estimator.Estimator} class. - -Note: TensorFlow also includes a deprecated `Estimator` class at -@{tf.contrib.learn.Estimator}, which you should not use. - - -## Advantages of Estimators - -Estimators provide the following benefits: - -* You can run Estimator-based models on a local host or on a - distributed multi-server environment without changing your model. - Furthermore, you can run Estimator-based models on CPUs, GPUs, - or TPUs without recoding your model. -* Estimators simplify sharing implementations between model developers. -* You can develop a state of the art model with high-level intuitive code. - In short, it is generally much easier to create models with Estimators - than with the low-level TensorFlow APIs. -* Estimators are themselves built on @{tf.layers}, which - simplifies customization. -* Estimators build the graph for you. -* Estimators provide a safe distributed training loop that controls how and - when to: - * build the graph - * initialize variables - * start queues - * handle exceptions - * create checkpoint files and recover from failures - * save summaries for TensorBoard - -When writing an application with Estimators, you must separate the data input -pipeline from the model. This separation simplifies experiments with -different data sets. - - -## Pre-made Estimators - -Pre-made Estimators enable you to work at a much higher conceptual level -than the base TensorFlow APIs. You no longer have to worry about creating -the computational graph or sessions since Estimators handle all -the "plumbing" for you. That is, pre-made Estimators create and manage -@{tf.Graph$`Graph`} and @{tf.Session$`Session`} objects for you. 
Furthermore, -pre-made Estimators let you experiment with different model architectures by -making only minimal code changes. @{tf.estimator.DNNClassifier$`DNNClassifier`}, -for example, is a pre-made Estimator class that trains classification models -based on dense, feed-forward neural networks. - - -### Structure of a pre-made Estimators program - -A TensorFlow program relying on a pre-made Estimator typically consists -of the following four steps: - -1. **Write one or more dataset importing functions.** For example, you might - create one function to import the training set and another function to - import the test set. Each dataset importing function must return two - objects: - - * a dictionary in which the keys are feature names and the - values are Tensors (or SparseTensors) containing the corresponding - feature data - * a Tensor containing one or more labels - - For example, the following code illustrates the basic skeleton for - an input function: - - def input_fn(dataset): - ... # manipulate dataset, extracting the feature dict and the label - return feature_dict, label - - (See @{$programmers_guide/datasets} for full details.) - -2. **Define the feature columns.** Each @{tf.feature_column} - identifies a feature name, its type, and any input pre-processing. - For example, the following snippet creates three feature - columns that hold integer or floating-point data. The first two - feature columns simply identify the feature's name and type. The - third feature column also specifies a lambda the program will invoke - to scale the raw data: - - # Define three numeric feature columns. - population = tf.feature_column.numeric_column('population') - crime_rate = tf.feature_column.numeric_column('crime_rate') - median_education = tf.feature_column.numeric_column('median_education', - normalizer_fn=lambda x: x - global_education_mean) - -3. **Instantiate the relevant pre-made Estimator.** For example, here's - a sample instantiation of a pre-made Estimator named `LinearClassifier`: - - # Instantiate an estimator, passing the feature columns. - estimator = tf.estimator.LinearClassifier( - feature_columns=[population, crime_rate, median_education], - ) - -4. **Call a training, evaluation, or inference method.** - For example, all Estimators provide a `train` method, which trains a model. - - # my_training_set is the function created in Step 1 - estimator.train(input_fn=my_training_set, steps=2000) - - -### Benefits of pre-made Estimators - -Pre-made Estimators encode best practices, providing the following benefits: - -* Best practices for determining where different parts of the computational - graph should run, implementing strategies on a single machine or on a - cluster. -* Best practices for event (summary) writing and universally useful - summaries. - -If you don't use pre-made Estimators, you must implement the preceding -features yourself. - - -## Custom Estimators - -The heart of every Estimator--whether pre-made or custom--is its -**model function**, which is a method that builds graphs for training, -evaluation, and prediction. When you are using a pre-made Estimator, -someone else has already implemented the model function. When relying -on a custom Estimator, you must write the model function yourself. A -@{$custom_estimators$companion document} -explains how to write the model function. - - -## Recommended workflow - -We recommend the following workflow: - -1. 
Assuming a suitable pre-made Estimator exists, use it to build your - first model and use its results to establish a baseline. -2. Build and test your overall pipeline, including the integrity and - reliability of your data with this pre-made Estimator. -3. If suitable alternative pre-made Estimators are available, run - experiments to determine which pre-made Estimator produces the - best results. -4. Possibly, further improve your model by building your own custom Estimator. - - -## Creating Estimators from Keras models - -You can convert existing Keras models to Estimators. Doing so enables your Keras -model to access Estimator's strengths, such as distributed training. Call -@{tf.keras.estimator.model_to_estimator} as in the -following sample: - -```python -# Instantiate a Keras inception v3 model. -keras_inception_v3 = tf.keras.applications.inception_v3.InceptionV3(weights=None) -# Compile model with the optimizer, loss, and metrics you'd like to train with. -keras_inception_v3.compile(optimizer=tf.keras.optimizers.SGD(lr=0.0001, momentum=0.9), - loss='categorical_crossentropy', - metric='accuracy') -# Create an Estimator from the compiled Keras model. Note the initial model -# state of the keras model is preserved in the created Estimator. -est_inception_v3 = tf.keras.estimator.model_to_estimator(keras_model=keras_inception_v3) - -# Treat the derived Estimator as you would with any other Estimator. -# First, recover the input name(s) of Keras model, so we can use them as the -# feature column name(s) of the Estimator input function: -keras_inception_v3.input_names # print out: ['input_1'] -# Once we have the input name(s), we can create the input function, for example, -# for input(s) in the format of numpy ndarray: -train_input_fn = tf.estimator.inputs.numpy_input_fn( - x={"input_1": train_data}, - y=train_labels, - num_epochs=1, - shuffle=False) -# To train, we call Estimator's train function: -est_inception_v3.train(input_fn=train_input_fn, steps=2000) -``` -Note that the names of feature columns and labels of a keras estimator come from -the corresponding compiled keras model. For example, the input key names for -`train_input_fn` above can be obtained from `keras_inception_v3.input_names`, -and similarly, the predicted output names can be obtained from -`keras_inception_v3.output_names`. - -For more details, please refer to the documentation for -@{tf.keras.estimator.model_to_estimator}. diff --git a/tensorflow/docs_src/programmers_guide/faq.md b/tensorflow/docs_src/programmers_guide/faq.md deleted file mode 100644 index b6291a9ffa..0000000000 --- a/tensorflow/docs_src/programmers_guide/faq.md +++ /dev/null @@ -1,297 +0,0 @@ -# Frequently Asked Questions - -This document provides answers to some of the frequently asked questions about -TensorFlow. If you have a question that is not covered here, you might find an -answer on one of the TensorFlow @{$about$community resources}. - -[TOC] - -## Features and Compatibility - -#### Can I run distributed training on multiple computers? - -Yes! TensorFlow gained -@{$distributed$support for distributed computation} in -version 0.8. TensorFlow now supports multiple devices (CPUs and GPUs) in one or -more computers. - -#### Does TensorFlow work with Python 3? - -As of the 0.6.0 release timeframe (Early December 2015), we do support Python -3.3+. - -## Building a TensorFlow graph - -See also the -@{$python/framework$API documentation on building graphs}. 
- -#### Why does `c = tf.matmul(a, b)` not execute the matrix multiplication immediately? - -In the TensorFlow Python API, `a`, `b`, and `c` are -@{tf.Tensor} objects. A `Tensor` object is -a symbolic handle to the result of an operation, but does not actually hold the -values of the operation's output. Instead, TensorFlow encourages users to build -up complicated expressions (such as entire neural networks and its gradients) as -a dataflow graph. You then offload the computation of the entire dataflow graph -(or a subgraph of it) to a TensorFlow -@{tf.Session}, which is able to execute the -whole computation much more efficiently than executing the operations -one-by-one. - -#### How are devices named? - -The supported device names are `"/device:CPU:0"` (or `"/cpu:0"`) for the CPU -device, and `"/device:GPU:i"` (or `"/gpu:i"`) for the *i*th GPU device. - -#### How do I place operations on a particular device? - -To place a group of operations on a device, create them within a -@{tf.device$`with tf.device(name):`} context. See -the how-to documentation on -@{$using_gpu$using GPUs with TensorFlow} for details of how -TensorFlow assigns operations to devices, and the -@{$deep_cnn$CIFAR-10 tutorial} for an example model that -uses multiple GPUs. - - -## Running a TensorFlow computation - -See also the -@{$python/client$API documentation on running graphs}. - -#### What's the deal with feeding and placeholders? - -Feeding is a mechanism in the TensorFlow Session API that allows you to -substitute different values for one or more tensors at run time. The `feed_dict` -argument to @{tf.Session.run} is a -dictionary that maps @{tf.Tensor} objects to -numpy arrays (and some other types), which will be used as the values of those -tensors in the execution of a step. - -#### What is the difference between `Session.run()` and `Tensor.eval()`? - -If `t` is a @{tf.Tensor} object, -@{tf.Tensor.eval} is shorthand for -@{tf.Session.run}, where `sess` is the -current @{tf.get_default_session}. The -two following snippets of code are equivalent: - -```python -# Using `Session.run()`. -sess = tf.Session() -c = tf.constant(5.0) -print(sess.run(c)) - -# Using `Tensor.eval()`. -c = tf.constant(5.0) -with tf.Session(): - print(c.eval()) -``` - -In the second example, the session acts as a -[context manager](https://docs.python.org/2.7/reference/compound_stmts.html#with), -which has the effect of installing it as the default session for the lifetime of -the `with` block. The context manager approach can lead to more concise code for -simple use cases (like unit tests); if your code deals with multiple graphs and -sessions, it may be more straightforward to make explicit calls to -`Session.run()`. - -#### Do Sessions have a lifetime? What about intermediate tensors? - -Sessions can own resources, such as -@{tf.Variable}, -@{tf.QueueBase}, and -@{tf.ReaderBase}. These resources can sometimes use -a significant amount of memory, and can be released when the session is closed by calling -@{tf.Session.close}. - -The intermediate tensors that are created as part of a call to -@{$python/client$`Session.run()`} will be freed at or before the -end of the call. - -#### Does the runtime parallelize parts of graph execution? - -The TensorFlow runtime parallelizes graph execution across many different -dimensions: - -* The individual ops have parallel implementations, using multiple cores in a - CPU, or multiple threads in a GPU. 
-* Independent nodes in a TensorFlow graph can run in parallel on multiple - devices, which makes it possible to speed up - @{$deep_cnn$CIFAR-10 training using multiple GPUs}. -* The Session API allows multiple concurrent steps (i.e. calls to - @{tf.Session.run} in parallel). This - enables the runtime to get higher throughput, if a single step does not use - all of the resources in your computer. - -#### Which client languages are supported in TensorFlow? - -TensorFlow is designed to support multiple client languages. -Currently, the best-supported client language is [Python](../api_docs/python/index.md). Experimental interfaces for -executing and constructing graphs are also available for -[C++](../api_docs/cc/index.md), [Java](../api_docs/java/reference/org/tensorflow/package-summary.html) and [Go](https://godoc.org/github.com/tensorflow/tensorflow/tensorflow/go). - -TensorFlow also has a -[C-based client API](https://www.tensorflow.org/code/tensorflow/c/c_api.h) -to help build support for more client languages. We invite contributions of new -language bindings. - -Bindings for various other languages (such as [C#](https://github.com/migueldeicaza/TensorFlowSharp), [Julia](https://github.com/malmaud/TensorFlow.jl), [Ruby](https://github.com/somaticio/tensorflow.rb) and [Scala](https://github.com/eaplatanios/tensorflow_scala)) created and supported by the open source community build on top of the C API supported by the TensorFlow maintainers. - -#### Does TensorFlow make use of all the devices (GPUs and CPUs) available on my machine? - -TensorFlow supports multiple GPUs and CPUs. See the how-to documentation on -@{$using_gpu$using GPUs with TensorFlow} for details of how -TensorFlow assigns operations to devices, and the -@{$deep_cnn$CIFAR-10 tutorial} for an example model that -uses multiple GPUs. - -Note that TensorFlow only uses GPU devices with a compute capability greater -than 3.5. - -#### Why does `Session.run()` hang when using a reader or a queue? - -The @{tf.ReaderBase} and -@{tf.QueueBase} classes provide special operations that -can *block* until input (or free space in a bounded queue) becomes -available. These operations allow you to build sophisticated -@{$reading_data$input pipelines}, at the cost of making the -TensorFlow computation somewhat more complicated. See the how-to documentation -for -@{$reading_data#creating_threads_to_prefetch_using_queuerunner_objects$using `QueueRunner` objects to drive queues and readers} -for more information on how to use them. - -## Variables - -See also the how-to documentation on @{$variables$variables} and -@{$python/state_ops$the API documentation for variables}. - -#### What is the lifetime of a variable? - -A variable is created when you first run the -@{tf.Variable.initializer} -operation for that variable in a session. It is destroyed when that -@{tf.Session.close}. - -#### How do variables behave when they are concurrently accessed? - -Variables allow concurrent read and write operations. The value read from a -variable may change if it is concurrently updated. By default, concurrent -assignment operations to a variable are allowed to run with no mutual exclusion. -To acquire a lock when assigning to a variable, pass `use_locking=True` to -@{tf.Variable.assign}. - -## Tensor shapes - -See also the -@{tf.TensorShape}. - -#### How can I determine the shape of a tensor in Python? - -In TensorFlow, a tensor has both a static (inferred) shape and a dynamic (true) -shape. 
The static shape can be read using the -@{tf.Tensor.get_shape} -method: this shape is inferred from the operations that were used to create the -tensor, and may be -@{tf.TensorShape$partially complete}. If the static -shape is not fully defined, the dynamic shape of a `Tensor` `t` can be -determined by evaluating @{tf.shape$`tf.shape(t)`}. - -#### What is the difference between `x.set_shape()` and `x = tf.reshape(x)`? - -The @{tf.Tensor.set_shape} method updates -the static shape of a `Tensor` object, and it is typically used to provide -additional shape information when this cannot be inferred directly. It does not -change the dynamic shape of the tensor. - -The @{tf.reshape} operation creates -a new tensor with a different dynamic shape. - -#### How do I build a graph that works with variable batch sizes? - -It is often useful to build a graph that works with variable batch sizes -so that the same code can be used for (mini-)batch training, and -single-instance inference. The resulting graph can be -@{tf.Graph.as_graph_def$saved as a protocol buffer} -and -@{tf.import_graph_def$imported into another program}. - -When building a variable-size graph, the most important thing to remember is not -to encode the batch size as a Python constant, but instead to use a symbolic -`Tensor` to represent it. The following tips may be useful: - -* Use [`batch_size = tf.shape(input)[0]`](../api_docs/python/array_ops.md#shape) - to extract the batch dimension from a `Tensor` called `input`, and store it in - a `Tensor` called `batch_size`. - -* Use @{tf.reduce_mean} instead - of `tf.reduce_sum(...) / batch_size`. - - -## TensorBoard - -#### How can I visualize a TensorFlow graph? - -See the @{$graph_viz$graph visualization tutorial}. - -#### What is the simplest way to send data to TensorBoard? - -Add summary ops to your TensorFlow graph, and write -these summaries to a log directory. Then, start TensorBoard using - - python tensorflow/tensorboard/tensorboard.py --logdir=path/to/log-directory - -For more details, see the -@{$summaries_and_tensorboard$Summaries and TensorBoard tutorial}. - -#### Every time I launch TensorBoard, I get a network security popup! - -You can change TensorBoard to serve on localhost rather than '0.0.0.0' by -the flag --host=localhost. This should quiet any security warnings. - -## Extending TensorFlow - -See the how-to documentation for -@{$adding_an_op$adding a new operation to TensorFlow}. - -#### My data is in a custom format. How do I read it using TensorFlow? - -There are three main options for dealing with data in a custom format. - -The easiest option is to write parsing code in Python that transforms the data -into a numpy array. Then, use @{tf.data.Dataset.from_tensor_slices} to -create an input pipeline from the in-memory data. - -If your data doesn't fit in memory, try doing the parsing in the Dataset -pipeline. Start with an appropriate file reader, like -@{tf.data.TextLineDataset}. Then convert the dataset by mapping -@{tf.data.Dataset.map$mapping} appropriate operations over it. -Prefer predefined TensorFlow operations such as @{tf.decode_raw}, -@{tf.decode_csv}, @{tf.parse_example}, or @{tf.image.decode_png}. - -If your data is not easily parsable with the built-in TensorFlow operations, -consider converting it, offline, to a format that is easily parsable, such -as @{tf.python_io.TFRecordWriter$`TFRecord`} format. - -The most efficient method to customize the parsing behavior is to -@{$adding_an_op$add a new op written in C++} that parses your -data format. 
The @{$new_data_formats$guide to handling new data formats} has -more information about the steps for doing this. - - -## Miscellaneous - -#### What is TensorFlow's coding style convention? - -The TensorFlow Python API adheres to the -[PEP8](https://www.python.org/dev/peps/pep-0008/) conventions.* In -particular, we use `CamelCase` names for classes, and `snake_case` names for -functions, methods, and properties. We also adhere to the -[Google Python style guide](https://google.github.io/styleguide/pyguide.html). - -The TensorFlow C++ code base adheres to the -[Google C++ style guide](https://google.github.io/styleguide/cppguide.html). - -(* With one exception: we use 2-space indentation instead of 4-space -indentation.) - diff --git a/tensorflow/docs_src/programmers_guide/feature_columns.md b/tensorflow/docs_src/programmers_guide/feature_columns.md deleted file mode 100644 index 90f5c53a17..0000000000 --- a/tensorflow/docs_src/programmers_guide/feature_columns.md +++ /dev/null @@ -1,572 +0,0 @@ -# Feature Columns - -This document details feature columns. Think of **feature columns** as the -intermediaries between raw data and Estimators. Feature columns are very rich, -enabling you to transform a diverse range of raw data into formats that -Estimators can use, allowing easy experimentation. - -In @{$premade_estimators$Premade Estimators}, we used the premade -Estimator, @{tf.estimator.DNNClassifier$`DNNClassifier`} to train a model to -predict different types of Iris flowers from four input features. That example -created only numerical feature columns (of type -@{tf.feature_column.numeric_column}). Although numerical feature columns model -the lengths of petals and sepals effectively, real world data sets contain all -kinds of features, many of which are non-numerical. - -
-Some real-world features (such as longitude) are numerical, but many are not.
- -## Input to a Deep Neural Network - -What kind of data can a deep neural network operate on? The answer -is, of course, numbers (for example, `tf.float32`). After all, every neuron in -a neural network performs multiplication and addition operations on weights and -input data. Real-life input data, however, often contains non-numerical -(categorical) data. For example, consider a `product_class` feature that can -contain the following three non-numerical values: - -* `kitchenware` -* `electronics` -* `sports` - -ML models generally represent categorical values as simple vectors in which a -1 represents the presence of a value and a 0 represents the absence of a value. -For example, when `product_class` is set to `sports`, an ML model would usually -represent `product_class` as `[0, 0, 1]`, meaning: - -* `0`: `kitchenware` is absent -* `0`: `electronics` is absent -* `1`: `sports` is present - -So, although raw data can be numerical or categorical, an ML model represents -all features as numbers. - -## Feature Columns - -As the following figure suggests, you specify the input to a model through the -`feature_columns` argument of an Estimator (`DNNClassifier` for Iris). -Feature Columns bridge input data (as returned by `input_fn`) with your model. - -
-Feature columns bridge raw data with the data your model needs.
- -To create feature columns, call functions from the -@{tf.feature_column} module. This document explains nine of the functions in -that module. As the following figure shows, all nine functions return either a -Categorical-Column or a Dense-Column object, except `bucketized_column`, which -inherits from both classes: - -
-Feature column methods fall into two main categories and one hybrid category.
- -Let's look at these functions in more detail. - -### Numeric column - -The Iris classifier calls the @{tf.feature_column.numeric_column} function for -all input features: - - * `SepalLength` - * `SepalWidth` - * `PetalLength` - * `PetalWidth` - -Although `tf.numeric_column` provides optional arguments, calling -`tf.numeric_column` without any arguments, as follows, is a fine way to specify -a numerical value with the default data type (`tf.float32`) as input to your -model: - -```python -# Defaults to a tf.float32 scalar. -numeric_feature_column = tf.feature_column.numeric_column(key="SepalLength") -``` - -To specify a non-default numerical data type, use the `dtype` argument. For -example: - -``` python -# Represent a tf.float64 scalar. -numeric_feature_column = tf.feature_column.numeric_column(key="SepalLength", - dtype=tf.float64) -``` - -By default, a numeric column creates a single value (scalar). Use the shape -argument to specify another shape. For example: - - -```python -# Represent a 10-element vector in which each cell contains a tf.float32. -vector_feature_column = tf.feature_column.numeric_column(key="Bowling", - shape=10) - -# Represent a 10x5 matrix in which each cell contains a tf.float32. -matrix_feature_column = tf.feature_column.numeric_column(key="MyMatrix", - shape=[10,5]) -``` -### Bucketized column - -Often, you don't want to feed a number directly into the model, but instead -split its value into different categories based on numerical ranges. To do so, -create a @{tf.feature_column.bucketized_column$bucketized column}. For -example, consider raw data that represents the year a house was built. Instead -of representing that year as a scalar numeric column, we could split the year -into the following four buckets: - -
-Dividing year data into four buckets.
- -The model will represent the buckets as follows: - -|Date Range |Represented as... | -|:----------|:-----------------| -|< 1960 | [1, 0, 0, 0] | -|>= 1960 but < 1980 | [0, 1, 0, 0] | -|>= 1980 but < 2000 | [0, 0, 1, 0] | -|>= 2000 | [0, 0, 0, 1] | - -Why would you want to split a number—a perfectly valid input to your -model—into a categorical value? Well, notice that the categorization splits a -single input number into a four-element vector. Therefore, the model now can -learn _four individual weights_ rather than just one; four weights creates a -richer model than one weight. More importantly, bucketizing enables the model -to clearly distinguish between different year categories since only one of the -elements is set (1) and the other three elements are cleared (0). For example, -when we just use a single number (a year) as input, a linear model can only -learn a linear relationship. So, bucketing provides the model with additional -flexibility that the model can use to learn. - -The following code demonstrates how to create a bucketized feature: - - -```python -# First, convert the raw input to a numeric column. -numeric_feature_column = tf.feature_column.numeric_column("Year") - -# Then, bucketize the numeric column on the years 1960, 1980, and 2000. -bucketized_feature_column = tf.feature_column.bucketized_column( - source_column = numeric_feature_column, - boundaries = [1960, 1980, 2000]) -``` -Note that specifying a _three_-element boundaries vector creates a -_four_-element bucketized vector. - - -### Categorical identity column - -**Categorical identity columns** can be seen as a special case of bucketized -columns. In traditional bucketized columns, each bucket represents a range of -values (for example, from 1960 to 1979). In a categorical identity column, each -bucket represents a single, unique integer. For example, let's say you want to -represent the integer range `[0, 4)`. That is, you want to represent the -integers 0, 1, 2, or 3. In this case, the categorical identity mapping looks -like this: - -
-A categorical identity column mapping. Note that this is a one-hot
-encoding, not a binary numerical encoding.
- -As with bucketized columns, a model can learn a separate weight for each class -in a categorical identity column. For example, instead of using a string to -represent the `product_class`, let's represent each class with a unique integer -value. That is: - -* `0="kitchenware"` -* `1="electronics"` -* `2="sport"` - -Call @{tf.feature_column.categorical_column_with_identity} to implement a -categorical identity column. For example: - -``` python -# Create categorical output for an integer feature named "my_feature_b", -# The values of my_feature_b must be >= 0 and < num_buckets -identity_feature_column = tf.feature_column.categorical_column_with_identity( - key='my_feature_b', - num_buckets=4) # Values [0, 4) - -# In order for the preceding call to work, the input_fn() must return -# a dictionary containing 'my_feature_b' as a key. Furthermore, the values -# assigned to 'my_feature_b' must belong to the set [0, 4). -def input_fn(): - ... - return ({ 'my_feature_a':[7, 9, 5, 2], 'my_feature_b':[3, 1, 2, 2] }, - [Label_values]) -``` - -### Categorical vocabulary column - -We cannot input strings directly to a model. Instead, we must first map strings -to numeric or categorical values. Categorical vocabulary columns provide a good -way to represent strings as a one-hot vector. For example: - -
-Mapping string values to vocabulary columns.
- -As you can see, categorical vocabulary columns are kind of an enum version of -categorical identity columns. TensorFlow provides two different functions to -create categorical vocabulary columns: - -* @{tf.feature_column.categorical_column_with_vocabulary_list} -* @{tf.feature_column.categorical_column_with_vocabulary_file} - -`categorical_column_with_vocabulary_list` maps each string to an integer based -on an explicit vocabulary list. For example: - -```python -# Given input "feature_name_from_input_fn" which is a string, -# create a categorical feature by mapping the input to one of -# the elements in the vocabulary list. -vocabulary_feature_column = - tf.feature_column.categorical_column_with_vocabulary_list( - key=feature_name_from_input_fn, - vocabulary_list=["kitchenware", "electronics", "sports"]) -``` - -The preceding function is pretty straightforward, but it has a significant -drawback. Namely, there's way too much typing when the vocabulary list is long. -For these cases, call -`tf.feature_column.categorical_column_with_vocabulary_file` instead, which lets -you place the vocabulary words in a separate file. For example: - -```python - -# Given input "feature_name_from_input_fn" which is a string, -# create a categorical feature to our model by mapping the input to one of -# the elements in the vocabulary file -vocabulary_feature_column = - tf.feature_column.categorical_column_with_vocabulary_file( - key=feature_name_from_input_fn, - vocabulary_file="product_class.txt", - vocabulary_size=3) -``` - -`product_class.txt` should contain one line for each vocabulary element. In our -case: - -```None -kitchenware -electronics -sports -``` - -### Hashed Column - -So far, we've worked with a naively small number of categories. For example, -our product_class example has only 3 categories. Often though, the number of -categories can be so big that it's not possible to have individual categories -for each vocabulary word or integer because that would consume too much memory. -For these cases, we can instead turn the question around and ask, "How many -categories am I willing to have for my input?" In fact, the -@{tf.feature_column.categorical_column_with_hash_bucket} function enables you -to specify the number of categories. For this type of feature column the model -calculates a hash value of the input, then puts it into one of -the `hash_bucket_size` categories using the modulo operator, as in the following -pseudocode: - -```python -# pseudocode -feature_id = hash(raw_feature) % hash_buckets_size -``` - -The code to create the `feature_column` might look something like this: - -``` python -hashed_feature_column = - tf.feature_column.categorical_column_with_hash_bucket( - key = "some_feature", - hash_buckets_size = 100) # The number of categories -``` -At this point, you might rightfully think: "This is crazy!" After all, we are -forcing the different input values to a smaller set of categories. This means -that two probably unrelated inputs will be mapped to the same -category, and consequently mean the same thing to the neural network. The -following figure illustrates this dilemma, showing that kitchenware and sports -both get assigned to category (hash bucket) 12: - -
-Representing data with hash buckets.
- -As with many counterintuitive phenomena in machine learning, it turns out that -hashing often works well in practice. That's because hash categories provide -the model with some separation. The model can use additional features to further -separate kitchenware from sports. - -### Crossed column - -Combining features into a single feature, better known as -[feature crosses](https://developers.google.com/machine-learning/glossary/#feature_cross), -enables the model to learn separate weights for each combination of -features. - -More concretely, suppose we want our model to calculate real estate prices in -Atlanta, GA. Real-estate prices within this city vary greatly depending on -location. Representing latitude and longitude as separate features isn't very -useful in identifying real-estate location dependencies; however, crossing -latitude and longitude into a single feature can pinpoint locations. Suppose we -represent Atlanta as a grid of 100x100 rectangular sections, identifying each -of the 10,000 sections by a feature cross of latitude and longitude. This -feature cross enables the model to train on pricing conditions related to each -individual section, which is a much stronger signal than latitude and longitude -alone. - -The following figure shows our plan, with the latitude & longitude values for -the corners of the city in red text: - -
-Map of Atlanta. Imagine this map divided into 10,000 sections of
-equal size.
- -For the solution, we used a combination of the `bucketized_column` we looked at -earlier, with the @{tf.feature_column.crossed_column} function. - - - -``` python -def make_dataset(latitude, longitude, labels): - assert latitude.shape == longitude.shape == labels.shape - - features = {'latitude': latitude.flatten(), - 'longitude': longitude.flatten()} - labels=labels.flatten() - - return tf.data.Dataset.from_tensor_slices((features, labels)) - - -# Bucketize the latitude and longitude using the `edges` -latitude_bucket_fc = tf.feature_column.bucketized_column( - tf.feature_column.numeric_column('latitude'), - list(atlanta.latitude.edges)) - -longitude_bucket_fc = tf.feature_column.bucketized_column( - tf.feature_column.numeric_column('longitude'), - list(atlanta.longitude.edges)) - -# Cross the bucketized columns, using 5000 hash bins. -crossed_lat_lon_fc = tf.feature_column.crossed_column( - [latitude_bucket_fc, longitude_bucket_fc], 5000) - -fc = [ - latitude_bucket_fc, - longitude_bucket_fc, - crossed_lat_lon_fc] - -# Build and train the Estimator. -est = tf.estimator.LinearRegressor(fc, ...) -``` - -You may create a feature cross from either of the following: - -* Feature names; that is, names from the `dict` returned from `input_fn`. -* Any categorical column, except `categorical_column_with_hash_bucket` - (since `crossed_column` hashes the input). - -When the feature columns `latitude_bucket_fc` and `longitude_bucket_fc` are -crossed, TensorFlow will create `(latitude_fc, longitude_fc)` pairs for each -example. This would produce a full grid of possibilities as follows: - -``` None - (0,0), (0,1)... (0,99) - (1,0), (1,1)... (1,99) - ... ... ... -(99,0), (99,1)...(99, 99) -``` - -Except that a full grid would only be tractable for inputs with limited -vocabularies. Instead of building this, potentially huge, table of inputs, -the `crossed_column` only builds the number requested by the `hash_bucket_size` -argument. The feature column assigns an example to a index by running a hash -function on the tuple of inputs, followed by a modulo operation with -`hash_bucket_size`. - -As discussed earlier, performing the -hash and modulo function limits the number of categories, but can cause category -collisions; that is, multiple (latitude, longitude) feature crosses will end -up in the same hash bucket. In practice though, performing feature crosses -still adds significant value to the learning capability of your models. - -Somewhat counterintuitively, when creating feature crosses, you typically still -should include the original (uncrossed) features in your model (as in the -preceding code snippet). The independent latitude and longitude features help the -model distinguish between examples where a hash collision has occurred in the -crossed feature. - -## Indicator and embedding columns - -Indicator columns and embedding columns never work on features directly, but -instead take categorical columns as input. - -When using an indicator column, we're telling TensorFlow to do exactly what -we've seen in our categorical product_class example. That is, an -**indicator column** treats each category as an element in a one-hot vector, -where the matching category has value 1 and the rest have 0s: - -
-Representing data in indicator columns.
- -Here's how you create an indicator column by calling -@{tf.feature_column.indicator_column}: - -``` python -categorical_column = ... # Create any type of categorical column. - -# Represent the categorical column as an indicator column. -indicator_column = tf.feature_column.indicator_column(categorical_column) -``` - -Now, suppose instead of having just three possible classes, we have a million. -Or maybe a billion. For a number of reasons, as the number of categories grow -large, it becomes infeasible to train a neural network using indicator columns. - -We can use an embedding column to overcome this limitation. Instead of -representing the data as a one-hot vector of many dimensions, an -**embedding column** represents that data as a lower-dimensional, ordinary -vector in which each cell can contain any number, not just 0 or 1. By -permitting a richer palette of numbers for every cell, an embedding column -contains far fewer cells than an indicator column. - -Let's look at an example comparing indicator and embedding columns. Suppose our -input examples consist of different words from a limited palette of only 81 -words. Further suppose that the data set provides the following input -words in 4 separate examples: - -* `"dog"` -* `"spoon"` -* `"scissors"` -* `"guitar"` - -In that case, the following figure illustrates the processing path for -embedding columns or indicator columns. - -
-An embedding column stores categorical data in a lower-dimensional
-vector than an indicator column. (We just placed random numbers into the
-embedding vectors; training determines the actual numbers.)
- -When an example is processed, one of the `categorical_column_with...` functions -maps the example string to a numerical categorical value. For example, a -function maps "spoon" to `[32]`. (The 32 comes from our imagination—the actual -values depend on the mapping function.) You may then represent these numerical -categorical values in either of the following two ways: - -* As an indicator column. A function converts each numeric categorical value - into an 81-element vector (because our palette consists of 81 words), placing - a 1 in the index of the categorical value (0, 32, 79, 80) and a 0 in all the - other positions. - -* As an embedding column. A function uses the numerical categorical values - `(0, 32, 79, 80)` as indices to a lookup table. Each slot in that lookup table - contains a 3-element vector. - -How do the values in the embeddings vectors magically get assigned? Actually, -the assignments happen during training. That is, the model learns the best way -to map your input numeric categorical values to the embeddings vector value in -order to solve your problem. Embedding columns increase your model's -capabilities, since an embeddings vector learns new relationships between -categories from the training data. - -Why is the embedding vector size 3 in our example? Well, the following "formula" -provides a general rule of thumb about the number of embedding dimensions: - -```python -embedding_dimensions = number_of_categories**0.25 -``` - -That is, the embedding vector dimension should be the 4th root of the number of -categories. Since our vocabulary size in this example is 81, the recommended -number of dimensions is 3: - -``` python -3 = 81**0.25 -``` -Note that this is just a general guideline; you can set the number of embedding -dimensions as you please. - -Call @{tf.feature_column.embedding_column} to create an `embedding_column` as -suggested by the following snippet: - -``` python -categorical_column = ... # Create any categorical column - -# Represent the categorical column as an embedding column. -# This means creating an embedding vector lookup table with one element for each category. -embedding_column = tf.feature_column.embedding_column( - categorical_column=categorical_column, - dimension=embedding_dimensions) -``` - -@{$programmers_guide/embedding$Embeddings} is a significant topic within machine -learning. This information was just to get you started using them as feature -columns. - -## Passing feature columns to Estimators - -As the following list indicates, not all Estimators permit all types of -`feature_columns` argument(s): - -* @{tf.estimator.LinearClassifier$`LinearClassifier`} and - @{tf.estimator.LinearRegressor$`LinearRegressor`}: Accept all types of - feature column. -* @{tf.estimator.DNNClassifier$`DNNClassifier`} and - @{tf.estimator.DNNRegressor$`DNNRegressor`}: Only accept dense columns. Other - column types must be wrapped in either an `indicator_column` or - `embedding_column`. -* @{tf.estimator.DNNLinearCombinedClassifier$`DNNLinearCombinedClassifier`} and - @{tf.estimator.DNNLinearCombinedRegressor$`DNNLinearCombinedRegressor`}: - * The `linear_feature_columns` argument accepts any feature column type. - * The `dnn_feature_columns` argument only accepts dense columns. - -## Other Sources - -For more examples on feature columns, view the following: - -* The @{$low_level_intro#feature_columns$Low Level Introduction} demonstrates how - experiment directly with `feature_columns` using TensorFlow's low level APIs. 
-* The @{$wide$wide} and @{$wide_and_deep$Wide & Deep} Tutorials solve a - binary classification problem using `feature_columns` on a variety of input - data types. - -To learn more about embeddings, see the following: - -* [Deep Learning, NLP, and representations](http://colah.github.io/posts/2014-07-NLP-RNNs-Representations/) - (Chris Olah's blog) -* The TensorFlow [Embedding Projector](http://projector.tensorflow.org) diff --git a/tensorflow/docs_src/programmers_guide/graph_viz.md b/tensorflow/docs_src/programmers_guide/graph_viz.md deleted file mode 100644 index f581ae56da..0000000000 --- a/tensorflow/docs_src/programmers_guide/graph_viz.md +++ /dev/null @@ -1,316 +0,0 @@ -# TensorBoard: Graph Visualization - -TensorFlow computation graphs are powerful but complicated. The graph visualization can help you understand and debug them. Here's an example of the visualization at work. - -![Visualization of a TensorFlow graph](https://www.tensorflow.org/images/graph_vis_animation.gif "Visualization of a TensorFlow graph") -*Visualization of a TensorFlow graph.* - -To see your own graph, run TensorBoard pointing it to the log directory of the job, click on the graph tab on the top pane and select the appropriate run using the menu at the upper left corner. For in depth information on how to run TensorBoard and make sure you are logging all the necessary information, see @{$summaries_and_tensorboard$TensorBoard: Visualizing Learning}. - -## Name scoping and nodes - -Typical TensorFlow graphs can have many thousands of nodes--far too many to see -easily all at once, or even to lay out using standard graph tools. To simplify, -variable names can be scoped and the visualization uses this information to -define a hierarchy on the nodes in the graph. By default, only the top of this -hierarchy is shown. Here is an example that defines three operations under the -`hidden` name scope using -@{tf.name_scope}: - -```python -import tensorflow as tf - -with tf.name_scope('hidden') as scope: - a = tf.constant(5, name='alpha') - W = tf.Variable(tf.random_uniform([1, 2], -1.0, 1.0), name='weights') - b = tf.Variable(tf.zeros([1]), name='biases') -``` - -This results in the following three op names: - -* `hidden/alpha` -* `hidden/weights` -* `hidden/biases` - -By default, the visualization will collapse all three into a node labeled `hidden`. -The extra detail isn't lost. You can double-click, or click -on the orange `+` sign in the top right to expand the node, and then you'll see -three subnodes for `alpha`, `weights` and `biases`. - -Here's a real-life example of a more complicated node in its initial and -expanded states. - - - - - - - - - - -
-Initial view of top-level name scope pool_1. Clicking on the orange + button
-on the top right or double-clicking on the node itself will expand it.
-
-Expanded view of pool_1 name scope. Clicking on the orange - button on the top
-right or double-clicking on the node itself will collapse the name scope.
- -Grouping nodes by name scopes is critical to making a legible graph. If you're -building a model, name scopes give you control over the resulting visualization. -**The better your name scopes, the better your visualization.** - -The figure above illustrates a second aspect of the visualization. TensorFlow -graphs have two kinds of connections: data dependencies and control -dependencies. Data dependencies show the flow of tensors between two ops and -are shown as solid arrows, while control dependencies use dotted lines. In the -expanded view (right side of the figure above) all the connections are data -dependencies with the exception of the dotted line connecting `CheckNumerics` -and `control_dependency`. - -There's a second trick to simplifying the layout. Most TensorFlow graphs have a -few nodes with many connections to other nodes. For example, many nodes might -have a control dependency on an initialization step. Drawing all edges between -the `init` node and its dependencies would create a very cluttered view. - -To reduce clutter, the visualization separates out all high-degree nodes to an -*auxiliary* area on the right and doesn't draw lines to represent their edges. -Instead of lines, we draw small *node icons* to indicate the connections. -Separating out the auxiliary nodes typically doesn't remove critical -information since these nodes are usually related to bookkeeping functions. -See [Interaction](#interaction) for how to move nodes between the main graph -and the auxiliary area. - - - - - - - - - - -
-Node conv_1 is connected to save. Note the little save node icon on its
-right.
-
-save has a high degree, and will appear as an auxiliary node. The connection
-with conv_1 is shown as a node icon on its left. To further reduce clutter,
-since save has a lot of connections, we show the first 5 and abbreviate the
-others as ... 12 more.
- -One last structural simplification is *series collapsing*. Sequential -motifs--that is, nodes whose names differ by a number at the end and have -isomorphic structures--are collapsed into a single *stack* of nodes, as shown -below. For networks with long sequences, this greatly simplifies the view. As -with hierarchical nodes, double-clicking expands the series. See -[Interaction](#interaction) for how to disable/enable series collapsing for a -specific set of nodes. - - - - - - - - - - -
-A collapsed view of a node sequence.
-
-A small piece of the expanded view, after double-click.
- -Finally, as one last aid to legibility, the visualization uses special icons -for constants and summary nodes. To summarize, here's a table of node symbols: - -Symbol | Meaning ---- | --- -![Name scope](https://www.tensorflow.org/images/namespace_node.png "Name scope") | *High-level* node representing a name scope. Double-click to expand a high-level node. -![Sequence of unconnected nodes](https://www.tensorflow.org/images/horizontal_stack.png "Sequence of unconnected nodes") | Sequence of numbered nodes that are not connected to each other. -![Sequence of connected nodes](https://www.tensorflow.org/images/vertical_stack.png "Sequence of connected nodes") | Sequence of numbered nodes that are connected to each other. -![Operation node](https://www.tensorflow.org/images/op_node.png "Operation node") | An individual operation node. -![Constant node](https://www.tensorflow.org/images/constant.png "Constant node") | A constant. -![Summary node](https://www.tensorflow.org/images/summary.png "Summary node") | A summary node. -![Data flow edge](https://www.tensorflow.org/images/dataflow_edge.png "Data flow edge") | Edge showing the data flow between operations. -![Control dependency edge](https://www.tensorflow.org/images/control_edge.png "Control dependency edge") | Edge showing the control dependency between operations. -![Reference edge](https://www.tensorflow.org/images/reference_edge.png "Reference edge") | A reference edge showing that the outgoing operation node can mutate the incoming tensor. - -## Interaction {#interaction} - -Navigate the graph by panning and zooming. Click and drag to pan, and use a -scroll gesture to zoom. Double-click on a node, or click on its `+` button, to -expand a name scope that represents a group of operations. To easily keep -track of the current viewpoint when zooming and panning, there is a minimap in -the bottom right corner. - -To close an open node, double-click it again or click its `-` button. You can -also click once to select a node. It will turn a darker color, and details -about it and the nodes it connects to will appear in the info card at upper -right corner of the visualization. - - - - - - - - - - -
- *Left:* Info card of a name scope, showing detailed information for the conv2 name scope. The inputs and outputs are combined from the inputs and outputs of the operation nodes inside the name scope. For name scopes, no attributes are shown.
- *Right:* Info card of an operation node, showing detailed information for the DecodeRaw operation node. In addition to inputs and outputs, the card shows the device and the attributes associated with the current operation.
- -TensorBoard provides several ways to change the visual layout of the graph. This -doesn't change the graph's computational semantics, but it can bring some -clarity to the network's structure. By right clicking on a node or pressing -buttons on the bottom of that node's info card, you can make the following -changes to its layout: - -* Nodes can be moved between the main graph and the auxiliary area. -* A series of nodes can be ungrouped so that the nodes in the series do not -appear grouped together. Ungrouped series can likewise be regrouped. - -Selection can also be helpful in understanding high-degree nodes. Select any -high-degree node, and the corresponding node icons for its other connections -will be selected as well. This makes it easy, for example, to see which nodes -are being saved--and which aren't. - -Clicking on a node name in the info card will select it. If necessary, the -viewpoint will automatically pan so that the node is visible. - -Finally, you can choose two color schemes for your graph, using the color menu -above the legend. The default *Structure View* shows structure: when two -high-level nodes have the same structure, they appear in the same color of the -rainbow. Uniquely structured nodes are gray. There's a second view, which shows -what device the different operations run on. Name scopes are colored -proportionally to the fraction of devices for the operations inside them. - -The images below give an illustration for a piece of a real-life graph. - - - - - - - - - - -
- *Left (color by structure):* The gray nodes have unique structure. The orange conv1 and conv2 nodes have the same structure, and analogously for nodes with other colors.
- *Right (color by device):* Name scopes are colored proportionally to the fraction of devices of the operation nodes inside them. Here, purple means GPU and green means CPU.
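The device coloring simply reflects where operations were placed. As a minimal sketch (assuming a GPU is actually available; shapes and names are arbitrary), you can influence this placement with `tf.device`:

```python
import tensorflow as tf

with tf.device("/cpu:0"):
  with tf.name_scope("preprocess"):
    images = tf.random_uniform([8, 224, 224, 3])

with tf.device("/gpu:0"):
  with tf.name_scope("inference"):
    filters = tf.Variable(tf.truncated_normal([3, 3, 3, 16]))
    conv = tf.nn.conv2d(images, filters, strides=[1, 1, 1, 1], padding="SAME")
```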
- -## Tensor shape information - -When the serialized `GraphDef` includes tensor shapes, the graph visualizer -labels edges with tensor dimensions, and edge thickness reflects total tensor -size. To include tensor shapes in the `GraphDef` pass the actual graph object -(as in `sess.graph`) to the `FileWriter` when serializing the graph. -The images below show the CIFAR-10 model with tensor shape information: - - - - - - - -
- CIFAR-10 model with tensor shape information.
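As a minimal sketch (the tiny model below is a stand-in, not the CIFAR-10 code), passing `sess.graph` rather than a bare `GraphDef` is what preserves the shape annotations:

```python
import tensorflow as tf

images = tf.placeholder(tf.float32, [None, 24, 24, 3], name="images")
flat = tf.reshape(images, [-1, 24 * 24 * 3])
logits = tf.matmul(flat, tf.Variable(tf.truncated_normal([24 * 24 * 3, 10])))

with tf.Session() as sess:
  # `sess.graph` carries shape information, so edges are labeled with
  # tensor dimensions and drawn thicker for larger tensors.
  writer = tf.summary.FileWriter("/tmp/shape_demo", sess.graph)
  writer.close()
```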
-
-## Runtime statistics
-
-Often it is useful to collect runtime metadata for a run, such as total memory
-usage, total compute time, and tensor shapes for nodes. The code example below
-is a snippet from the train and test section of a modification of the
-@{$layers$simple MNIST tutorial}, in which we have recorded summaries and
-runtime statistics. See the
-@{$summaries_and_tensorboard#serializing-the-data$Summaries Tutorial}
-for details on how to record summaries.
-Full source is [here](https://www.tensorflow.org/code/tensorflow/examples/tutorials/mnist/mnist_with_summaries.py).
-
-```python
-  # Train the model, and also write summaries.
-  # Every 10th step, measure test-set accuracy, and write test summaries
-  # All other steps, run train_step on training data, & add training summaries
-
-  def feed_dict(train):
-    """Make a TensorFlow feed_dict: maps data onto Tensor placeholders."""
-    if train or FLAGS.fake_data:
-      xs, ys = mnist.train.next_batch(100, fake_data=FLAGS.fake_data)
-      k = FLAGS.dropout
-    else:
-      xs, ys = mnist.test.images, mnist.test.labels
-      k = 1.0
-    return {x: xs, y_: ys, keep_prob: k}
-
-  for i in range(FLAGS.max_steps):
-    if i % 10 == 0:  # Record summaries and test-set accuracy
-      summary, acc = sess.run([merged, accuracy], feed_dict=feed_dict(False))
-      test_writer.add_summary(summary, i)
-      print('Accuracy at step %s: %s' % (i, acc))
-    else:  # Record train set summaries, and train
-      if i % 100 == 99:  # Record execution stats
-        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
-        run_metadata = tf.RunMetadata()
-        summary, _ = sess.run([merged, train_step],
-                              feed_dict=feed_dict(True),
-                              options=run_options,
-                              run_metadata=run_metadata)
-        train_writer.add_run_metadata(run_metadata, 'step%d' % i)
-        train_writer.add_summary(summary, i)
-        print('Adding run metadata for', i)
-      else:  # Record a summary
-        summary, _ = sess.run([merged, train_step], feed_dict=feed_dict(True))
-        train_writer.add_summary(summary, i)
-```
-
-This code will emit runtime statistics for every 100th step, starting at step 99.
-
-When you launch TensorBoard and go to the Graph tab, you will now see options
-under "Session runs" which correspond to the steps where run metadata was added.
-Selecting one of these runs will show you the snapshot of the network at that
-step, fading out unused nodes. In the controls on the left-hand side, you will
-be able to color the nodes by total memory or total compute time. Additionally,
-clicking on a node will display the exact total memory, compute time, and
-tensor output sizes.
-
- *Left to right:* color by compute time, run metadata graph, and run metadata info card.
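The collected `RunMetadata` can also be inspected outside TensorBoard. As an illustrative sketch (assuming the `timeline` utility module in `tensorflow.python.client` is available in your build; the paths are arbitrary), the step stats can be exported as a Chrome trace:

```python
import tensorflow as tf
from tensorflow.python.client import timeline

x = tf.random_uniform([1000, 1000])
y = tf.matmul(x, x)

with tf.Session() as sess:
  run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
  run_metadata = tf.RunMetadata()
  sess.run(y, options=run_options, run_metadata=run_metadata)

  # Write a trace that can be loaded at chrome://tracing.
  trace = timeline.Timeline(run_metadata.step_stats)
  with open("/tmp/timeline.json", "w") as f:
    f.write(trace.generate_chrome_trace_format())
```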
diff --git a/tensorflow/docs_src/programmers_guide/graphs.md b/tensorflow/docs_src/programmers_guide/graphs.md deleted file mode 100644 index f0dd8def17..0000000000 --- a/tensorflow/docs_src/programmers_guide/graphs.md +++ /dev/null @@ -1,558 +0,0 @@ -# Graphs and Sessions - -TensorFlow uses a **dataflow graph** to represent your computation in terms of -the dependencies between individual operations. This leads to a low-level -programming model in which you first define the dataflow graph, then create a -TensorFlow **session** to run parts of the graph across a set of local and -remote devices. - -This guide will be most useful if you intend to use the low-level programming -model directly. Higher-level APIs such as @{tf.estimator.Estimator} and Keras -hide the details of graphs and sessions from the end user, but this guide may -also be useful if you want to understand how these APIs are implemented. - -## Why dataflow graphs? - -![](../images/tensors_flowing.gif) - -[Dataflow](https://en.wikipedia.org/wiki/Dataflow_programming) is a common -programming model for parallel computing. In a dataflow graph, the nodes -represent units of computation, and the edges represent the data consumed or -produced by a computation. For example, in a TensorFlow graph, the @{tf.matmul} -operation would correspond to a single node with two incoming edges (the -matrices to be multiplied) and one outgoing edge (the result of the -multiplication). - - - -Dataflow has several advantages that TensorFlow leverages when executing your -programs: - -* **Parallelism.** By using explicit edges to represent dependencies between - operations, it is easy for the system to identify operations that can execute - in parallel. - -* **Distributed execution.** By using explicit edges to represent the values - that flow between operations, it is possible for TensorFlow to partition your - program across multiple devices (CPUs, GPUs, and TPUs) attached to different - machines. TensorFlow inserts the necessary communication and coordination - between devices. - -* **Compilation.** TensorFlow's @{$performance/xla$XLA compiler} can - use the information in your dataflow graph to generate faster code, for - example, by fusing together adjacent operations. - -* **Portability.** The dataflow graph is a language-independent representation - of the code in your model. You can build a dataflow graph in Python, store it - in a @{$saved_model$SavedModel}, and restore it in a C++ program for - low-latency inference. - - -## What is a @{tf.Graph}? - -A @{tf.Graph} contains two relevant kinds of information: - -* **Graph structure.** The nodes and edges of the graph, indicating how - individual operations are composed together, but not prescribing how they - should be used. The graph structure is like assembly code: inspecting it can - convey some useful information, but it does not contain all of the useful - context that source code conveys. - -* **Graph collections.** TensorFlow provides a general mechanism for storing - collections of metadata in a @{tf.Graph}. The @{tf.add_to_collection} function - enables you to associate a list of objects with a key (where @{tf.GraphKeys} - defines some of the standard keys), and @{tf.get_collection} enables you to - look up all objects associated with a key. Many parts of the TensorFlow - library use this facility: for example, when you create a @{tf.Variable}, it - is added by default to collections representing "global variables" and - "trainable variables". 
When you later come to create a @{tf.train.Saver} or - @{tf.train.Optimizer}, the variables in these collections are used as the - default arguments. - - -## Building a @{tf.Graph} - -Most TensorFlow programs start with a dataflow graph construction phase. In this -phase, you invoke TensorFlow API functions that construct new @{tf.Operation} -(node) and @{tf.Tensor} (edge) objects and add them to a @{tf.Graph} -instance. TensorFlow provides a **default graph** that is an implicit argument -to all API functions in the same context. For example: - -* Calling `tf.constant(42.0)` creates a single @{tf.Operation} that produces the - value `42.0`, adds it to the default graph, and returns a @{tf.Tensor} that - represents the value of the constant. - -* Calling `tf.matmul(x, y)` creates a single @{tf.Operation} that multiplies - the values of @{tf.Tensor} objects `x` and `y`, adds it to the default graph, - and returns a @{tf.Tensor} that represents the result of the multiplication. - -* Executing `v = tf.Variable(0)` adds to the graph a @{tf.Operation} that will - store a writeable tensor value that persists between @{tf.Session.run} calls. - The @{tf.Variable} object wraps this operation, and can be used [like a - tensor](#tensor-like_objects), which will read the current value of the - stored value. The @{tf.Variable} object also has methods such as - @{tf.Variable.assign$`assign`} and @{tf.Variable.assign_add$`assign_add`} that - create @{tf.Operation} objects that, when executed, update the stored value. - (See @{$programmers_guide/variables} for more information about variables.) - -* Calling @{tf.train.Optimizer.minimize} will add operations and tensors to the - default graph that calculates gradients, and return a @{tf.Operation} that, - when run, will apply those gradients to a set of variables. - -Most programs rely solely on the default graph. However, -see [Dealing with multiple graphs](#programming_with_multiple_graphs) for more -advanced use cases. High-level APIs such as the @{tf.estimator.Estimator} API -manage the default graph on your behalf, and--for example--may create different -graphs for training and evaluation. - -Note: Calling most functions in the TensorFlow API merely adds operations -and tensors to the default graph, but **does not** perform the actual -computation. Instead, you compose these functions until you have a @{tf.Tensor} -or @{tf.Operation} that represents the overall computation--such as performing -one step of gradient descent--and then pass that object to a @{tf.Session} to -perform the computation. See the section "Executing a graph in a @{tf.Session}" -for more details. - -## Naming operations - -A @{tf.Graph} object defines a **namespace** for the @{tf.Operation} objects it -contains. TensorFlow automatically chooses a unique name for each operation in -your graph, but giving operations descriptive names can make your program easier -to read and debug. The TensorFlow API provides two ways to override the name of -an operation: - -* Each API function that creates a new @{tf.Operation} or returns a new - @{tf.Tensor} accepts an optional `name` argument. For example, - `tf.constant(42.0, name="answer")` creates a new @{tf.Operation} named - `"answer"` and returns a @{tf.Tensor} named `"answer:0"`. If the default graph - already contains an operation named `"answer"`, then TensorFlow would append - `"_1"`, `"_2"`, and so on to the name, in order to make it unique. 
-
-* The @{tf.name_scope} function makes it possible to add a **name scope** prefix
-  to all operations created in a particular context. The current name scope
-  prefix is a `"/"`-delimited list of the names of all active @{tf.name_scope}
-  context managers. If a name scope has already been used in the current
-  context, TensorFlow appends `"_1"`, `"_2"`, and so on. For example:
-
-  ```python
-  c_0 = tf.constant(0, name="c")  # => operation named "c"
-
-  # Already-used names will be "uniquified".
-  c_1 = tf.constant(2, name="c")  # => operation named "c_1"
-
-  # Name scopes add a prefix to all operations created in the same context.
-  with tf.name_scope("outer"):
-    c_2 = tf.constant(2, name="c")  # => operation named "outer/c"
-
-    # Name scopes nest like paths in a hierarchical file system.
-    with tf.name_scope("inner"):
-      c_3 = tf.constant(3, name="c")  # => operation named "outer/inner/c"
-
-    # Exiting a name scope context will return to the previous prefix.
-    c_4 = tf.constant(4, name="c")  # => operation named "outer/c_1"
-
-    # Already-used name scopes will be "uniquified".
-    with tf.name_scope("inner"):
-      c_5 = tf.constant(5, name="c")  # => operation named "outer/inner_1/c"
-  ```
-
-The graph visualizer uses name scopes to group operations and reduce the visual
-complexity of a graph. See [Visualizing your graph](#visualizing-your-graph) for
-more information.
-
-Note that @{tf.Tensor} objects are implicitly named after the @{tf.Operation}
-that produces the tensor as output. A tensor name has the form `"<OP_NAME>:<i>"`
-where:
-
-* `"<OP_NAME>"` is the name of the operation that produces it.
-* `"<i>"` is an integer representing the index of that tensor among the
-  operation's outputs.
-
-## Placing operations on different devices
-
-If you want your TensorFlow program to use multiple different devices, the
-@{tf.device} function provides a convenient way to request that all operations
-created in a particular context are placed on the same device (or type of
-device).
-
-A **device specification** has the following form:
-
-```
-/job:<JOB_NAME>/task:<TASK_INDEX>/device:<DEVICE_TYPE>:<DEVICE_INDEX>
-```
-
-where:
-
-* `<JOB_NAME>` is an alpha-numeric string that does not start with a number.
-* `<DEVICE_TYPE>` is a registered device type (such as `GPU` or `CPU`).
-* `<TASK_INDEX>` is a non-negative integer representing the index of the task
-  in the job named `<JOB_NAME>`. See @{tf.train.ClusterSpec} for an explanation
-  of jobs and tasks.
-* `<DEVICE_INDEX>` is a non-negative integer representing the index of the
-  device, for example, to distinguish between different GPU devices used in the
-  same process.
-
-You do not need to specify every part of a device specification. For example,
-if you are running in a single-machine configuration with a single GPU, you
-might use @{tf.device} to pin some operations to the CPU and GPU:
-
-```python
-# Operations created outside either context will run on the "best possible"
-# device. For example, if you have a GPU and a CPU available, and the operation
-# has a GPU implementation, TensorFlow will choose the GPU.
-weights = tf.random_normal(...)
-
-with tf.device("/device:CPU:0"):
-  # Operations created in this context will be pinned to the CPU.
-  img = tf.decode_jpeg(tf.read_file("img.jpg"))
-
-with tf.device("/device:GPU:0"):
-  # Operations created in this context will be pinned to the GPU.
-  result = tf.matmul(weights, img)
-```
-
-If you are deploying TensorFlow in a @{$distributed$typical distributed configuration},
-you might specify the job name and task ID to place variables on
-a task in the parameter server job (`"/job:ps"`), and the other operations on
-a task in the worker job (`"/job:worker"`):
-
-```python
-with tf.device("/job:ps/task:0"):
-  weights_1 = tf.Variable(tf.truncated_normal([784, 100]))
-  biases_1 = tf.Variable(tf.zeros([100]))
-
-with tf.device("/job:ps/task:1"):
-  weights_2 = tf.Variable(tf.truncated_normal([100, 10]))
-  biases_2 = tf.Variable(tf.zeros([10]))
-
-with tf.device("/job:worker"):
-  layer_1 = tf.matmul(train_batch, weights_1) + biases_1
-  layer_2 = tf.matmul(layer_1, weights_2) + biases_2
-```
-
-@{tf.device} gives you a lot of flexibility to choose placements for individual
-operations or broad regions of a TensorFlow graph. In many cases, there are
-simple heuristics that work well. For example, the
-@{tf.train.replica_device_setter} API can be used with @{tf.device} to place
-operations for **data-parallel distributed training**. For example, the
-following code fragment shows how @{tf.train.replica_device_setter} applies
-different placement policies to @{tf.Variable} objects and other operations:
-
-```python
-with tf.device(tf.train.replica_device_setter(ps_tasks=3)):
-  # tf.Variable objects are, by default, placed on tasks in "/job:ps" in a
-  # round-robin fashion.
-  w_0 = tf.Variable(...)  # placed on "/job:ps/task:0"
-  b_0 = tf.Variable(...)  # placed on "/job:ps/task:1"
-  w_1 = tf.Variable(...)  # placed on "/job:ps/task:2"
-  b_1 = tf.Variable(...)  # placed on "/job:ps/task:0"
-
-  input_data = tf.placeholder(tf.float32)     # placed on "/job:worker"
-  layer_0 = tf.matmul(input_data, w_0) + b_0  # placed on "/job:worker"
-  layer_1 = tf.matmul(layer_0, w_1) + b_1     # placed on "/job:worker"
-```
-
-## Tensor-like objects
-
-Many TensorFlow operations take one or more @{tf.Tensor} objects as arguments.
-For example, @{tf.matmul} takes two @{tf.Tensor} objects, and @{tf.add_n} takes
-a list of `n` @{tf.Tensor} objects. For convenience, these functions will accept
-a **tensor-like object** in place of a @{tf.Tensor}, and implicitly convert it
-to a @{tf.Tensor} using the @{tf.convert_to_tensor} method. Tensor-like objects
-include elements of the following types:
-
-* @{tf.Tensor}
-* @{tf.Variable}
-* [`numpy.ndarray`](https://docs.scipy.org/doc/numpy/reference/generated/numpy.ndarray.html)
-* `list` (and lists of tensor-like objects)
-* Scalar Python types: `bool`, `float`, `int`, `str`
-
-You can register additional tensor-like types using
-@{tf.register_tensor_conversion_function}.
-
-Note: By default, TensorFlow will create a new @{tf.Tensor} each time you use
-the same tensor-like object. If the tensor-like object is large (e.g. a
-`numpy.ndarray` containing a set of training examples) and you use it multiple
-times, you may run out of memory. To avoid this, manually call
-@{tf.convert_to_tensor} on the tensor-like object once and use the returned
-@{tf.Tensor} instead.
-
-## Executing a graph in a @{tf.Session}
-
-TensorFlow uses the @{tf.Session} class to represent a connection between the
-client program---typically a Python program, although a similar interface is
-available in other languages---and the C++ runtime. A @{tf.Session} object
-provides access to devices in the local machine, and remote devices using the
-distributed TensorFlow runtime.
It also caches information about your -@{tf.Graph} so that you can efficiently run the same computation multiple times. - -### Creating a @{tf.Session} - -If you are using the low-level TensorFlow API, you can create a @{tf.Session} -for the current default graph as follows: - -```python -# Create a default in-process session. -with tf.Session() as sess: - # ... - -# Create a remote session. -with tf.Session("grpc://example.org:2222"): - # ... -``` - -Since a @{tf.Session} owns physical resources (such as GPUs and -network connections), it is typically used as a context manager (in a `with` -block) that automatically closes the session when you exit the block. It is -also possible to create a session without using a `with` block, but you should -explicitly call @{tf.Session.close} when you are finished with it to free the -resources. - -Note: Higher-level APIs such as @{tf.train.MonitoredTrainingSession} or -@{tf.estimator.Estimator} will create and manage a @{tf.Session} for you. These -APIs accept optional `target` and `config` arguments (either directly, or as -part of a @{tf.estimator.RunConfig} object), with the same meaning as -described below. - -@{tf.Session.__init__} accepts three optional arguments: - -* **`target`.** If this argument is left empty (the default), the session will - only use devices in the local machine. However, you may also specify a - `grpc://` URL to specify the address of a TensorFlow server, which gives the - session access to all devices on machines that this server controls. See - @{tf.train.Server} for details of how to create a TensorFlow - server. For example, in the common **between-graph replication** - configuration, the @{tf.Session} connects to a @{tf.train.Server} in the same - process as the client. The [distributed TensorFlow](../deploy/distributed.md) - deployment guide describes other common scenarios. - -* **`graph`.** By default, a new @{tf.Session} will be bound to---and only able - to run operations in---the current default graph. If you are using multiple - graphs in your program (see [Programming with multiple - graphs](#programming_with_multiple_graphs) for more details), you can specify - an explicit @{tf.Graph} when you construct the session. - -* **`config`.** This argument allows you to specify a @{tf.ConfigProto} that - controls the behavior of the session. For example, some of the configuration - options include: - - * `allow_soft_placement`. Set this to `True` to enable a "soft" device - placement algorithm, which ignores @{tf.device} annotations that attempt - to place CPU-only operations on a GPU device, and places them on the CPU - instead. - - * `cluster_def`. When using distributed TensorFlow, this option allows you - to specify what machines to use in the computation, and provide a mapping - between job names, task indices, and network addresses. See - @{tf.train.ClusterSpec.as_cluster_def} for details. - - * `graph_options.optimizer_options`. Provides control over the optimizations - that TensorFlow performs on your graph before executing it. - - * `gpu_options.allow_growth`. Set this to `True` to change the GPU memory - allocator so that it gradually increases the amount of memory allocated, - rather than allocating most of the memory at startup. - - -### Using @{tf.Session.run} to execute operations - -The @{tf.Session.run} method is the main mechanism for running a @{tf.Operation} -or evaluating a @{tf.Tensor}. 
You can pass one or more @{tf.Operation} or -@{tf.Tensor} objects to @{tf.Session.run}, and TensorFlow will execute the -operations that are needed to compute the result. - -@{tf.Session.run} requires you to specify a list of **fetches**, which determine -the return values, and may be a @{tf.Operation}, a @{tf.Tensor}, or -a [tensor-like type](#tensor-like_objects) such as @{tf.Variable}. These fetches -determine what **subgraph** of the overall @{tf.Graph} must be executed to -produce the result: this is the subgraph that contains all operations named in -the fetch list, plus all operations whose outputs are used to compute the value -of the fetches. For example, the following code fragment shows how different -arguments to @{tf.Session.run} cause different subgraphs to be executed: - -```python -x = tf.constant([[37.0, -23.0], [1.0, 4.0]]) -w = tf.Variable(tf.random_uniform([2, 2])) -y = tf.matmul(x, w) -output = tf.nn.softmax(y) -init_op = w.initializer - -with tf.Session() as sess: - # Run the initializer on `w`. - sess.run(init_op) - - # Evaluate `output`. `sess.run(output)` will return a NumPy array containing - # the result of the computation. - print(sess.run(output)) - - # Evaluate `y` and `output`. Note that `y` will only be computed once, and its - # result used both to return `y_val` and as an input to the `tf.nn.softmax()` - # op. Both `y_val` and `output_val` will be NumPy arrays. - y_val, output_val = sess.run([y, output]) -``` - -@{tf.Session.run} also optionally takes a dictionary of **feeds**, which is a -mapping from @{tf.Tensor} objects (typically @{tf.placeholder} tensors) to -values (typically Python scalars, lists, or NumPy arrays) that will be -substituted for those tensors in the execution. For example: - -```python -# Define a placeholder that expects a vector of three floating-point values, -# and a computation that depends on it. -x = tf.placeholder(tf.float32, shape=[3]) -y = tf.square(x) - -with tf.Session() as sess: - # Feeding a value changes the result that is returned when you evaluate `y`. - print(sess.run(y, {x: [1.0, 2.0, 3.0]})) # => "[1.0, 4.0, 9.0]" - print(sess.run(y, {x: [0.0, 0.0, 5.0]})) # => "[0.0, 0.0, 25.0]" - - # Raises `tf.errors.InvalidArgumentError`, because you must feed a value for - # a `tf.placeholder()` when evaluating a tensor that depends on it. - sess.run(y) - - # Raises `ValueError`, because the shape of `37.0` does not match the shape - # of placeholder `x`. - sess.run(y, {x: 37.0}) -``` - -@{tf.Session.run} also accepts an optional `options` argument that enables you -to specify options about the call, and an optional `run_metadata` argument that -enables you to collect metadata about the execution. For example, you can use -these options together to collect tracing information about the execution: - -``` -y = tf.matmul([[37.0, -23.0], [1.0, 4.0]], tf.random_uniform([2, 2])) - -with tf.Session() as sess: - # Define options for the `sess.run()` call. - options = tf.RunOptions() - options.output_partition_graphs = True - options.trace_level = tf.RunOptions.FULL_TRACE - - # Define a container for the returned metadata. - metadata = tf.RunMetadata() - - sess.run(y, options=options, run_metadata=metadata) - - # Print the subgraphs that executed on each device. - print(metadata.partition_graphs) - - # Print the timings of each operation that executed. - print(metadata.step_stats) -``` - - -## Visualizing your graph - -TensorFlow includes tools that can help you to understand the code in a graph. 
-The **graph visualizer** is a component of TensorBoard that renders the -structure of your graph visually in a browser. The easiest way to create a -visualization is to pass a @{tf.Graph} when creating the -@{tf.summary.FileWriter}: - -```python -# Build your graph. -x = tf.constant([[37.0, -23.0], [1.0, 4.0]]) -w = tf.Variable(tf.random_uniform([2, 2])) -y = tf.matmul(x, w) -# ... -loss = ... -train_op = tf.train.AdagradOptimizer(0.01).minimize(loss) - -with tf.Session() as sess: - # `sess.graph` provides access to the graph used in a `tf.Session`. - writer = tf.summary.FileWriter("/tmp/log/...", sess.graph) - - # Perform your computation... - for i in range(1000): - sess.run(train_op) - # ... - - writer.close() -``` - -Note: If you are using a @{tf.estimator.Estimator}, the graph (and any -summaries) will be logged automatically to the `model_dir` that you specified -when creating the estimator. - -You can then open the log in `tensorboard`, navigate to the "Graph" tab, and -see a high-level visualization of your graph's structure. Note that a typical -TensorFlow graph---especially training graphs with automatically computed -gradients---has too many nodes to visualize at once. The graph visualizer makes -use of name scopes to group related operations into "super" nodes. You can -click on the orange "+" button on any of these super nodes to expand the -subgraph inside. - -![](../images/mnist_deep.png) - -For more information about visualizing your TensorFlow application with -TensorBoard, see the [TensorBoard tutorial](../get_started/summaries_and_tensorboard.md). - -## Programming with multiple graphs - -Note: When training a model, a common way of organizing your code is to use one -graph for training your model, and a separate graph for evaluating or performing -inference with a trained model. In many cases, the inference graph will be -different from the training graph: for example, techniques like dropout and -batch normalization use different operations in each case. Furthermore, by -default utilities like @{tf.train.Saver} use the names of @{tf.Variable} objects -(which have names based on an underlying @{tf.Operation}) to identify each -variable in a saved checkpoint. When programming this way, you can either use -completely separate Python processes to build and execute the graphs, or you can -use multiple graphs in the same process. This section describes how to use -multiple graphs in the same process. - -As noted above, TensorFlow provides a "default graph" that is implicitly passed -to all API functions in the same context. For many applications, a single graph -is sufficient. However, TensorFlow also provides methods for manipulating -the default graph, which can be useful in more advanced use cases. For example: - -* A @{tf.Graph} defines the namespace for @{tf.Operation} objects: each - operation in a single graph must have a unique name. TensorFlow will - "uniquify" the names of operations by appending `"_1"`, `"_2"`, and so on to - their names if the requested name is already taken. Using multiple explicitly - created graphs gives you more control over what name is given to each - operation. - -* The default graph stores information about every @{tf.Operation} and - @{tf.Tensor} that was ever added to it. If your program creates a large number - of unconnected subgraphs, it may be more efficient to use a different - @{tf.Graph} to build each subgraph, so that unrelated state can be garbage - collected. 
- -You can install a different @{tf.Graph} as the default graph, using the -@{tf.Graph.as_default} context manager: - -```python -g_1 = tf.Graph() -with g_1.as_default(): - # Operations created in this scope will be added to `g_1`. - c = tf.constant("Node in g_1") - - # Sessions created in this scope will run operations from `g_1`. - sess_1 = tf.Session() - -g_2 = tf.Graph() -with g_2.as_default(): - # Operations created in this scope will be added to `g_2`. - d = tf.constant("Node in g_2") - -# Alternatively, you can pass a graph when constructing a `tf.Session`: -# `sess_2` will run operations from `g_2`. -sess_2 = tf.Session(graph=g_2) - -assert c.graph is g_1 -assert sess_1.graph is g_1 - -assert d.graph is g_2 -assert sess_2.graph is g_2 -``` - -To inspect the current default graph, call @{tf.get_default_graph}, which -returns a @{tf.Graph} object: - -```python -# Print all of the operations in the default graph. -g = tf.get_default_graph() -print(g.get_operations()) -``` diff --git a/tensorflow/docs_src/programmers_guide/index.md b/tensorflow/docs_src/programmers_guide/index.md deleted file mode 100644 index 9c58a3b45e..0000000000 --- a/tensorflow/docs_src/programmers_guide/index.md +++ /dev/null @@ -1,86 +0,0 @@ -# Programmer's Guide - -The documents in this unit dive into the details of how TensorFlow -works. The units are as follows: - -## High Level APIs - - * @{$programmers_guide/keras}, TensorFlow's high-level API for building and - training deep learning models. - * @{$programmers_guide/eager}, an API for writing TensorFlow code - imperatively, like you would use Numpy. - * @{$programmers_guide/estimators}, a high-level API that provides - fully-packaged models ready for large-scale training and production. - * @{$programmers_guide/datasets}, easy input pipelines to bring your data into - your TensorFlow program. - -## Estimators - -* @{$estimators} provides an introduction. -* @{$premade_estimators}, introduces Estimators for machine learning. -* @{$custom_estimators}, which demonstrates how to build and train models you - design yourself. -* @{$feature_columns}, which shows how an Estimator can handle a variety of input - data types without changes to the model. -* @{$datasets_for_estimators} describes using tf.data with estimators. -* @{$checkpoints}, which explains how to save training progress and resume where - you left off. - -## Accelerators - - * @{$using_gpu} explains how TensorFlow assigns operations to - devices and how you can change the arrangement manually. - * @{$using_tpu} explains how to modify `Estimator` programs to run on a TPU. - -## Low Level APIs - - * @{$programmers_guide/low_level_intro}, which introduces the - basics of how you can use TensorFlow outside of the high Level APIs. - * @{$programmers_guide/tensors}, which explains how to create, - manipulate, and access Tensors--the fundamental object in TensorFlow. - * @{$programmers_guide/variables}, which details how - to represent shared, persistent state in your program. - * @{$programmers_guide/graphs}, which explains: - * dataflow graphs, which are TensorFlow's representation of computations - as dependencies between operations. - * sessions, which are TensorFlow's mechanism for running dataflow graphs - across one or more local or remote devices. - If you are programming with the low-level TensorFlow API, this unit - is essential. 
If you are programming with a high-level TensorFlow API - such as Estimators or Keras, the high-level API creates and manages - graphs and sessions for you, but understanding graphs and sessions - can still be helpful. - * @{$programmers_guide/saved_model}, which - explains how to save and restore variables and models. - -## ML Concepts - - * @{$programmers_guide/embedding}, which introduces the concept - of embeddings, provides a simple example of training an embedding in - TensorFlow, and explains how to view embeddings with the TensorBoard - Embedding Projector. - -## Debugging - - * @{$programmers_guide/debugger}, which - explains how to use the TensorFlow debugger (tfdbg). - -## TensorBoard - -TensorBoard is a utility to visualize different aspects of machine learning. -The following guides explain how to use TensorBoard: - - * @{$programmers_guide/summaries_and_tensorboard}, - which introduces TensorBoard. - * @{$programmers_guide/graph_viz}, which - explains how to visualize the computational graph. - * @{$programmers_guide/tensorboard_histograms} which demonstrates the how to - use TensorBoard's histogram dashboard. - - -## Misc - - * @{$programmers_guide/version_compat}, - which explains backward compatibility guarantees and non-guarantees. - * @{$programmers_guide/faq}, which contains frequently asked - questions about TensorFlow. diff --git a/tensorflow/docs_src/programmers_guide/keras.md b/tensorflow/docs_src/programmers_guide/keras.md deleted file mode 100644 index c6aca7ebf4..0000000000 --- a/tensorflow/docs_src/programmers_guide/keras.md +++ /dev/null @@ -1,623 +0,0 @@ -# Keras - -Keras is a high-level API to build and train deep learning models. It's used for -fast prototyping, advanced research, and production, with three key advantages: - -- *User friendly*
-  Keras has a simple, consistent interface optimized for common use cases. It
-  provides clear and actionable feedback for user errors.
-- *Modular and composable*
-  Keras models are made by connecting configurable building blocks together,
-  with few restrictions.
-- *Easy to extend*
Write custom building blocks to express new ideas for - research. Create new layers, loss functions, and develop state-of-the-art - models. - -## Import tf.keras - -`tf.keras` is TensorFlow's implementation of the -[Keras API specification](https://keras.io){:.external}. This is a high-level -API to build and train models that includes first-class support for -TensorFlow-specific functionality, such as [eager execution](#eager_execution), -`tf.data` pipelines, and [Estimators](/programmers_guide/estimators). -`tf.keras` makes TensorFlow easier to use without sacrificing flexibility and -performance. - -To get started, import `tf.keras` as part of your TensorFlow program setup: - -```python -import tensorflow as tf -from tensorflow import keras -``` - -`tf.keras` can run any Keras-compatible code, but keep in mind: - -* The `tf.keras` version in the latest TensorFlow release might not be the same - as the latest `keras` version from PyPI. Check `tf.keras.__version__`. -* When [saving a model's weights](#weights_only), `tf.keras` defaults to the - [checkpoint format](/get_started/checkpoints). Pass `save_format='h5'` to use - HDF5. - -## Build a simple model - -### Sequential model - -In Keras, you assemble *layers* to build *models*. A model is (usually) a graph -of layers. The most common type of model is a stack of layers: the -`tf.keras.Sequential` model. - -To build a simple, fully-connected network (i.e. multi-layer perceptron): - -```python -model = keras.Sequential() -# Adds a densely-connected layer with 64 units to the model: -model.add(keras.layers.Dense(64, activation='relu')) -# Add another: -model.add(keras.layers.Dense(64, activation='relu')) -# Add a softmax layer with 10 output units: -model.add(keras.layers.Dense(10, activation='softmax')) -``` - -### Configure the layers - -There are many `tf.keras.layers` available with some common constructor -parameters: - -* `activation`: Set the activation function for the layer. This parameter is - specified by the name of a built-in function or as a callable object. By - default, no activation is applied. -* `kernel_initializer` and `bias_initializer`: The initialization schemes - that create the layer's weights (kernel and bias). This parameter is a name or - a callable object. This defaults to the `"Glorot uniform"` initializer. -* `kernel_regularizer` and `bias_regularizer`: The regularization schemes - that apply the layer's weights (kernel and bias), such as L1 or L2 - regularization. By default, no regularization is applied. 
- -The following instantiates `tf.keras.layers.Dense` layers using constructor -arguments: - -```python -# Create a sigmoid layer: -layers.Dense(64, activation='sigmoid') -# Or: -layers.Dense(64, activation=tf.sigmoid) - -# A linear layer with L1 regularization of factor 0.01 applied to the kernel matrix: -layers.Dense(64, kernel_regularizer=keras.regularizers.l1(0.01)) -# A linear layer with L2 regularization of factor 0.01 applied to the bias vector: -layers.Dense(64, bias_regularizer=keras.regularizers.l2(0.01)) - -# A linear layer with a kernel initialized to a random orthogonal matrix: -layers.Dense(64, kernel_initializer='orthogonal') -# A linear layer with a bias vector initialized to 2.0s: -layers.Dense(64, bias_initializer=keras.initializers.constant(2.0)) -``` - -## Train and evaluate - -### Set up training - -After the model is constructed, configure its learning process by calling the -`compile` method: - -```python -model.compile(optimizer=tf.train.AdamOptimizer(0.001), - loss='categorical_crossentropy', - metrics=['accuracy']) -``` - -`tf.keras.Model.compile` takes three important arguments: - -* `optimizer`: This object specifies the training procedure. Pass it optimizer - instances from the `tf.train` module, such as - [`AdamOptimizer`](/api_docs/python/tf/train/AdamOptimizer), - [`RMSPropOptimizer`](/api_docs/python/tf/train/RMSPropOptimizer), or - [`GradientDescentOptimizer`](/api_docs/python/tf/train/GradientDescentOptimizer). -* `loss`: The function to minimize during optimization. Common choices include - mean square error (`mse`), `categorical_crossentropy`, and - `binary_crossentropy`. Loss functions are specified by name or by - passing a callable object from the `tf.keras.losses` module. -* `metrics`: Used to monitor training. These are string names or callables from - the `tf.keras.metrics` module. - -The following shows a few examples of configuring a model for training: - -```python -# Configure a model for mean-squared error regression. -model.compile(optimizer=tf.train.AdamOptimizer(0.01), - loss='mse', # mean squared error - metrics=['mae']) # mean absolute error - -# Configure a model for categorical classification. -model.compile(optimizer=tf.train.RMSPropOptimizer(0.01), - loss=keras.losses.categorical_crossentropy, - metrics=[keras.metrics.categorical_accuracy]) -``` - -### Input NumPy data - -For small datasets, use in-memory [NumPy](https://www.numpy.org/){:.external} -arrays to train and evaluate a model. The model is "fit" to the training data -using the `fit` method: - -```python -import numpy as np - -data = np.random.random((1000, 32)) -labels = np.random.random((1000, 10)) - -model.fit(data, labels, epochs=10, batch_size=32) -``` - -`tf.keras.Model.fit` takes three important arguments: - -* `epochs`: Training is structured into *epochs*. An epoch is one iteration over - the entire input data (this is done in smaller batches). -* `batch_size`: When passed NumPy data, the model slices the data into smaller - batches and iterates over these batches during training. This integer - specifies the size of each batch. Be aware that the last batch may be smaller - if the total number of samples is not divisible by the batch size. -* `validation_data`: When prototyping a model, you want to easily monitor its - performance on some validation data. Passing this argument—a tuple of inputs - and labels—allows the model to display the loss and metrics in inference mode - for the passed data, at the end of each epoch. 
-
-Here's an example using `validation_data`:
-
-```python
-import numpy as np
-
-data = np.random.random((1000, 32))
-labels = np.random.random((1000, 10))
-
-val_data = np.random.random((100, 32))
-val_labels = np.random.random((100, 10))
-
-model.fit(data, labels, epochs=10, batch_size=32,
-          validation_data=(val_data, val_labels))
-```
-
-### Input tf.data datasets
-
-Use the [Datasets API](/programmers_guide/datasets) to scale to large datasets
-or multi-device training. Pass a `tf.data.Dataset` instance to the `fit`
-method:
-
-```python
-# Instantiates a toy dataset instance:
-dataset = tf.data.Dataset.from_tensor_slices((data, labels))
-dataset = dataset.batch(32)
-dataset = dataset.repeat()
-
-# Don't forget to specify `steps_per_epoch` when calling `fit` on a dataset.
-model.fit(dataset, epochs=10, steps_per_epoch=30)
-```
-
-Here, the `fit` method uses the `steps_per_epoch` argument—this is the number of
-training steps the model runs before it moves to the next epoch. Since the
-`Dataset` yields batches of data, this snippet does not require a `batch_size`.
-
-Datasets can also be used for validation:
-
-```python
-dataset = tf.data.Dataset.from_tensor_slices((data, labels))
-dataset = dataset.batch(32).repeat()
-
-val_dataset = tf.data.Dataset.from_tensor_slices((val_data, val_labels))
-val_dataset = val_dataset.batch(32).repeat()
-
-model.fit(dataset, epochs=10, steps_per_epoch=30,
-          validation_data=val_dataset,
-          validation_steps=3)
-```
-
-### Evaluate and predict
-
-The `tf.keras.Model.evaluate` and `tf.keras.Model.predict` methods can use NumPy
-data and a `tf.data.Dataset`.
-
-To *evaluate* the inference-mode loss and metrics for the data provided:
-
-```python
-model.evaluate(x, y, batch_size=32)
-
-model.evaluate(dataset, steps=30)
-```
-
-And to *predict* the output of the last layer in inference for the data provided,
-as a NumPy array:
-
-```python
-model.predict(x, batch_size=32)
-
-model.predict(dataset, steps=30)
-```
-
-
-## Build advanced models
-
-### Functional API
-
-The `tf.keras.Sequential` model is a simple stack of layers that cannot
-represent arbitrary models. Use the
-[Keras functional API](https://keras.io/getting-started/functional-api-guide/){:.external}
-to build complex model topologies such as:
-
-* Multi-input models,
-* Multi-output models,
-* Models with shared layers (the same layer called several times),
-* Models with non-sequential data flows (e.g. residual connections).
-
-Building a model with the functional API works like this:
-
-1. A layer instance is callable and returns a tensor.
-2. Input tensors and output tensors are used to define a `tf.keras.Model`
-   instance.
-3. This model is trained just like the `Sequential` model.
-
-The following example uses the functional API to build a simple, fully-connected
-network:
-
-```python
-inputs = keras.Input(shape=(32,))  # Returns a placeholder tensor
-
-# A layer instance is callable on a tensor, and returns a tensor.
-x = keras.layers.Dense(64, activation='relu')(inputs)
-x = keras.layers.Dense(64, activation='relu')(x)
-predictions = keras.layers.Dense(10, activation='softmax')(x)
-
-# Instantiate the model given inputs and outputs.
-model = keras.Model(inputs=inputs, outputs=predictions)
-
-# The compile step specifies the training configuration.
-model.compile(optimizer=tf.train.RMSPropOptimizer(0.001), - loss='categorical_crossentropy', - metrics=['accuracy']) - -# Trains for 5 epochs -model.fit(data, labels, batch_size=32, epochs=5) -``` - -### Model subclassing - -Build a fully-customizable model by subclassing `tf.keras.Model` and defining -your own forward pass. Create layers in the `__init__` method and set them as -attributes of the class instance. Define the forward pass in the `call` method. - -Model subclassing is particularly useful when -[eager execution](/programmers_guide/eager) is enabled since the forward pass -can be written imperatively. - -Key Point: Use the right API for the job. While model subclassing offers -flexibility, it comes at a cost of greater complexity and more opportunities for -user errors. If possible, prefer the functional API. - -The following example shows a subclassed `tf.keras.Model` using a custom forward -pass: - -```python -class MyModel(keras.Model): - - def __init__(self, num_classes=10): - super(MyModel, self).__init__(name='my_model') - self.num_classes = num_classes - # Define your layers here. - self.dense_1 = keras.layers.Dense(32, activation='relu') - self.dense_2 = keras.layers.Dense(num_classes, activation='sigmoid') - - def call(self, inputs): - # Define your forward pass here, - # using layers you previously defined (in `__init__`). - x = self.dense_1(inputs) - return self.dense_2(x) - - def compute_output_shape(self, input_shape): - # You need to override this function if you want to use the subclassed model - # as part of a functional-style model. - # Otherwise, this method is optional. - shape = tf.TensorShape(input_shape).as_list() - shape[-1] = self.num_classes - return tf.TensorShape(shape) - - -# Instantiates the subclassed model. -model = MyModel(num_classes=10) - -# The compile step specifies the training configuration. -model.compile(optimizer=tf.train.RMSPropOptimizer(0.001), - loss='categorical_crossentropy', - metrics=['accuracy']) - -# Trains for 5 epochs. -model.fit(data, labels, batch_size=32, epochs=5) -``` - - -### Custom layers - -Create a custom layer by subclassing `tf.keras.layers.Layer` and implementing -the following methods: - -* `build`: Create the weights of the layer. Add weights with the `add_weight` - method. -* `call`: Define the forward pass. -* `compute_output_shape`: Specify how to compute the output shape of the layer - given the input shape. -* Optionally, a layer can be serialized by implementing the `get_config` method - and the `from_config` class method. - -Here's an example of a custom layer that implements a `matmul` of an input with -a kernel matrix: - -```python -class MyLayer(keras.layers.Layer): - - def __init__(self, output_dim, **kwargs): - self.output_dim = output_dim - super(MyLayer, self).__init__(**kwargs) - - def build(self, input_shape): - shape = tf.TensorShape((input_shape[1], self.output_dim)) - # Create a trainable weight variable for this layer. 
-    self.kernel = self.add_weight(name='kernel',
-                                  shape=shape,
-                                  initializer='uniform',
-                                  trainable=True)
-    # Be sure to call this at the end
-    super(MyLayer, self).build(input_shape)
-
-  def call(self, inputs):
-    return tf.matmul(inputs, self.kernel)
-
-  def compute_output_shape(self, input_shape):
-    shape = tf.TensorShape(input_shape).as_list()
-    shape[-1] = self.output_dim
-    return tf.TensorShape(shape)
-
-  def get_config(self):
-    base_config = super(MyLayer, self).get_config()
-    base_config['output_dim'] = self.output_dim
-    return base_config
-
-  @classmethod
-  def from_config(cls, config):
-    return cls(**config)
-
-
-# Create a model using the custom layer
-model = keras.Sequential([MyLayer(10),
-                          keras.layers.Activation('softmax')])
-
-# The compile step specifies the training configuration
-model.compile(optimizer=tf.train.RMSPropOptimizer(0.001),
-              loss='categorical_crossentropy',
-              metrics=['accuracy'])
-
-# Trains for 5 epochs.
-model.fit(data, targets, batch_size=32, epochs=5)
-```
-
-
-## Callbacks
-
-A callback is an object passed to a model to customize and extend its behavior
-during training. You can write your own custom callback, or use the built-in
-`tf.keras.callbacks` that include:
-
-* `tf.keras.callbacks.ModelCheckpoint`: Save checkpoints of your model at
-  regular intervals.
-* `tf.keras.callbacks.LearningRateScheduler`: Dynamically change the learning
-  rate.
-* `tf.keras.callbacks.EarlyStopping`: Interrupt training when validation
-  performance has stopped improving.
-* `tf.keras.callbacks.TensorBoard`: Monitor the model's behavior using
-  [TensorBoard](/programmers_guide/summaries_and_tensorboard).
-
-To use a `tf.keras.callbacks.Callback`, pass it to the model's `fit` method:
-
-```python
-callbacks = [
-  # Interrupt training if `val_loss` stops improving for over 2 epochs
-  keras.callbacks.EarlyStopping(patience=2, monitor='val_loss'),
-  # Write TensorBoard logs to `./logs` directory
-  keras.callbacks.TensorBoard(log_dir='./logs')
-]
-model.fit(data, labels, batch_size=32, epochs=5, callbacks=callbacks,
-          validation_data=(val_data, val_targets))
-```
-
-
-## Save and restore
-
-### Weights only
-
-Save and load the weights of a model using `tf.keras.Model.save_weights`:
-
-```python
-# Save weights to a TensorFlow Checkpoint file
-model.save_weights('./my_model')
-
-# Restore the model's state,
-# this requires a model with the same architecture.
-model.load_weights('my_model')
-```
-
-By default, this saves the model's weights in the
-[TensorFlow checkpoint](/get_started/checkpoints) file format. Weights can also
-be saved to the Keras HDF5 format (the default for the multi-backend
-implementation of Keras):
-
-```python
-# Save weights to a HDF5 file
-model.save_weights('my_model.h5', save_format='h5')
-
-# Restore the model's state
-model.load_weights('my_model.h5')
-```
-
-
-### Configuration only
-
-A model's configuration can be saved—this serializes the model architecture
-without any weights. A saved configuration can recreate and initialize the same
-model, even without the code that defined the original model.
Keras supports -JSON and YAML serialization formats: - -```python -# Serialize a model to JSON format -json_string = model.to_json() - -# Recreate the model (freshly initialized) -fresh_model = keras.models.from_json(json_string) - -# Serializes a model to YAML format -yaml_string = model.to_yaml() - -# Recreate the model -fresh_model = keras.models.from_yaml(yaml_string) -``` - -Caution: Subclassed models are not serializable because their architecture is -defined by the Python code in the body of the `call` method. - - -### Entire model - -The entire model can be saved to a file that contains the weight values, the -model's configuration, and even the optimizer's configuration. This allows you -to checkpoint a model and resume training later—from the exact same -state—without access to the original code. - -```python -# Create a trivial model -model = keras.Sequential([ - keras.layers.Dense(10, activation='softmax', input_shape=(32,)), - keras.layers.Dense(10, activation='softmax') -]) -model.compile(optimizer='rmsprop', - loss='categorical_crossentropy', - metrics=['accuracy']) -model.fit(data, targets, batch_size=32, epochs=5) - - -# Save entire model to a HDF5 file -model.save('my_model.h5') - -# Recreate the exact same model, including weights and optimizer. -model = keras.models.load_model('my_model.h5') -``` - - -## Eager execution - -[Eager execution](/programmers_guide/eager) is an imperative programming -environment that evaluates operations immediately. This is not required for -Keras, but is supported by `tf.keras` and useful for inspecting your program and -debugging. - -All of the `tf.keras` model-building APIs are compatible with eager execution. -And while the `Sequential` and functional APIs can be used, eager execution -especially benefits *model subclassing* and building *custom layers*—the APIs -that require you to write the forward pass as code (instead of the APIs that -create models by assembling existing layers). - -See the [eager execution guide](/programmers_guide/eager#build_a_model) for -examples of using Keras models with custom training loops and `tf.GradientTape`. - - -## Distribution - -### Estimators - -The [Estimators](/programmers_guide/estimators) API is used for training models -for distributed environments. This targets industry use cases such as -distributed training on large datasets that can export a model for production. - -A `tf.keras.Model` can be trained with the `tf.estimator` API by converting the -model to an `tf.estimator.Estimator` object with -`tf.keras.estimator.model_to_estimator`. See -[Creating Estimators from Keras models](/programmers_guide/estimators#creating_estimators_from_keras_models). - -```python -model = keras.Sequential([layers.Dense(10,activation='softmax'), - layers.Dense(10,activation='softmax')]) - -model.compile(optimizer=tf.train.RMSPropOptimizer(0.001), - loss='categorical_crossentropy', - metrics=['accuracy']) - -estimator = keras.estimator.model_to_estimator(model) -``` - -Note: Enable [eager execution](/programmers_guide/eager) for debugging -[Estimator input functions](/programmers_guide/premade_estimators#create_input_functions) -and inspecting data. - -### Multiple GPUs - -`tf.keras` models can run on multiple GPUs using -`tf.contrib.distribute.DistributionStrategy`. This API provides distributed -training on multiple GPUs with almost no changes to existing code. - -Currently, `tf.contrib.distribute.MirroredStrategy` is the only supported -distribution strategy. 
`MirroredStrategy` does in-graph replication with -synchronous training using all-reduce on a single machine. To use -`DistributionStrategy` with Keras, convert the `tf.keras.Model` to a -`tf.estimator.Estimator` with `tf.keras.estimator.model_to_estimator`, then -train the estimator - -The following example distributes a `tf.keras.Model` across multiple GPUs on a -single machine. - -First, define a simple model: - -```python -model = keras.Sequential() -model.add(keras.layers.Dense(16, activation='relu', input_shape=(10,))) -model.add(keras.layers.Dense(1, activation='sigmoid')) - -optimizer = tf.train.GradientDescentOptimizer(0.2) - -model.compile(loss='binary_crossentropy', optimizer=optimizer) -model.summary() -``` - -Convert the Keras model to a `tf.estimator.Estimator` instance: - -```python -keras_estimator = keras.estimator.model_to_estimator( - keras_model=model, - config=config, - model_dir='/tmp/model_dir') -``` - -Define an *input pipeline*. The `input_fn` returns a `tf.data.Dataset` object -used to distribute the data across multiple devices—with each device processing -a slice of the input batch. - -```python -def input_fn(): - x = np.random.random((1024, 10)) - y = np.random.randint(2, size=(1024, 1)) - x = tf.cast(x, tf.float32) - dataset = tf.data.Dataset.from_tensor_slices((x, y)) - dataset = dataset.repeat(10) - dataset = dataset.batch(32) - return dataset -``` - -Next, create a `tf.estimator.RunConfig` and set the `train_distribute` argument -to the `tf.contrib.distribute.MirroredStrategy` instance. When creating -`MirroredStrategy`, you can specify a list of devices or set the `num_gpus` -argument. The default uses all available GPUs, like the following: - -```python -strategy = tf.contrib.distribute.MirroredStrategy() -config = tf.estimator.RunConfig(train_distribute=strategy) -``` - -Finally, train the `Estimator` instance by providing the `input_fn` and `steps` -arguments: - -```python -keras_estimator.train(input_fn=input_fn, steps=10) -``` diff --git a/tensorflow/docs_src/programmers_guide/leftnav_files b/tensorflow/docs_src/programmers_guide/leftnav_files deleted file mode 100644 index 357a2a1cb9..0000000000 --- a/tensorflow/docs_src/programmers_guide/leftnav_files +++ /dev/null @@ -1,40 +0,0 @@ -index.md - -### High Level APIs -keras.md -eager.md -datasets.md - -### Estimators -estimators.md: Introduction to Estimators -premade_estimators.md -custom_estimators.md -feature_columns.md -datasets_for_estimators.md -checkpoints.md - -### Accelerators -using_gpu.md -using_tpu.md - -### Low Level APIs -low_level_intro.md -tensors.md -variables.md -graphs.md -saved_model.md - -### ML Concepts -embedding.md - -### Debugging -debugger.md - -### TensorBoard -summaries_and_tensorboard.md: Visualizing Learning -graph_viz.md: Graphs -tensorboard_histograms.md: Histograms - -### Misc -version_compat.md -faq.md diff --git a/tensorflow/docs_src/programmers_guide/low_level_intro.md b/tensorflow/docs_src/programmers_guide/low_level_intro.md deleted file mode 100644 index 478e2bb70b..0000000000 --- a/tensorflow/docs_src/programmers_guide/low_level_intro.md +++ /dev/null @@ -1,604 +0,0 @@ -# Introduction - -This guide gets you started programming in the low-level TensorFlow APIs -(TensorFlow Core), showing you how to: - - * Manage your own TensorFlow program (a `tf.Graph`) and TensorFlow - runtime (a `tf.Session`), instead of relying on Estimators to manage them. - * Run TensorFlow operations, using a `tf.Session`. 
- * Use high level components ([datasets](#datasets), [layers](#layers), and - [feature_columns](#feature_columns)) in this low level environment. - * Build your own training loop, instead of using the one - @{$premade_estimators$provided by Estimators}. - -We recommend using the higher level APIs to build models when possible. -Knowing TensorFlow Core is valuable for the following reasons: - - * Experimentation and debugging are both more straight forward - when you can use low level TensorFlow operations directly. - * It gives you a mental model of how things work internally when - using the higher level APIs. - -## Setup - -Before using this guide, @{$install$install TensorFlow}. - -To get the most out of this guide, you should know the following: - -* How to program in Python. -* At least a little bit about arrays. -* Ideally, something about machine learning. - -Feel free to launch `python` and follow along with this walkthrough. -Run the following lines to set up your Python environment: - -```python -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import tensorflow as tf -``` - -## Tensor Values - -The central unit of data in TensorFlow is the **tensor**. A tensor consists of a -set of primitive values shaped into an array of any number of dimensions. A -tensor's **rank** is its number of dimensions, while its **shape** is a tuple -of integers specifying the array's length along each dimension. Here are some -examples of tensor values: - -```python -3. # a rank 0 tensor; a scalar with shape [], -[1., 2., 3.] # a rank 1 tensor; a vector with shape [3] -[[1., 2., 3.], [4., 5., 6.]] # a rank 2 tensor; a matrix with shape [2, 3] -[[[1., 2., 3.]], [[7., 8., 9.]]] # a rank 3 tensor with shape [2, 1, 3] -``` - -TensorFlow uses numpy arrays to represent tensor **values**. - -## TensorFlow Core Walkthrough - -You might think of TensorFlow Core programs as consisting of two discrete -sections: - -1. Building the computational graph (a @{tf.Graph}). -2. Running the computational graph (using a @{tf.Session}). - -### Graph - -A **computational graph** is a series of TensorFlow operations arranged into a -graph. The graph is composed of two types of objects. - - * @{tf.Operation$Operations} (or "ops"): The nodes of the graph. - Operations describe calculations that consume and produce tensors. - * @{tf.Tensor$Tensors}: The edges in the graph. These represent the values - that will flow through the graph. Most TensorFlow functions return - `tf.Tensors`. - -Important: `tf.Tensors` do not have values, they are just handles to elements -in the computation graph. - -Let's build a simple computational graph. The most basic operation is a -constant. The Python function that builds the operation takes a tensor value as -input. The resulting operation takes no inputs. When run, it outputs the -value that was passed to the constructor. We can create two floating point -constants `a` and `b` as follows: - -```python -a = tf.constant(3.0, dtype=tf.float32) -b = tf.constant(4.0) # also tf.float32 implicitly -total = a + b -print(a) -print(b) -print(total) -``` - -The print statements produce: - -``` -Tensor("Const:0", shape=(), dtype=float32) -Tensor("Const_1:0", shape=(), dtype=float32) -Tensor("add:0", shape=(), dtype=float32) -``` - -Notice that printing the tensors does not output the values `3.0`, `4.0`, and -`7.0` as you might expect. The above statements only build the computation -graph. 
These `tf.Tensor` objects just represent the results of the operations -that will be run. - -Each operation in a graph is given a unique name. This name is independent of -the names the objects are assigned to in Python. Tensors are named after the -operation that produces them followed by an output index, as in -`"add:0"` above. - -### TensorBoard - -TensorFlow provides a utility called TensorBoard. One of TensorBoard's many -capabilities is visualizing a computation graph. You can easily do this with -a few simple commands. - -First you save the computation graph to a TensorBoard summary file as -follows: - -``` -writer = tf.summary.FileWriter('.') -writer.add_graph(tf.get_default_graph()) -``` - -This will produce an `event` file in the current directory with a name in the -following format: - -``` -events.out.tfevents.{timestamp}.{hostname} -``` - -Now, in a new terminal, launch TensorBoard with the following shell command: - -```bsh -tensorboard --logdir . -``` - -Then open TensorBoard's [graphs page](http://localhost:6006/#graphs) in your -browser, and you should see a graph similar to the following: - -![TensorBoard screenshot](https://www.tensorflow.org/images/getting_started_add.png) - -For more about TensorBoard's graph visualization tools see @{$graph_viz}. - -### Session - -To evaluate tensors, instantiate a @{tf.Session} object, informally known as a -**session**. A session encapsulates the state of the TensorFlow runtime, and -runs TensorFlow operations. If a `tf.Graph` is like a `.py` file, a `tf.Session` -is like the `python` executable. - -The following code creates a `tf.Session` object and then invokes its `run` -method to evaluate the `total` tensor we created above: - -```python -sess = tf.Session() -print(sess.run(total)) -``` - -When you request the output of a node with `Session.run` TensorFlow backtracks -through the graph and runs all the nodes that provide input to the requested -output node. So this prints the expected value of 7.0: - -``` -7.0 -``` - -You can pass multiple tensors to `tf.Session.run`. The `run` method -transparently handles any combination of tuples or dictionaries, as in the -following example: - -```python -print(sess.run({'ab':(a, b), 'total':total})) -``` - -which returns the results in a structure of the same layout: - -``` None -{'total': 7.0, 'ab': (3.0, 4.0)} -``` - -During a call to `tf.Session.run` any `tf.Tensor` only has a single value. -For example, the following code calls `tf.random_uniform` to produce a -`tf.Tensor` that generates a random 3-element vector (with values in `[0,1)`): - -```python -vec = tf.random_uniform(shape=(3,)) -out1 = vec + 1 -out2 = vec + 2 -print(sess.run(vec)) -print(sess.run(vec)) -print(sess.run((out1, out2))) -``` - -The result shows a different random value on each call to `run`, but -a consistent value during a single `run` (`out1` and `out2` receive the same -random input): - -``` -[ 0.52917576 0.64076328 0.68353939] -[ 0.66192627 0.89126778 0.06254101] -( - array([ 1.88408756, 1.87149239, 1.84057522], dtype=float32), - array([ 2.88408756, 2.87149239, 2.84057522], dtype=float32) -) -``` - -Some TensorFlow functions return `tf.Operations` instead of `tf.Tensors`. -The result of calling `run` on an Operation is `None`. You run an operation -to cause a side-effect, not to retrieve a value. Examples of this include the -[initialization](#Initializing Layers), and [training](#Training) ops -demonstrated later. 
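-
-For example, here is a minimal sketch of this behavior (it assumes the `sess`
-created above and uses a variable, which is covered in more detail later):
-running an initialization op returns `None`, while running a `tf.Tensor`
-returns a value:
-
-```python
-w = tf.Variable(1.0)                         # adds a variable to the graph
-init_op = tf.global_variables_initializer()  # a tf.Operation, not a tf.Tensor
-
-print(sess.run(init_op))  # prints "None": run only for its side effect
-print(sess.run(w))        # prints "1.0": the value assigned by the initializer
-```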
- -### Feeding - -As it stands, this graph is not especially interesting because it always -produces a constant result. A graph can be parameterized to accept external -inputs, known as **placeholders**. A **placeholder** is a promise to provide a -value later, like a function argument. - -```python -x = tf.placeholder(tf.float32) -y = tf.placeholder(tf.float32) -z = x + y -``` - -The preceding three lines are a bit like a function in which we -define two input parameters (`x` and `y`) and then an operation on them. We can -evaluate this graph with multiple inputs by using the `feed_dict` argument of -the @{tf.Session.run$run method} to feed concrete values to the placeholders: - -```python -print(sess.run(z, feed_dict={x: 3, y: 4.5})) -print(sess.run(z, feed_dict={x: [1, 3], y: [2, 4]})) -``` -This results in the following output: - -``` -7.5 -[ 3. 7.] -``` - -Also note that the `feed_dict` argument can be used to overwrite any tensor in -the graph. The only difference between placeholders and other `tf.Tensors` is -that placeholders throw an error if no value is fed to them. - -## Datasets - -Placeholders work for simple experiments, but @{tf.data$Datasets} are the -preferred method of streaming data into a model. - -To get a runnable `tf.Tensor` from a Dataset you must first convert it to a -@{tf.data.Iterator}, and then call the Iterator's -@{tf.data.Iterator.get_next$`get_next`} method. - -The simplest way to create an Iterator is with the -@{tf.data.Dataset.make_one_shot_iterator$`make_one_shot_iterator`} method. -For example, in the following code the `next_item` tensor will return a row from -the `my_data` array on each `run` call: - -``` python -my_data = [ - [0, 1,], - [2, 3,], - [4, 5,], - [6, 7,], -] -slices = tf.data.Dataset.from_tensor_slices(my_data) -next_item = slices.make_one_shot_iterator().get_next() -``` - -Reaching the end of the data stream causes `Dataset` to throw an -@{tf.errors.OutOfRangeError$`OutOfRangeError`}. For example, the following code -reads the `next_item` until there is no more data to read: - -``` python -while True: - try: - print(sess.run(next_item)) - except tf.errors.OutOfRangeError: - break -``` - -If the `Dataset` depends on stateful operations you may need to -initialize the iterator before using it, as shown below: - -``` python -r = tf.random_normal([10,3]) -dataset = tf.data.Dataset.from_tensor_slices(r) -iterator = dataset.make_initializable_iterator() -next_row = iterator.get_next() - -sess.run(iterator.initializer) -while True: - try: - print(sess.run(next_row)) - except tf.errors.OutOfRangeError: - break -``` - -For more details on Datasets and Iterators see: @{$programmers_guide/datasets}. - -## Layers - -A trainable model must modify the values in the graph to get new outputs with -the same input. @{tf.layers$Layers} are the preferred way to add trainable -parameters to a graph. - -Layers package together both the variables and the operations that act -on them. For example a -[densely-connected layer](https://developers.google.com/machine-learning/glossary/#fully_connected_layer) -performs a weighted sum across all inputs -for each output and applies an optional -[activation function](https://developers.google.com/machine-learning/glossary/#activation_function). -The connection weights and biases are managed by the layer object. - -### Creating Layers - -The following code creates a @{tf.layers.Dense$`Dense`} layer that takes a -batch of input vectors, and produces a single output value for each. 
To apply a -layer to an input, call the layer as if it were a function. For example: - -```python -x = tf.placeholder(tf.float32, shape=[None, 3]) -linear_model = tf.layers.Dense(units=1) -y = linear_model(x) -``` - -The layer inspects its input to determine sizes for its internal variables. So -here we must set the shape of the `x` placeholder so that the layer can -build a weight matrix of the correct size. - -Now that we have defined the calculation of the output, `y`, there is one more -detail we need to take care of before we run the calculation. - -### Initializing Layers - -The layer contains variables that must be **initialized** before they can be -used. While it is possible to initialize variables individually, you can easily -initialize all the variables in a TensorFlow graph as follows: - -```python -init = tf.global_variables_initializer() -sess.run(init) -``` - -Important: Calling `tf.global_variables_initializer` only -creates and returns a handle to a TensorFlow operation. That op -will initialize all the global variables when we run it with `tf.Session.run`. - -Also note that this `global_variables_initializer` only initializes variables -that existed in the graph when the initializer was created. So the initializer -should be one of the last things added during graph construction. - -### Executing Layers - -Now that the layer is initialized, we can evaluate the `linear_model`'s output -tensor as we would any other tensor. For example, the following code: - -```python -print(sess.run(y, {x: [[1, 2, 3],[4, 5, 6]]})) -``` - -will generate a two-element output vector such as the following: - -``` -[[-3.41378999] - [-9.14999008]] -``` - -### Layer Function shortcuts - -For each layer class (like @{tf.layers.Dense}) TensorFlow also supplies a -shortcut function (like @{tf.layers.dense}). The only difference is that the -shortcut function versions create and run the layer in a single call. For -example, the following code is equivalent to the earlier version: - -```python -x = tf.placeholder(tf.float32, shape=[None, 3]) -y = tf.layers.dense(x, units=1) - -init = tf.global_variables_initializer() -sess.run(init) - -print(sess.run(y, {x: [[1, 2, 3], [4, 5, 6]]})) -``` - -While convenient, this approach allows no access to the @{tf.layers.Layer} -object. This makes introspection and debugging more difficult, -and layer reuse impossible. - -## Feature columns - -The easiest way to experiment with feature columns is using the -@{tf.feature_column.input_layer} function. This function only accepts -@{$feature_columns$dense columns} as inputs, so to view the result -of a categorical column you must wrap it in an -@{tf.feature_column.indicator_column}. For example: - -``` python -features = { - 'sales' : [[5], [10], [8], [9]], - 'department': ['sports', 'sports', 'gardening', 'gardening']} - -department_column = tf.feature_column.categorical_column_with_vocabulary_list( - 'department', ['sports', 'gardening']) -department_column = tf.feature_column.indicator_column(department_column) - -columns = [ - tf.feature_column.numeric_column('sales'), - department_column -] - -inputs = tf.feature_column.input_layer(features, columns) -``` - -Running the `inputs` tensor will parse the `features` into a batch of vectors. - -Feature columns can have internal state, like layers, so they often need to be -initialized. Categorical columns use @{tf.contrib.lookup$lookup tables} -internally and these require a separate initialization op, -@{tf.tables_initializer}. 
- -``` python -var_init = tf.global_variables_initializer() -table_init = tf.tables_initializer() -sess = tf.Session() -sess.run((var_init, table_init)) -``` - -Once the internal state has been initialized you can run `inputs` like any -other `tf.Tensor`: - -```python -print(sess.run(inputs)) -``` - -This shows how the feature columns have packed the input vectors, with the -one-hot "department" as the first two indices and "sales" as the third. - -```None -[[ 1. 0. 5.] - [ 1. 0. 10.] - [ 0. 1. 8.] - [ 0. 1. 9.]] -``` - -## Training - -Now that you're familiar with the basics of core TensorFlow, let's train a -small regression model manually. - -### Define the data - -First let's define some inputs, `x`, and the expected output for each input, -`y_true`: - -```python -x = tf.constant([[1], [2], [3], [4]], dtype=tf.float32) -y_true = tf.constant([[0], [-1], [-2], [-3]], dtype=tf.float32) -``` - -### Define the model - -Next, build a simple linear model, with 1 output: - -``` python -linear_model = tf.layers.Dense(units=1) - -y_pred = linear_model(x) -``` - -You can evaluate the predictions as follows: - -``` python -sess = tf.Session() -init = tf.global_variables_initializer() -sess.run(init) - -print(sess.run(y_pred)) -``` - -The model hasn't yet been trained, so the four "predicted" values aren't very -good. Here's what we got; your own output will almost certainly differ: - -``` None -[[ 0.02631879] - [ 0.05263758] - [ 0.07895637] - [ 0.10527515]] -``` - -### Loss - -To optimize a model, you first need to define the loss. We'll use the mean -square error, a standard loss for regression problems. - -While you could do this manually with lower level math operations, -the @{tf.losses} module provides a set of common loss functions. You can use it -to calculate the mean square error as follows: - -``` python -loss = tf.losses.mean_squared_error(labels=y_true, predictions=y_pred) - -print(sess.run(loss)) -``` -This will produce a loss value, something like: - -``` None -2.23962 -``` - -### Training - -TensorFlow provides -[**optimizers**](https://developers.google.com/machine-learning/glossary/#optimizer) -implementing standard optimization algorithms. These are implemented as -sub-classes of @{tf.train.Optimizer}. They incrementally change each -variable in order to minimize the loss. The simplest optimization algorithm is -[**gradient descent**](https://developers.google.com/machine-learning/glossary/#gradient_descent), -implemented by @{tf.train.GradientDescentOptimizer}. It modifies each -variable according to the magnitude of the derivative of loss with respect to -that variable. For example: - -```python -optimizer = tf.train.GradientDescentOptimizer(0.01) -train = optimizer.minimize(loss) -``` - -This code builds all the graph components necessary for the optimization, and -returns a training operation. When run, the training op will update variables -in the graph. You might run it as follows: - -```python -for i in range(100): - _, loss_value = sess.run((train, loss)) - print(loss_value) -``` - -Since `train` is an op, not a tensor, it doesn't return a value when run. -To see the progression of the loss during training, we run the loss tensor at -the same time, producing output like the following: - -``` None -1.35659 -1.00412 -0.759167 -0.588829 -0.470264 -0.387626 -0.329918 -0.289511 -0.261112 -0.241046 -... 
-``` - -### Complete program - -```python -x = tf.constant([[1], [2], [3], [4]], dtype=tf.float32) -y_true = tf.constant([[0], [-1], [-2], [-3]], dtype=tf.float32) - -linear_model = tf.layers.Dense(units=1) - -y_pred = linear_model(x) -loss = tf.losses.mean_squared_error(labels=y_true, predictions=y_pred) - -optimizer = tf.train.GradientDescentOptimizer(0.01) -train = optimizer.minimize(loss) - -init = tf.global_variables_initializer() - -sess = tf.Session() -sess.run(init) -for i in range(100): - _, loss_value = sess.run((train, loss)) - print(loss_value) - -print(sess.run(y_pred)) -``` - -## Next steps - -To learn more about building models with TensorFlow consider the following: - -* @{$custom_estimators$Custom Estimators}, to learn how to build - customized models with TensorFlow. Your knowledge of TensorFlow Core will - help you understand and debug your own models. - -If you want to learn more about the inner workings of TensorFlow consider the -following documents, which go into more depth on many of the topics discussed -here: - -* @{$graphs} -* @{$tensors} -* @{$variables} - - diff --git a/tensorflow/docs_src/programmers_guide/premade_estimators.md b/tensorflow/docs_src/programmers_guide/premade_estimators.md deleted file mode 100644 index 02e2caf64b..0000000000 --- a/tensorflow/docs_src/programmers_guide/premade_estimators.md +++ /dev/null @@ -1,430 +0,0 @@ -# Premade Estimators - -This document introduces the TensorFlow programming environment and shows you -how to solve the Iris classification problem in TensorFlow. - -## Prerequisites - -Prior to using the sample code in this document, you'll need to do the -following: - -* @{$install$Install TensorFlow}. -* If you installed TensorFlow with virtualenv or Anaconda, activate your - TensorFlow environment. -* Install or upgrade pandas by issuing the following command: - - pip install pandas - -## Getting the sample code - -Take the following steps to get the sample code we'll be going through: - -1. Clone the TensorFlow Models repository from GitHub by entering the following - command: - - git clone https://github.com/tensorflow/models - -1. Change directory within that branch to the location containing the examples - used in this document: - - cd models/samples/core/get_started/ - -The program described in this document is -[`premade_estimator.py`](https://github.com/tensorflow/models/blob/master/samples/core/get_started/premade_estimator.py). -This program uses -[`iris_data.py`](https://github.com/tensorflow/models/blob/master/samples/core/get_started/iris_data.py) -to fetch its training data. - -### Running the program - -You run TensorFlow programs as you would run any Python program. For example: - -``` bsh -python premade_estimator.py -``` - -The program should output training logs followed by some predictions against -the test set. For example, the first line in the following output shows that -the model thinks there is a 99.6% chance that the first example in the test -set is a Setosa. Since the test set expected Setosa, this appears to be -a good prediction. - -``` None -... -Prediction is "Setosa" (99.6%), expected "Setosa" - -Prediction is "Versicolor" (99.8%), expected "Versicolor" - -Prediction is "Virginica" (97.9%), expected "Virginica" -``` - -If the program generates errors instead of answers, ask yourself the following -questions: - -* Did you install TensorFlow properly? -* Are you using the correct version of TensorFlow? -* Did you activate the environment you installed TensorFlow in? 
(This is - only relevant in certain installation mechanisms.) - -## The programming stack - -Before getting into the details of the program itself, let's investigate the -programming environment. As the following illustration shows, TensorFlow -provides a programming stack consisting of multiple API layers: - -
- -
- -We strongly recommend writing TensorFlow programs with the following APIs: - -* @{$programmers_guide/estimators$Estimators}, which represent a complete model. - The Estimator API provides methods to train the model, to judge the model's - accuracy, and to generate predictions. -* @{$programmers_guide/datasets_for_estimators}, which build a data input - pipeline. The Dataset API has methods to load and manipulate data, and feed - it into your model. The Dataset API meshes well with the Estimators API. - -## Classifying irises: an overview - -The sample program in this document builds and tests a model that -classifies Iris flowers into three different species based on the size of their -[sepals](https://en.wikipedia.org/wiki/Sepal) and -[petals](https://en.wikipedia.org/wiki/Petal). - -
-Petal geometry compared for three iris species: Iris setosa, Iris virginica, and Iris versicolor -
- -**From left to right, -[*Iris setosa*](https://commons.wikimedia.org/w/index.php?curid=170298) (by -[Radomil](https://commons.wikimedia.org/wiki/User:Radomil), CC BY-SA 3.0), -[*Iris versicolor*](https://commons.wikimedia.org/w/index.php?curid=248095) (by -[Dlanglois](https://commons.wikimedia.org/wiki/User:Dlanglois), CC BY-SA 3.0), -and [*Iris virginica*](https://www.flickr.com/photos/33397993@N05/3352169862) -(by [Frank Mayfield](https://www.flickr.com/photos/33397993@N05), CC BY-SA -2.0).** - -### The data set - -The Iris data set contains four features and one -[label](https://developers.google.com/machine-learning/glossary/#label). -The four features identify the following botanical characteristics of -individual Iris flowers: - -* sepal length -* sepal width -* petal length -* petal width - -Our model will represent these features as `float32` numerical data. - -The label identifies the Iris species, which must be one of the following: - -* Iris setosa (0) -* Iris versicolor (1) -* Iris virginica (2) - -Our model will represent the label as `int32` categorical data. - -The following table shows three examples in the data set: - -|sepal length | sepal width | petal length | petal width| species (label) | -|------------:|------------:|-------------:|-----------:|:---------------:| -| 5.1 | 3.3 | 1.7 | 0.5 | 0 (Setosa) | -| 5.0 | 2.3 | 3.3 | 1.0 | 1 (versicolor)| -| 6.4 | 2.8 | 5.6 | 2.2 | 2 (virginica) | - -### The algorithm - -The program trains a Deep Neural Network classifier model having the following -topology: - -* 2 hidden layers. -* Each hidden layer contains 10 nodes. - -The following figure illustrates the features, hidden layers, and predictions -(not all of the nodes in the hidden layers are shown): - -
-A diagram of the network architecture: Inputs, 2 hidden layers, and outputs -
- -### Inference - -Running the trained model on an unlabeled example yields three predictions, -namely, the likelihood that this flower is the given Iris species. The sum of -those output predictions will be 1.0. For example, the prediction on an -unlabeled example might be something like the following: - -* 0.03 for Iris Setosa -* 0.95 for Iris Versicolor -* 0.02 for Iris Virginica - -The preceding prediction indicates a 95% probability that the given unlabeled -example is an Iris Versicolor. - -## Overview of programming with Estimators - -An Estimator is TensorFlow's high-level representation of a complete model. It -handles the details of initialization, logging, saving and restoring, and many -other features so you can concentrate on your model. For more details see -@{$programmers_guide/estimators}. - -An Estimator is any class derived from @{tf.estimator.Estimator}. TensorFlow -provides a collection of -@{tf.estimator$pre-made Estimators} -(for example, `LinearRegressor`) to implement common ML algorithms. Beyond -those, you may write your own -@{$custom_estimators$custom Estimators}. -We recommend using pre-made Estimators when just getting started. - -To write a TensorFlow program based on pre-made Estimators, you must perform the -following tasks: - -* Create one or more input functions. -* Define the model's feature columns. -* Instantiate an Estimator, specifying the feature columns and various - hyperparameters. -* Call one or more methods on the Estimator object, passing the appropriate - input function as the source of the data. - -Let's see how those tasks are implemented for Iris classification. - -## Create input functions - -You must create input functions to supply data for training, -evaluating, and prediction. - -An **input function** is a function that returns a @{tf.data.Dataset} object -which outputs the following two-element tuple: - -* [`features`](https://developers.google.com/machine-learning/glossary/#feature) - A Python dictionary in which: - * Each key is the name of a feature. - * Each value is an array containing all of that feature's values. -* `label` - An array containing the values of the - [label](https://developers.google.com/machine-learning/glossary/#label) for - every example. - -Just to demonstrate the format of the input function, here's a simple -implementation: - -```python -def input_evaluation_set(): - features = {'SepalLength': np.array([6.4, 5.0]), - 'SepalWidth': np.array([2.8, 2.3]), - 'PetalLength': np.array([5.6, 3.3]), - 'PetalWidth': np.array([2.2, 1.0])} - labels = np.array([2, 1]) - return features, labels -``` - -Your input function may generate the `features` dictionary and `label` list any -way you like. However, we recommend using TensorFlow's Dataset API, which can -parse all sorts of data. At a high level, the Dataset API consists of the -following classes: - -
-A diagram showing subclasses of the Dataset class -
- -Where the individual members are: - -* `Dataset` - Base class containing methods to create and transform - datasets. Also allows you to initialize a dataset from data in memory, or from - a Python generator. -* `TextLineDataset` - Reads lines from text files. -* `TFRecordDataset` - Reads records from TFRecord files. -* `FixedLengthRecordDataset` - Reads fixed size records from binary files. -* `Iterator` - Provides a way to access one data set element at a time. - -The Dataset API can handle a lot of common cases for you. For example, -using the Dataset API, you can easily read in records from a large collection -of files in parallel and join them into a single stream. - -To keep things simple in this example we are going to load the data with -[pandas](https://pandas.pydata.org/), and build our input pipeline from this -in-memory data. - -Here is the input function used for training in this program, which is available -in [`iris_data.py`](https://github.com/tensorflow/models/blob/master/samples/core/get_started/iris_data.py): - -``` python -def train_input_fn(features, labels, batch_size): - """An input function for training""" - # Convert the inputs to a Dataset. - dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels)) - - # Shuffle, repeat, and batch the examples. - return dataset.shuffle(1000).repeat().batch(batch_size) -``` - -## Define the feature columns - -A [**feature column**](https://developers.google.com/machine-learning/glossary/#feature_columns) -is an object describing how the model should use raw input data from the -features dictionary. When you build an Estimator model, you pass it a list of -feature columns that describes each of the features you want the model to use. -The @{tf.feature_column} module provides many options for representing data -to the model. - -For Iris, the 4 raw features are numeric values, so we'll build a list of -feature columns to tell the Estimator model to represent each of the four -features as 32-bit floating-point values. Therefore, the code to create the -feature column is: - -```python -# Feature columns describe how to use the input. -my_feature_columns = [] -for key in train_x.keys(): - my_feature_columns.append(tf.feature_column.numeric_column(key=key)) -``` - -Feature columns can be far more sophisticated than those we're showing here. We -detail feature columns @{$feature_columns$later on} in our Getting -Started guide. - -Now that we have the description of how we want the model to represent the raw -features, we can build the estimator. - - -## Instantiate an estimator - -The Iris problem is a classic classification problem. Fortunately, TensorFlow -provides several pre-made classifier Estimators, including: - -* @{tf.estimator.DNNClassifier} for deep models that perform multi-class - classification. -* @{tf.estimator.DNNLinearCombinedClassifier} for wide & deep models. -* @{tf.estimator.LinearClassifier} for classifiers based on linear models. - -For the Iris problem, `tf.estimator.DNNClassifier` seems like the best choice. -Here's how we instantiated this Estimator: - -```python -# Build a DNN with 2 hidden layers and 10 nodes in each hidden layer. -classifier = tf.estimator.DNNClassifier( - feature_columns=my_feature_columns, - # Two hidden layers of 10 nodes each. - hidden_units=[10, 10], - # The model must choose between 3 classes. - n_classes=3) -``` - -## Train, Evaluate, and Predict - -Now that we have an Estimator object, we can call methods to do the following: - -* Train the model. 
-* Evaluate the trained model. -* Use the trained model to make predictions. - -### Train the model - -Train the model by calling the Estimator's `train` method as follows: - -```python -# Train the Model. -classifier.train( - input_fn=lambda:iris_data.train_input_fn(train_x, train_y, args.batch_size), - steps=args.train_steps) -``` - -Here we wrap up our `input_fn` call in a -[`lambda`](https://docs.python.org/3/tutorial/controlflow.html) -to capture the arguments while providing an input function that takes no -arguments, as expected by the Estimator. The `steps` argument tells the method -to stop training after a number of training steps. - -### Evaluate the trained model - -Now that the model has been trained, we can get some statistics on its -performance. The following code block evaluates the accuracy of the trained -model on the test data: - -```python -# Evaluate the model. -eval_result = classifier.evaluate( - input_fn=lambda:iris_data.eval_input_fn(test_x, test_y, args.batch_size)) - -print('\nTest set accuracy: {accuracy:0.3f}\n'.format(**eval_result)) -``` - -Unlike our call to the `train` method, we did not pass the `steps` -argument to evaluate. Our `eval_input_fn` only yields a single -[epoch](https://developers.google.com/machine-learning/glossary/#epoch) of data. - -Running this code yields the following output (or something similar): - -```none -Test set accuracy: 0.967 -``` - -### Making predictions (inferring) from the trained model - -We now have a trained model that produces good evaluation results. -We can now use the trained model to predict the species of an Iris flower -based on some unlabeled measurements. As with training and evaluation, we make -predictions using a single function call: - -```python -# Generate predictions from the model -expected = ['Setosa', 'Versicolor', 'Virginica'] -predict_x = { - 'SepalLength': [5.1, 5.9, 6.9], - 'SepalWidth': [3.3, 3.0, 3.1], - 'PetalLength': [1.7, 4.2, 5.4], - 'PetalWidth': [0.5, 1.5, 2.1], -} - -predictions = classifier.predict( - input_fn=lambda:iris_data.eval_input_fn(predict_x, - batch_size=args.batch_size)) -``` - -The `predict` method returns a Python iterable, yielding a dictionary of -prediction results for each example. The following code prints a few -predictions and their probabilities: - - -``` python -template = ('\nPrediction is "{}" ({:.1f}%), expected "{}"') - -for pred_dict, expec in zip(predictions, expected): - class_id = pred_dict['class_ids'][0] - probability = pred_dict['probabilities'][class_id] - - print(template.format(iris_data.SPECIES[class_id], - 100 * probability, expec)) -``` - -Running the preceding code yields the following output: - -``` None -... -Prediction is "Setosa" (99.6%), expected "Setosa" - -Prediction is "Versicolor" (99.8%), expected "Versicolor" - -Prediction is "Virginica" (97.9%), expected "Virginica" -``` - - -## Summary - -Pre-made Estimators are an effective way to quickly create standard models. - -Now that you've gotten started writing TensorFlow programs, consider the -following material: - -* @{$checkpoints$Checkpoints} to learn how to save and restore models. -* @{$programmers_guide/datasets_for_estimators} to learn more about importing - data into your model. -* @{$custom_estimators$Creating Custom Estimators} to learn how to - write your own Estimator, customized for a particular problem. 
diff --git a/tensorflow/docs_src/programmers_guide/saved_model.md b/tensorflow/docs_src/programmers_guide/saved_model.md deleted file mode 100644 index c6ef87c54a..0000000000 --- a/tensorflow/docs_src/programmers_guide/saved_model.md +++ /dev/null @@ -1,999 +0,0 @@ -# Save and Restore - -The @{tf.train.Saver} class provides methods to save and restore models. The -@{tf.saved_model.simple_save} function is an easy way to build a -@{tf.saved_model$saved model} suitable for serving. -[Estimators](@{$programmers_guide/estimators}) automatically save and restore -variables in the `model_dir`. - -## Save and restore variables - -TensorFlow @{$variables} are the best way to represent shared, persistent state -manipulated by your program. The `tf.train.Saver` constructor adds `save` and -`restore` ops to the graph for all, or a specified list, of the variables in the -graph. The `Saver` object provides methods to run these ops, specifying paths -for the checkpoint files to write to or read from. - -`Saver` restores all variables already defined in your model. If you're -loading a model without knowing how to build its graph (for example, if you're -writing a generic program to load models), then read the -[Overview of saving and restoring models](#models) section -later in this document. - -TensorFlow saves variables in binary *checkpoint files* that map variable -names to tensor values. - -Caution: TensorFlow model files are code. Be careful with untrusted code. -See [Using TensorFlow Securely](https://github.com/tensorflow/tensorflow/blob/master/SECURITY.md) -for details. - -### Save variables - -Create a `Saver` with `tf.train.Saver()` to manage all variables in the -model. For example, the following snippet demonstrates how to call the -`tf.train.Saver.save` method to save variables to checkpoint files: - -```python -# Create some variables. -v1 = tf.get_variable("v1", shape=[3], initializer = tf.zeros_initializer) -v2 = tf.get_variable("v2", shape=[5], initializer = tf.zeros_initializer) - -inc_v1 = v1.assign(v1+1) -dec_v2 = v2.assign(v2-1) - -# Add an op to initialize the variables. -init_op = tf.global_variables_initializer() - -# Add ops to save and restore all the variables. -saver = tf.train.Saver() - -# Later, launch the model, initialize the variables, do some work, and save the -# variables to disk. -with tf.Session() as sess: - sess.run(init_op) - # Do some work with the model. - inc_v1.op.run() - dec_v2.op.run() - # Save the variables to disk. - save_path = saver.save(sess, "/tmp/model.ckpt") - print("Model saved in path: %s" % save_path) -``` - -### Restore variables - -The `tf.train.Saver` object not only saves variables to checkpoint files, it -also restores variables. Note that when you restore variables you do not have -to initialize them beforehand. For example, the following snippet demonstrates -how to call the `tf.train.Saver.restore` method to restore variables from the -checkpoint files: - -```python -tf.reset_default_graph() - -# Create some variables. -v1 = tf.get_variable("v1", shape=[3]) -v2 = tf.get_variable("v2", shape=[5]) - -# Add ops to save and restore all the variables. -saver = tf.train.Saver() - -# Later, launch the model, use the saver to restore variables from disk, and -# do some work with the model. -with tf.Session() as sess: - # Restore variables from disk. 
- saver.restore(sess, "/tmp/model.ckpt") - print("Model restored.") - # Check the values of the variables - print("v1 : %s" % v1.eval()) - print("v2 : %s" % v2.eval()) -``` - -Note: There is not a physical file called `/tmp/model.ckpt`. It is the *prefix* of -filenames created for the checkpoint. Users only interact with the prefix -instead of physical checkpoint files. - -### Choose variables to save and restore - -If you do not pass any arguments to `tf.train.Saver()`, the saver handles all -variables in the graph. Each variable is saved under the name that was passed -when the variable was created. - -It is sometimes useful to explicitly specify names for variables in the -checkpoint files. For example, you may have trained a model with a variable -named `"weights"` whose value you want to restore into a variable named -`"params"`. - -It is also sometimes useful to only save or restore a subset of the variables -used by a model. For example, you may have trained a neural net with five -layers, and you now want to train a new model with six layers that reuses the -existing weights of the five trained layers. You can use the saver to restore -the weights of just the first five layers. - -You can easily specify the names and variables to save or load by passing to the -`tf.train.Saver()` constructor either of the following: - -* A list of variables (which will be stored under their own names). -* A Python dictionary in which keys are the names to use and the values are the -variables to manage. - -Continuing from the save/restore examples shown earlier: - -```python -tf.reset_default_graph() -# Create some variables. -v1 = tf.get_variable("v1", [3], initializer = tf.zeros_initializer) -v2 = tf.get_variable("v2", [5], initializer = tf.zeros_initializer) - -# Add ops to save and restore only `v2` using the name "v2" -saver = tf.train.Saver({"v2": v2}) - -# Use the saver object normally after that. -with tf.Session() as sess: - # Initialize v1 since the saver will not. - v1.initializer.run() - saver.restore(sess, "/tmp/model.ckpt") - - print("v1 : %s" % v1.eval()) - print("v2 : %s" % v2.eval()) -``` - -Notes: - -* You can create as many `Saver` objects as you want if you need to save and - restore different subsets of the model variables. The same variable can be - listed in multiple saver objects; its value is only changed when the - `Saver.restore()` method is run. - -* If you only restore a subset of the model variables at the start of a - session, you have to run an initialize op for the other variables. See - @{tf.variables_initializer} for more information. - -* To inspect the variables in a checkpoint, you can use the - [`inspect_checkpoint`](https://www.tensorflow.org/code/tensorflow/python/tools/inspect_checkpoint.py) - library, particularly the `print_tensors_in_checkpoint_file` function. - -* By default, `Saver` uses the value of the @{tf.Variable.name} property - for each variable. However, when you create a `Saver` object, you may - optionally choose names for the variables in the checkpoint files. - - -### Inspect variables in a checkpoint - -We can quickly inspect variables in a checkpoint with the -[`inspect_checkpoint`](https://www.tensorflow.org/code/tensorflow/python/tools/inspect_checkpoint.py) library. 
- -Continuing from the save/restore examples shown earlier: - -```python -# import the inspect_checkpoint library -from tensorflow.python.tools import inspect_checkpoint as chkp - -# print all tensors in checkpoint file -chkp.print_tensors_in_checkpoint_file("/tmp/model.ckpt", tensor_name='', all_tensors=True) - -# tensor_name: v1 -# [ 1. 1. 1.] -# tensor_name: v2 -# [-1. -1. -1. -1. -1.] - -# print only tensor v1 in checkpoint file -chkp.print_tensors_in_checkpoint_file("/tmp/model.ckpt", tensor_name='v1', all_tensors=False) - -# tensor_name: v1 -# [ 1. 1. 1.] - -# print only tensor v2 in checkpoint file -chkp.print_tensors_in_checkpoint_file("/tmp/model.ckpt", tensor_name='v2', all_tensors=False) - -# tensor_name: v2 -# [-1. -1. -1. -1. -1.] -``` - - - -## Save and restore models - -Use `SavedModel` to save and load your model—variables, the graph, and the -graph's metadata. This is a language-neutral, recoverable, hermetic -serialization format that enables higher-level systems and tools to produce, -consume, and transform TensorFlow models. TensorFlow provides several ways to -interact with `SavedModel`, including the @{tf.saved_model} APIs, -@{tf.estimator.Estimator}, and a command-line interface. - - -## Build and load a SavedModel - -### Simple save - -The easiest way to create a `SavedModel` is to use the @{tf.saved_model.simple_save} -function: - -```python -simple_save(session, - export_dir, - inputs={"x": x, "y": y}, - outputs={"z": z}) -``` - -This configures the `SavedModel` so it can be loaded by -[TensorFlow serving](/serving/serving_basic) and supports the -[Predict API](https://github.com/tensorflow/serving/blob/master/tensorflow_serving/apis/predict.proto). -To access the classify, regress, or multi-inference APIs, use the manual -`SavedModel` builder APIs or an @{tf.estimator.Estimator}. - -### Manually build a SavedModel - -If your use case isn't covered by @{tf.saved_model.simple_save}, use the manual -@{tf.saved_model.builder$builder APIs} to create a `SavedModel`. - -The @{tf.saved_model.builder.SavedModelBuilder} class provides functionality to -save multiple `MetaGraphDef`s. A **MetaGraph** is a dataflow graph, plus -its associated variables, assets, and signatures. A **`MetaGraphDef`** -is the protocol buffer representation of a MetaGraph. A **signature** is -the set of inputs to and outputs from a graph. - -If assets need to be saved and written or copied to disk, they can be provided -when the first `MetaGraphDef` is added. If multiple `MetaGraphDef`s are -associated with an asset of the same name, only the first version is retained. - -Each `MetaGraphDef` added to the SavedModel must be annotated with -user-specified tags. The tags provide a means to identify the specific -`MetaGraphDef` to load and restore, along with the shared set of variables -and assets. These tags -typically annotate a `MetaGraphDef` with its functionality (for example, -serving or training), and optionally with hardware-specific aspects (for -example, GPU). - -For example, the following code suggests a typical way to use -`SavedModelBuilder` to build a SavedModel: - -```python -export_dir = ... -... -builder = tf.saved_model.builder.SavedModelBuilder(export_dir) -with tf.Session(graph=tf.Graph()) as sess: - ... - builder.add_meta_graph_and_variables(sess, - [tag_constants.TRAINING], - signature_def_map=foo_signatures, - assets_collection=foo_assets, - strip_default_attrs=True) -... -# Add a second MetaGraphDef for inference. -with tf.Session(graph=tf.Graph()) as sess: - ... 
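-  # The variables for this SavedModel were already saved by
-  # add_meta_graph_and_variables() above; add_meta_graph() only records this
-  # graph and its tags, so no variables are written here.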
- builder.add_meta_graph([tag_constants.SERVING], strip_default_attrs=True) -... -builder.save() -``` - - -#### Forward compatibility via `strip_default_attrs=True` - -Following the guidance below gives you forward compatibility only if the set of -Ops has not changed. - -The @{tf.saved_model.builder.SavedModelBuilder$`SavedModelBuilder`} class allows -users to control whether default-valued attributes must be stripped from the -@{$extend/tool_developers#nodes$`NodeDefs`} -while adding a meta graph to the SavedModel bundle. Both -@{tf.saved_model.builder.SavedModelBuilder.add_meta_graph_and_variables$`SavedModelBuilder.add_meta_graph_and_variables`} -and @{tf.saved_model.builder.SavedModelBuilder.add_meta_graph$`SavedModelBuilder.add_meta_graph`} -methods accept a Boolean flag `strip_default_attrs` that controls this behavior. - -If `strip_default_attrs` is `False`, the exported @{tf.MetaGraphDef} will have -the default valued attributes in all its @{tf.NodeDef} instances. -This can break forward compatibility with a sequence of events such as the -following: - -* An existing Op (`Foo`) is updated to include a new attribute (`T`) with a - default (`bool`) at version 101. -* A model producer such as a "trainer binary" picks up this change (version 101) - to the `OpDef` and re-exports an existing model that uses Op `Foo`. -* A model consumer (such as [Tensorflow Serving](/serving)) running an older - binary (version 100) doesn't have attribute `T` for Op `Foo`, but tries to - import this model. The model consumer doesn't recognize attribute `T` in a - `NodeDef` that uses Op `Foo` and therefore fails to load the model. -* By setting `strip_default_attrs` to True, the model producers can strip away - any default valued attributes in the `NodeDefs`. This helps ensure that newly - added attributes with defaults don't cause older model consumers to fail - loading models regenerated with newer training binaries. - -See [compatibility guidance](https://www.tensorflow.org/programmers_guide/version_compat) -for more information. - -### Loading a SavedModel in Python - -The Python version of the SavedModel -@{tf.saved_model.loader$loader} -provides load and restore capability for a SavedModel. The `load` operation -requires the following information: - -* The session in which to restore the graph definition and variables. -* The tags used to identify the MetaGraphDef to load. -* The location (directory) of the SavedModel. - -Upon a load, the subset of variables, assets, and signatures supplied as part of -the specific MetaGraphDef will be restored into the supplied session. - - -```python -export_dir = ... -... -with tf.Session(graph=tf.Graph()) as sess: - tf.saved_model.loader.load(sess, [tag_constants.TRAINING], export_dir) - ... -``` - - -### Load a SavedModel in C++ - -The C++ version of the SavedModel -[loader](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/cc/saved_model/loader.h) -provides an API to load a SavedModel from a path, while allowing -`SessionOptions` and `RunOptions`. -You have to specify the tags associated with the graph to be loaded. -The loaded version of SavedModel is referred to as `SavedModelBundle` -and contains the MetaGraphDef and the session within which it is loaded. - -```c++ -const string export_dir = ... -SavedModelBundle bundle; -... 
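-// session_options and run_options are tensorflow::SessionOptions and
-// tensorflow::RunOptions; default-constructed values are typically
-// sufficient for a basic load.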
-LoadSavedModel(session_options, run_options, export_dir, {kSavedModelTagTrain},
-               &bundle);
-```
-
-### Load and serve a SavedModel in TensorFlow Serving
-
-You can easily load and serve a SavedModel with the TensorFlow Serving Model
-Server binary. See the [instructions](https://www.tensorflow.org/serving/setup#installing_using_apt-get)
-on how to install the server, or build it if you wish.
-
-Once you have the Model Server, run it with:
-
-```
-tensorflow_model_server --port=port-numbers --model_name=your-model-name --model_base_path=your_model_base_path
-```
-
-Set the `port` and `model_name` flags to values of your choosing. The
-`model_base_path` flag expects a base directory, with each version of
-your model residing in a numerically named subdirectory. If you only have a
-single version of your model, simply place it in a subdirectory like so:
-
-* Place the model in /tmp/model/0001
-* Set model_base_path to /tmp/model
-
-Store different versions of your model in numerically named subdirectories of a
-common base directory. For example, suppose the base directory is `/tmp/model`.
-If you have only one version of your model, store it in `/tmp/model/0001`. If
-you have two versions of your model, store the second version in
-`/tmp/model/0002`, and so on. Set the `--model_base_path` flag to the base
-directory (`/tmp/model`, in this example). TensorFlow Model Server will serve
-the model in the highest numbered subdirectory of that base directory.
-
-### Standard constants
-
-SavedModel offers the flexibility to build and load TensorFlow graphs for a
-variety of use-cases. For the most common use-cases, SavedModel's APIs
-provide a set of constants in Python and C++ that are easy to
-reuse and share across tools consistently.
-
-#### Standard MetaGraphDef tags
-
-You may use sets of tags to uniquely identify a `MetaGraphDef` saved in a
-SavedModel. A subset of commonly used tags is specified in:
-
-* [Python](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/tag_constants.py)
-* [C++](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/cc/saved_model/tag_constants.h)
-
-
-#### Standard SignatureDef constants
-
-A [**SignatureDef**](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/protobuf/meta_graph.proto)
-is a protocol buffer that defines the signature of a computation
-supported by a graph.
-Commonly used input keys, output keys, and method names are
-defined in:
-
-* [Python](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/signature_constants.py)
-* [C++](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/cc/saved_model/signature_constants.h)
-
-## Using SavedModel with Estimators
-
-After training an `Estimator` model, you may want to create a service
-from that model that takes requests and returns a result. You can run such a
-service locally on your machine or deploy it in the cloud.
-
-To prepare a trained Estimator for serving, you must export it in the standard
-SavedModel format. This section explains how to:
-
-* Specify the output nodes and the corresponding
-  [APIs](https://github.com/tensorflow/serving/blob/master/tensorflow_serving/apis/prediction_service.proto)
-  that can be served (Classify, Regress, or Predict).
-* Export your model to the SavedModel format.
-* Serve the model from a local server and request predictions.
- - -### Prepare serving inputs - -During training, an @{$premade_estimators#input_fn$`input_fn()`} ingests data -and prepares it for use by the model. At serving time, similarly, a -`serving_input_receiver_fn()` accepts inference requests and prepares them for -the model. This function has the following purposes: - -* To add placeholders to the graph that the serving system will feed - with inference requests. -* To add any additional ops needed to convert data from the input format - into the feature `Tensor`s expected by the model. - -The function returns a @{tf.estimator.export.ServingInputReceiver} object, -which packages the placeholders and the resulting feature `Tensor`s together. - -A typical pattern is that inference requests arrive in the form of serialized -`tf.Example`s, so the `serving_input_receiver_fn()` creates a single string -placeholder to receive them. The `serving_input_receiver_fn()` is then also -responsible for parsing the `tf.Example`s by adding a @{tf.parse_example} op to -the graph. - -When writing such a `serving_input_receiver_fn()`, you must pass a parsing -specification to @{tf.parse_example} to tell the parser what feature names to -expect and how to map them to `Tensor`s. A parsing specification takes the -form of a dict from feature names to @{tf.FixedLenFeature}, @{tf.VarLenFeature}, -and @{tf.SparseFeature}. Note this parsing specification should not include -any label or weight columns, since those will not be available at serving -time—in contrast to a parsing specification used in the `input_fn()` at -training time. - -In combination, then: - -```py -feature_spec = {'foo': tf.FixedLenFeature(...), - 'bar': tf.VarLenFeature(...)} - -def serving_input_receiver_fn(): - """An input receiver that expects a serialized tf.Example.""" - serialized_tf_example = tf.placeholder(dtype=tf.string, - shape=[default_batch_size], - name='input_example_tensor') - receiver_tensors = {'examples': serialized_tf_example} - features = tf.parse_example(serialized_tf_example, feature_spec) - return tf.estimator.export.ServingInputReceiver(features, receiver_tensors) -``` - -The @{tf.estimator.export.build_parsing_serving_input_receiver_fn} utility -function provides that input receiver for the common case. - -> Note: when training a model to be served using the Predict API with a local -> server, the parsing step is not needed because the model will receive raw -> feature data. - -Even if you require no parsing or other input processing—that is, if the -serving system will feed feature `Tensor`s directly—you must still provide -a `serving_input_receiver_fn()` that creates placeholders for the feature -`Tensor`s and passes them through. The -@{tf.estimator.export.build_raw_serving_input_receiver_fn} utility provides for -this. - -If these utilities do not meet your needs, you are free to write your own -`serving_input_receiver_fn()`. One case where this may be needed is if your -training `input_fn()` incorporates some preprocessing logic that must be -recapitulated at serving time. To reduce the risk of training-serving skew, we -recommend encapsulating such processing in a function which is then called -from both `input_fn()` and `serving_input_receiver_fn()`. - -Note that the `serving_input_receiver_fn()` also determines the *input* -portion of the signature. That is, when writing a -`serving_input_receiver_fn()`, you must tell the parser what signatures -to expect and how to map them to your model's expected inputs. 
-By contrast, the *output* portion of the signature is determined by the model. - - -### Specify the outputs of a custom model - -When writing a custom `model_fn`, you must populate the `export_outputs` element -of the @{tf.estimator.EstimatorSpec} return value. This is a dict of -`{name: output}` describing the output signatures to be exported and used during -serving. - -In the usual case of making a single prediction, this dict contains -one element, and the `name` is immaterial. In a multi-headed model, each head -is represented by an entry in this dict. In this case the `name` is a string -of your choice that can be used to request a specific head at serving time. - -Each `output` value must be an `ExportOutput` object such as -@{tf.estimator.export.ClassificationOutput}, -@{tf.estimator.export.RegressionOutput}, or -@{tf.estimator.export.PredictOutput}. - -These output types map straightforwardly to the -[TensorFlow Serving APIs](https://github.com/tensorflow/serving/blob/master/tensorflow_serving/apis/prediction_service.proto), -and so determine which request types will be honored. - -Note: In the multi-headed case, a `SignatureDef` will be generated for each -element of the `export_outputs` dict returned from the model_fn, named using -the same keys. These `SignatureDef`s differ only in their outputs, as -provided by the corresponding `ExportOutput` entry. The inputs are always -those provided by the `serving_input_receiver_fn`. -An inference request may specify the head by name. One head must be named -using [`signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY`](https://www.tensorflow.org/code/tensorflow/python/saved_model/signature_constants.py) -indicating which `SignatureDef` will be served when an inference request -does not specify one. - - -### Perform the export - -To export your trained Estimator, call -@{tf.estimator.Estimator.export_savedmodel} with the export base path and -the `serving_input_receiver_fn`. - -```py -estimator.export_savedmodel(export_dir_base, serving_input_receiver_fn, - strip_default_attrs=True) -``` - -This method builds a new graph by first calling the -`serving_input_receiver_fn()` to obtain feature `Tensor`s, and then calling -this `Estimator`'s `model_fn()` to generate the model graph based on those -features. It starts a fresh `Session`, and, by default, restores the most recent -checkpoint into it. (A different checkpoint may be passed, if needed.) -Finally it creates a time-stamped export directory below the given -`export_dir_base` (i.e., `export_dir_base/`), and writes a -SavedModel into it containing a single `MetaGraphDef` saved from this -Session. - -> Note: It is your responsibility to garbage-collect old exports. -> Otherwise, successive exports will accumulate under `export_dir_base`. - -### Serve the exported model locally - -For local deployment, you can serve your model using -[TensorFlow Serving](https://github.com/tensorflow/serving), an open-source project that loads a -SavedModel and exposes it as a [gRPC](https://www.grpc.io/) service. - -First, [install TensorFlow Serving](https://github.com/tensorflow/serving). - -Then build and run the local model server, substituting `$export_dir_base` with -the path to the SavedModel you exported above: - -```sh -bazel build //tensorflow_serving/model_servers:tensorflow_model_server -bazel-bin/tensorflow_serving/model_servers/tensorflow_model_server --port=9000 --model_base_path=$export_dir_base -``` - -Now you have a server listening for inference requests via gRPC on port 9000! 
- - -### Request predictions from a local server - -The server responds to gRPC requests according to the -[PredictionService](https://github.com/tensorflow/serving/blob/master/tensorflow_serving/apis/prediction_service.proto#L15) -gRPC API service definition. (The nested protocol buffers are defined in -various [neighboring files](https://github.com/tensorflow/serving/blob/master/tensorflow_serving/apis)). - -From the API service definition, the gRPC framework generates client libraries -in various languages providing remote access to the API. In a project using the -Bazel build tool, these libraries are built automatically and provided via -dependencies like these (using Python for example): - -```build - deps = [ - "//tensorflow_serving/apis:classification_proto_py_pb2", - "//tensorflow_serving/apis:regression_proto_py_pb2", - "//tensorflow_serving/apis:predict_proto_py_pb2", - "//tensorflow_serving/apis:prediction_service_proto_py_pb2" - ] -``` - -Python client code can then import the libraries thus: - -```py -from tensorflow_serving.apis import classification_pb2 -from tensorflow_serving.apis import regression_pb2 -from tensorflow_serving.apis import predict_pb2 -from tensorflow_serving.apis import prediction_service_pb2 -``` - -> Note: `prediction_service_pb2` defines the service as a whole and so -> is always required. However a typical client will need only one of -> `classification_pb2`, `regression_pb2`, and `predict_pb2`, depending on the -> type of requests being made. - -Sending a gRPC request is then accomplished by assembling a protocol buffer -containing the request data and passing it to the service stub. Note how the -request protocol buffer is created empty and then populated via the -[generated protocol buffer API](https://developers.google.com/protocol-buffers/docs/reference/python-generated). - -```py -from grpc.beta import implementations - -channel = implementations.insecure_channel(host, int(port)) -stub = prediction_service_pb2.beta_create_PredictionService_stub(channel) - -request = classification_pb2.ClassificationRequest() -example = request.input.example_list.examples.add() -example.features.feature['x'].float_list.value.extend(image[0].astype(float)) - -result = stub.Classify(request, 10.0) # 10 secs timeout -``` - -The returned result in this example is a `ClassificationResponse` protocol -buffer. - -This is a skeletal example; please see the @{$deploy$Tensorflow Serving} -documentation and [examples](https://github.com/tensorflow/serving/tree/master/tensorflow_serving/example) -for more details. - -> Note: `ClassificationRequest` and `RegressionRequest` contain a -> `tensorflow.serving.Input` protocol buffer, which in turn contains a list of -> `tensorflow.Example` protocol buffers. `PredictRequest`, by contrast, -> contains a mapping from feature names to values encoded via `TensorProto`. -> Correspondingly: When using the `Classify` and `Regress` APIs, TensorFlow -> Serving feeds serialized `tf.Example`s to the graph, so your -> `serving_input_receiver_fn()` should include a `tf.parse_example()` Op. -> When using the generic `Predict` API, however, TensorFlow Serving feeds raw -> feature data to the graph, so a pass through `serving_input_receiver_fn()` -> should be used. - - - - - - - - - -## CLI to inspect and execute SavedModel - -You can use the SavedModel Command Line Interface (CLI) to inspect and -execute a SavedModel. -For example, you can use the CLI to inspect the model's `SignatureDef`s. 
-The CLI enables you to quickly confirm that the input
-@{$tensors$Tensor dtype and shape} match the model. Moreover, if you
-want to test your model, you can use the CLI to do a sanity check by
-passing in sample inputs in various formats (for example, Python
-expressions) and then fetching the output.
-
-
-### Install the SavedModel CLI
-
-Broadly speaking, you can install TensorFlow in either of the following
-two ways:
-
-* By installing a pre-built TensorFlow binary.
-* By building TensorFlow from source code.
-
-If you installed TensorFlow through a pre-built TensorFlow binary,
-then the SavedModel CLI is already installed on your system
-at pathname `bin/saved_model_cli`.
-
-If you built TensorFlow from source code, you must run the following
-additional command to build `saved_model_cli`:
-
-```
-$ bazel build tensorflow/python/tools:saved_model_cli
-```
-
-### Overview of commands
-
-The SavedModel CLI supports the following two commands on a
-`MetaGraphDef` in a SavedModel:
-
-* `show`, which shows the computations available on a `MetaGraphDef` in a SavedModel.
-* `run`, which runs a computation on a `MetaGraphDef`.
-
-
-### `show` command
-
-A SavedModel contains one or more `MetaGraphDef`s, identified by their tag-sets.
-To serve a model, you
-might wonder what kind of `SignatureDef`s are in each model, and what their
-inputs and outputs are. The `show` command lets you examine the contents of the
-SavedModel in hierarchical order. Here's the syntax:
-
-```
-usage: saved_model_cli show [-h] --dir DIR [--all]
-[--tag_set TAG_SET] [--signature_def SIGNATURE_DEF_KEY]
-```
-
-For example, the following command shows all available
-MetaGraphDef tag-sets in the SavedModel:
-
-```
-$ saved_model_cli show --dir /tmp/saved_model_dir
-The given SavedModel contains the following tag-sets:
-serve
-serve, gpu
-```
-
-The following command shows all available `SignatureDef` keys in
-a `MetaGraphDef`:
-
-```
-$ saved_model_cli show --dir /tmp/saved_model_dir --tag_set serve
-The given SavedModel `MetaGraphDef` contains `SignatureDefs` with the
-following keys:
-SignatureDef key: "classify_x2_to_y3"
-SignatureDef key: "classify_x_to_y"
-SignatureDef key: "regress_x2_to_y3"
-SignatureDef key: "regress_x_to_y"
-SignatureDef key: "regress_x_to_y2"
-SignatureDef key: "serving_default"
-```
-
-If a `MetaGraphDef` has *multiple* tags in the tag-set, you must specify
-all tags, each tag separated by a comma. For example:
-
-```none
-$ saved_model_cli show --dir /tmp/saved_model_dir --tag_set serve,gpu
-```
-
-To show the `TensorInfo` for all inputs and outputs of a specific `SignatureDef`, pass
-the `SignatureDef` key to the `--signature_def` option. This is very useful when you
-want to know the tensor key, dtype, and shape of the input tensors for
-executing the computation graph later. For example:
-
-```
-$ saved_model_cli show --dir \
-/tmp/saved_model_dir --tag_set serve --signature_def serving_default
-The given SavedModel SignatureDef contains the following input(s):
-  inputs['x'] tensor_info:
-      dtype: DT_FLOAT
-      shape: (-1, 1)
-      name: x:0
-The given SavedModel SignatureDef contains the following output(s):
-  outputs['y'] tensor_info:
-      dtype: DT_FLOAT
-      shape: (-1, 1)
-      name: y:0
-Method name is: tensorflow/serving/predict
-```
-
-To show all available information in the SavedModel, use the `--all` option.
-For example:
-
-```none
-$ saved_model_cli show --dir /tmp/saved_model_dir --all
-MetaGraphDef with tag-set: 'serve' contains the following SignatureDefs:
-
-signature_def['classify_x2_to_y3']:
-  The given SavedModel SignatureDef contains the following input(s):
-    inputs['inputs'] tensor_info:
-        dtype: DT_FLOAT
-        shape: (-1, 1)
-        name: x2:0
-  The given SavedModel SignatureDef contains the following output(s):
-    outputs['scores'] tensor_info:
-        dtype: DT_FLOAT
-        shape: (-1, 1)
-        name: y3:0
-  Method name is: tensorflow/serving/classify
-
-...
-
-signature_def['serving_default']:
-  The given SavedModel SignatureDef contains the following input(s):
-    inputs['x'] tensor_info:
-        dtype: DT_FLOAT
-        shape: (-1, 1)
-        name: x:0
-  The given SavedModel SignatureDef contains the following output(s):
-    outputs['y'] tensor_info:
-        dtype: DT_FLOAT
-        shape: (-1, 1)
-        name: y:0
-  Method name is: tensorflow/serving/predict
-```
-
-
-### `run` command
-
-Invoke the `run` command to run a graph computation, passing
-inputs and then displaying (and optionally saving) the outputs.
-Here's the syntax:
-
-```
-usage: saved_model_cli run [-h] --dir DIR --tag_set TAG_SET --signature_def
-                           SIGNATURE_DEF_KEY [--inputs INPUTS]
-                           [--input_exprs INPUT_EXPRS] [--outdir OUTDIR]
-                           [--overwrite] [--tf_debug]
-```
-
-The `run` command provides the following three ways to pass inputs to the model:
-
-* The `--inputs` option enables you to pass numpy ndarrays stored in files.
-* The `--input_exprs` option enables you to pass Python expressions.
-* The `--input_examples` option enables you to pass `tf.train.Example`s.
-
-
-#### `--inputs`
-
-To pass input data in files, specify the `--inputs` option, which takes the
-following general format:
-
-```bsh
---inputs <INPUTS>
-```
-
-where *INPUTS* is either of the following formats:
-
-* `<input_key>=<filename>`
-* `<input_key>=<filename>[<variable_name>]`
-
-You may pass multiple *INPUTS*. If you do pass multiple inputs, use a semicolon
-to separate each of the *INPUTS*.
-
-`saved_model_cli` uses `numpy.load` to load the *filename*.
-The *filename* may be in any of the following formats:
-
-* `.npy`
-* `.npz`
-* pickle format
-
-A `.npy` file always contains a numpy ndarray. Therefore, when loading from
-a `.npy` file, the content will be directly assigned to the specified input
-tensor. If you specify a *variable_name* with that `.npy` file, the
-*variable_name* will be ignored and a warning will be issued.
-
-When loading from a `.npz` (zip) file, you may optionally specify a
-*variable_name* to identify the variable within the zip file to load for
-the input tensor key. If you don't specify a *variable_name*, the SavedModel
-CLI will check that only one file is included in the zip file and load it
-for the specified input tensor key.
-
-When loading from a pickle file, if no *variable_name* is specified in the
-square brackets, whatever is inside the pickle file will be passed to the
-specified input tensor key. Otherwise, the SavedModel CLI will assume a
-dictionary is stored in the pickle file and the value corresponding to
-the *variable_name* will be used.
-
-
-#### `--input_exprs`
-
-To pass inputs through Python expressions, specify the `--input_exprs` option.
-This can be useful when you don't have data
-files lying around, but still want to sanity check the model with some simple
-inputs that match the dtype and shape of the model's `SignatureDef`s.
-For example:
-
-```bsh
-<input_key>=[[1],[2],[3]]
-```
-
-In addition to Python expressions, you may also pass numpy functions.
-For example:
-
-```bsh
-<input_key>=np.ones((32,32,3))
-```
-
-(Note that the `numpy` module is already available to you as `np`.)
-
-
-#### `--input_examples`
-
-To pass `tf.train.Example`s as inputs, specify the `--input_examples` option.
-For each input key, it takes a list of dictionaries, where each dictionary is an
-instance of `tf.train.Example`. The dictionary keys are the features and the
-values are the value lists for each feature.
-For example:
-
-```bsh
-<input_key>=[{"age":[22,24],"education":["BS","MS"]}]
-```
-
-#### Save output
-
-By default, the SavedModel CLI writes output to stdout. If a directory is
-passed to the `--outdir` option, the outputs will be saved as npy files named after
-output tensor keys under the given directory.
-
-Use `--overwrite` to overwrite existing output files.
-
-
-#### TensorFlow debugger (tfdbg) integration
-
-If the `--tf_debug` option is set, the SavedModel CLI will use the
-TensorFlow Debugger (tfdbg) to watch the intermediate Tensors and runtime
-graphs or subgraphs while running the SavedModel.
-
-
-#### Full examples of `run`
-
-Given:
-
-* Your model simply adds `x1` and `x2` to get output `y`.
-* All tensors in the model have shape `(-1, 1)`.
-* You have two `npy` files:
-  * `/tmp/my_data1.npy`, which contains a numpy ndarray `[[1], [2], [3]]`.
-  * `/tmp/my_data2.npy`, which contains another numpy
-    ndarray `[[0.5], [0.5], [0.5]]`.
-
-To run these two `npy` files through the model to get output `y`, issue
-the following command:
-
-```
-$ saved_model_cli run --dir /tmp/saved_model_dir --tag_set serve \
---signature_def x1_x2_to_y --inputs x1=/tmp/my_data1.npy;x2=/tmp/my_data2.npy \
---outdir /tmp/out
-Result for output key y:
-[[ 1.5]
- [ 2.5]
- [ 3.5]]
-```
-
-Let's change the preceding example slightly. This time, instead of two
-`.npy` files, you now have an `.npz` file and a pickle file. Furthermore,
-you want to overwrite any existing output file. Here's the command:
-
-```
-$ saved_model_cli run --dir /tmp/saved_model_dir --tag_set serve \
---signature_def x1_x2_to_y \
---inputs x1=/tmp/my_data1.npz[x];x2=/tmp/my_data2.pkl --outdir /tmp/out \
---overwrite
-Result for output key y:
-[[ 1.5]
- [ 2.5]
- [ 3.5]]
-```
-
-You may specify a Python expression instead of an input file. For example,
-the following command replaces input `x2` with a Python expression:
-
-```
-$ saved_model_cli run --dir /tmp/saved_model_dir --tag_set serve \
---signature_def x1_x2_to_y --inputs x1=/tmp/my_data1.npz[x] \
---input_exprs 'x2=np.ones((3,1))'
-Result for output key y:
-[[ 2]
- [ 3]
- [ 4]]
-```
-
-To run the model with the TensorFlow Debugger on, issue the
-following command:
-
-```
-$ saved_model_cli run --dir /tmp/saved_model_dir --tag_set serve \
---signature_def serving_default --inputs x=/tmp/data.npz[x] --tf_debug
-```
-
-
-
-## Structure of a SavedModel directory
-
-When you save a model in SavedModel format, TensorFlow creates
-a SavedModel directory consisting of the following subdirectories
-and files:
-
-```bsh
-assets/
-assets.extra/
-variables/
-    variables.data-?????-of-?????
-    variables.index
-saved_model.pb|saved_model.pbtxt
-```
-
-where:
-
-* `assets` is a subfolder containing auxiliary (external) files,
-  such as vocabularies. Assets are copied to the SavedModel location
-  and can be read when loading a specific `MetaGraphDef`.
-* `assets.extra` is a subfolder where higher-level libraries and users can
-  add their own assets that co-exist with the model, but are not loaded by
-  the graph. This subfolder is not managed by the SavedModel libraries.
-* `variables` is a subfolder that includes output from - `tf.train.Saver`. -* `saved_model.pb` or `saved_model.pbtxt` is the SavedModel protocol buffer. - It includes the graph definitions as `MetaGraphDef` protocol buffers. - -A single SavedModel can represent multiple graphs. In this case, all the -graphs in the SavedModel share a *single* set of checkpoints (variables) -and assets. For example, the following diagram shows one SavedModel -containing three `MetaGraphDef`s, all three of which share the same set -of checkpoints and assets: - -![SavedModel represents checkpoints, assets, and one or more MetaGraphDefs](../images/SavedModel.svg) - -Each graph is associated with a specific set of tags, which enables -identification during a load or restore operation. diff --git a/tensorflow/docs_src/programmers_guide/summaries_and_tensorboard.md b/tensorflow/docs_src/programmers_guide/summaries_and_tensorboard.md deleted file mode 100644 index fadfa03e78..0000000000 --- a/tensorflow/docs_src/programmers_guide/summaries_and_tensorboard.md +++ /dev/null @@ -1,225 +0,0 @@ -# TensorBoard: Visualizing Learning - -The computations you'll use TensorFlow for - like training a massive -deep neural network - can be complex and confusing. To make it easier to -understand, debug, and optimize TensorFlow programs, we've included a suite of -visualization tools called TensorBoard. You can use TensorBoard to visualize -your TensorFlow graph, plot quantitative metrics about the execution of your -graph, and show additional data like images that pass through it. When -TensorBoard is fully configured, it looks like this: - -![MNIST TensorBoard](https://www.tensorflow.org/images/mnist_tensorboard.png "MNIST TensorBoard") - -
- -
- -This 30-minute tutorial is intended to get you started with simple TensorBoard -usage. It assumes a basic understanding of TensorFlow. - -There are other resources available as well! The [TensorBoard GitHub](https://github.com/tensorflow/tensorboard) -has a lot more information on using individual dashboards within TensorBoard -including tips & tricks and debugging information. - -## Setup - -[Install TensorFlow](https://www.tensorflow.org/install/). Installing TensorFlow -via pip should also automatically install TensorBoard. - -## Serializing the data - -TensorBoard operates by reading TensorFlow events files, which contain summary -data that you can generate when running TensorFlow. Here's the general -lifecycle for summary data within TensorBoard. - -First, create the TensorFlow graph that you'd like to collect summary -data from, and decide which nodes you would like to annotate with -@{$python/summary$summary operations}. - -For example, suppose you are training a convolutional neural network for -recognizing MNIST digits. You'd like to record how the learning rate -varies over time, and how the objective function is changing. Collect these by -attaching @{tf.summary.scalar} ops -to the nodes that output the learning rate and loss respectively. Then, give -each `scalar_summary` a meaningful `tag`, like `'learning rate'` or `'loss -function'`. - -Perhaps you'd also like to visualize the distributions of activations coming -off a particular layer, or the distribution of gradients or weights. Collect -this data by attaching -@{tf.summary.histogram} ops to -the gradient outputs and to the variable that holds your weights, respectively. - -For details on all of the summary operations available, check out the docs on -@{$python/summary$summary operations}. - -Operations in TensorFlow don't do anything until you run them, or an op that -depends on their output. And the summary nodes that we've just created are -peripheral to your graph: none of the ops you are currently running depend on -them. So, to generate summaries, we need to run all of these summary nodes. -Managing them by hand would be tedious, so use -@{tf.summary.merge_all} -to combine them into a single op that generates all the summary data. - -Then, you can just run the merged summary op, which will generate a serialized -`Summary` protobuf object with all of your summary data at a given step. -Finally, to write this summary data to disk, pass the summary protobuf to a -@{tf.summary.FileWriter}. - -The `FileWriter` takes a logdir in its constructor - this logdir is quite -important, it's the directory where all of the events will be written out. -Also, the `FileWriter` can optionally take a `Graph` in its constructor. -If it receives a `Graph` object, then TensorBoard will visualize your graph -along with tensor shape information. This will give you a much better sense of -what flows through the graph: see -@{$graph_viz#tensor-shape-information$Tensor shape information}. - -Now that you've modified your graph and have a `FileWriter`, you're ready to -start running your network! If you want, you could run the merged summary op -every single step, and record a ton of training data. That's likely to be more -data than you need, though. Instead, consider running the merged summary op -every `n` steps. 
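-
-As a minimal, self-contained sketch of that lifecycle (the squared-mean "loss",
-the tensor names, and the log directory are placeholders, not part of the MNIST
-example that follows), the pattern looks roughly like this:
-
-```python
-import tensorflow as tf
-
-x = tf.placeholder(tf.float32, shape=[None])
-loss = tf.reduce_mean(tf.square(x))        # some scalar you want to track
-tf.summary.scalar('loss', loss)            # annotate it with a scalar summary
-tf.summary.histogram('inputs', x)          # and record the input distribution
-
-merged = tf.summary.merge_all()            # one op that evaluates every summary
-
-with tf.Session() as sess:
-  writer = tf.summary.FileWriter('/tmp/summary_example', sess.graph)
-  sess.run(tf.global_variables_initializer())
-  for step in range(100):
-    batch = [float(step), float(step) + 1.0]
-    if step % 10 == 0:                     # run the merged op only every n steps
-      summ = sess.run(merged, feed_dict={x: batch})
-      writer.add_summary(summ, step)
-    else:
-      sess.run(loss, feed_dict={x: batch})
-  writer.close()
-```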
- -The code example below is a modification of the -[simple MNIST tutorial](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/tutorials/mnist/mnist.py), -in which we have added some summary ops, and run them every ten steps. If you -run this and then launch `tensorboard --logdir=/tmp/tensorflow/mnist`, you'll be able -to visualize statistics, such as how the weights or accuracy varied during -training. The code below is an excerpt; full source is -[here](https://www.tensorflow.org/code/tensorflow/examples/tutorials/mnist/mnist_with_summaries.py). - -```python -def variable_summaries(var): - """Attach a lot of summaries to a Tensor (for TensorBoard visualization).""" - with tf.name_scope('summaries'): - mean = tf.reduce_mean(var) - tf.summary.scalar('mean', mean) - with tf.name_scope('stddev'): - stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean))) - tf.summary.scalar('stddev', stddev) - tf.summary.scalar('max', tf.reduce_max(var)) - tf.summary.scalar('min', tf.reduce_min(var)) - tf.summary.histogram('histogram', var) - -def nn_layer(input_tensor, input_dim, output_dim, layer_name, act=tf.nn.relu): - """Reusable code for making a simple neural net layer. - - It does a matrix multiply, bias add, and then uses relu to nonlinearize. - It also sets up name scoping so that the resultant graph is easy to read, - and adds a number of summary ops. - """ - # Adding a name scope ensures logical grouping of the layers in the graph. - with tf.name_scope(layer_name): - # This Variable will hold the state of the weights for the layer - with tf.name_scope('weights'): - weights = weight_variable([input_dim, output_dim]) - variable_summaries(weights) - with tf.name_scope('biases'): - biases = bias_variable([output_dim]) - variable_summaries(biases) - with tf.name_scope('Wx_plus_b'): - preactivate = tf.matmul(input_tensor, weights) + biases - tf.summary.histogram('pre_activations', preactivate) - activations = act(preactivate, name='activation') - tf.summary.histogram('activations', activations) - return activations - -hidden1 = nn_layer(x, 784, 500, 'layer1') - -with tf.name_scope('dropout'): - keep_prob = tf.placeholder(tf.float32) - tf.summary.scalar('dropout_keep_probability', keep_prob) - dropped = tf.nn.dropout(hidden1, keep_prob) - -# Do not apply softmax activation yet, see below. -y = nn_layer(dropped, 500, 10, 'layer2', act=tf.identity) - -with tf.name_scope('cross_entropy'): - # The raw formulation of cross-entropy, - # - # tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(tf.softmax(y)), - # reduction_indices=[1])) - # - # can be numerically unstable. - # - # So here we use tf.losses.sparse_softmax_cross_entropy on the - # raw logit outputs of the nn_layer above. 
- with tf.name_scope('total'): - cross_entropy = tf.losses.sparse_softmax_cross_entropy(labels=y_, logits=y) -tf.summary.scalar('cross_entropy', cross_entropy) - -with tf.name_scope('train'): - train_step = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize( - cross_entropy) - -with tf.name_scope('accuracy'): - with tf.name_scope('correct_prediction'): - correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1)) - with tf.name_scope('accuracy'): - accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) -tf.summary.scalar('accuracy', accuracy) - -# Merge all the summaries and write them out to /tmp/mnist_logs (by default) -merged = tf.summary.merge_all() -train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train', - sess.graph) -test_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/test') -tf.global_variables_initializer().run() -``` - -After we've initialized the `FileWriters`, we have to add summaries to the -`FileWriters` as we train and test the model. - -```python -# Train the model, and also write summaries. -# Every 10th step, measure test-set accuracy, and write test summaries -# All other steps, run train_step on training data, & add training summaries - -def feed_dict(train): - """Make a TensorFlow feed_dict: maps data onto Tensor placeholders.""" - if train or FLAGS.fake_data: - xs, ys = mnist.train.next_batch(100, fake_data=FLAGS.fake_data) - k = FLAGS.dropout - else: - xs, ys = mnist.test.images, mnist.test.labels - k = 1.0 - return {x: xs, y_: ys, keep_prob: k} - -for i in range(FLAGS.max_steps): - if i % 10 == 0: # Record summaries and test-set accuracy - summary, acc = sess.run([merged, accuracy], feed_dict=feed_dict(False)) - test_writer.add_summary(summary, i) - print('Accuracy at step %s: %s' % (i, acc)) - else: # Record train set summaries, and train - summary, _ = sess.run([merged, train_step], feed_dict=feed_dict(True)) - train_writer.add_summary(summary, i) -``` - -You're now all set to visualize this data using TensorBoard. - - -## Launching TensorBoard - -To run TensorBoard, use the following command (alternatively `python -m -tensorboard.main`) - -```bash -tensorboard --logdir=path/to/log-directory -``` - -where `logdir` points to the directory where the `FileWriter` serialized its -data. If this `logdir` directory contains subdirectories which contain -serialized data from separate runs, then TensorBoard will visualize the data -from all of those runs. Once TensorBoard is running, navigate your web browser -to `localhost:6006` to view the TensorBoard. - -When looking at TensorBoard, you will see the navigation tabs in the top right -corner. Each tab represents a set of serialized data that can be visualized. - -For in depth information on how to use the *graph* tab to visualize your graph, -see @{$graph_viz$TensorBoard: Graph Visualization}. - -For more usage information on TensorBoard in general, see the -[TensorBoard GitHub](https://github.com/tensorflow/tensorboard). diff --git a/tensorflow/docs_src/programmers_guide/tensorboard_histograms.md b/tensorflow/docs_src/programmers_guide/tensorboard_histograms.md deleted file mode 100644 index 918deda190..0000000000 --- a/tensorflow/docs_src/programmers_guide/tensorboard_histograms.md +++ /dev/null @@ -1,245 +0,0 @@ -# TensorBoard Histogram Dashboard - -The TensorBoard Histogram Dashboard displays how the distribution of some -`Tensor` in your TensorFlow graph has changed over time. It does this by showing -many histograms visualizations of your tensor at different points in time. 
- -## A Basic Example - -Let's start with a simple case: a normally-distributed variable, where the mean -shifts over time. -TensorFlow has an op -[`tf.random_normal`](https://www.tensorflow.org/api_docs/python/tf/random_normal) -which is perfect for this purpose. As is usually the case with TensorBoard, we -will ingest data using a summary op; in this case, -['tf.summary.histogram'](https://www.tensorflow.org/api_docs/python/tf/summary/histogram). -For a primer on how summaries work, please see the general -[TensorBoard tutorial](https://www.tensorflow.org/get_started/summaries_and_tensorboard). - -Here is a code snippet that will generate some histogram summaries containing -normally distributed data, where the mean of the distribution increases over -time. - -```python -import tensorflow as tf - -k = tf.placeholder(tf.float32) - -# Make a normal distribution, with a shifting mean -mean_moving_normal = tf.random_normal(shape=[1000], mean=(5*k), stddev=1) -# Record that distribution into a histogram summary -tf.summary.histogram("normal/moving_mean", mean_moving_normal) - -# Setup a session and summary writer -sess = tf.Session() -writer = tf.summary.FileWriter("/tmp/histogram_example") - -summaries = tf.summary.merge_all() - -# Setup a loop and write the summaries to disk -N = 400 -for step in range(N): - k_val = step/float(N) - summ = sess.run(summaries, feed_dict={k: k_val}) - writer.add_summary(summ, global_step=step) -``` - -Once that code runs, we can load the data into TensorBoard via the command line: - - -```sh -tensorboard --logdir=/tmp/histogram_example -``` - -Once TensorBoard is running, load it in Chrome or Firefox and navigate to the -Histogram Dashboard. Then we can see a histogram visualization for our normally -distributed data. - -![](https://www.tensorflow.org/images/tensorboard/histogram_dashboard/1_moving_mean.png) - -`tf.summary.histogram` takes an arbitrarily sized and shaped Tensor, and -compresses it into a histogram data structure consisting of many bins with -widths and counts. For example, let's say we want to organize the numbers -`[0.5, 1.1, 1.3, 2.2, 2.9, 2.99]` into bins. We could make three bins: -* a bin -containing everything from 0 to 1 (it would contain one element, 0.5), -* a bin -containing everything from 1-2 (it would contain two elements, 1.1 and 1.3), -* a bin containing everything from 2-3 (it would contain three elements: 2.2, -2.9 and 2.99). - -TensorFlow uses a similar approach to create bins, but unlike in our example, it -doesn't create integer bins. For large, sparse datasets, that might result in -many thousands of bins. -Instead, [the bins are exponentially distributed, with many bins close to 0 and -comparatively few bins for very large numbers.](https://github.com/tensorflow/tensorflow/blob/c8b59c046895fa5b6d79f73e0b5817330fcfbfc1/tensorflow/core/lib/histogram/histogram.cc#L28) -However, visualizing exponentially-distributed bins is tricky; if height is used -to encode count, then wider bins take more space, even if they have the same -number of elements. Conversely, encoding count in the area makes height -comparisons impossible. Instead, the histograms [resample the data](https://github.com/tensorflow/tensorflow/blob/17c47804b86e340203d451125a721310033710f1/tensorflow/tensorboard/components/tf_backend/backend.ts#L400) -into uniform bins. This can lead to unfortunate artifacts in some cases. - -Each slice in the histogram visualizer displays a single histogram. -The slices are organized by step; -older slices (e.g. 
step 0) are further "back" and darker, while newer slices -(e.g. step 400) are close to the foreground, and lighter in color. -The y-axis on the right shows the step number. - -You can mouse over the histogram to see tooltips with some more detailed -information. For example, in the following image we can see that the histogram -at timestep 176 has a bin centered at 2.25 with 177 elements in that bin. - -![](https://www.tensorflow.org/images/tensorboard/histogram_dashboard/2_moving_mean_tooltip.png) - -Also, you may note that the histogram slices are not always evenly spaced in -step count or time. This is because TensorBoard uses -[reservoir sampling](https://en.wikipedia.org/wiki/Reservoir_sampling) to keep a -subset of all the histograms, to save on memory. Reservoir sampling guarantees -that every sample has an equal likelihood of being included, but because it is -a randomized algorithm, the samples chosen don't occur at even steps. - -## Overlay Mode - -There is a control on the left of the dashboard that allows you to toggle the -histogram mode from "offset" to "overlay": - -![](https://www.tensorflow.org/images/tensorboard/histogram_dashboard/3_overlay_offset.png) - -In "offset" mode, the visualization rotates 45 degrees, so that the individual -histogram slices are no longer spread out in time, but instead are all plotted -on the same y-axis. - -![](https://www.tensorflow.org/images/tensorboard/histogram_dashboard/4_overlay.png) -Now, each slice is a separate line on the chart, and the y-axis shows the item -count within each bucket. Darker lines are older, earlier steps, and lighter -lines are more recent, later steps. Once again, you can mouse over the chart to -see some additional information. - -![](https://www.tensorflow.org/images/tensorboard/histogram_dashboard/5_overlay_tooltips.png) - -In general, the overlay visualization is useful if you want to directly compare -the counts of different histograms. - -## Multimodal Distributions - -The Histogram Dashboard is great for visualizing multimodal -distributions. Let's construct a simple bimodal distribution by concatenating -the outputs from two different normal distributions. The code will look like -this: - -```python -import tensorflow as tf - -k = tf.placeholder(tf.float32) - -# Make a normal distribution, with a shifting mean -mean_moving_normal = tf.random_normal(shape=[1000], mean=(5*k), stddev=1) -# Record that distribution into a histogram summary -tf.summary.histogram("normal/moving_mean", mean_moving_normal) - -# Make a normal distribution with shrinking variance -variance_shrinking_normal = tf.random_normal(shape=[1000], mean=0, stddev=1-(k)) -# Record that distribution too -tf.summary.histogram("normal/shrinking_variance", variance_shrinking_normal) - -# Let's combine both of those distributions into one dataset -normal_combined = tf.concat([mean_moving_normal, variance_shrinking_normal], 0) -# We add another histogram summary to record the combined distribution -tf.summary.histogram("normal/bimodal", normal_combined) - -summaries = tf.summary.merge_all() - -# Setup a session and summary writer -sess = tf.Session() -writer = tf.summary.FileWriter("/tmp/histogram_example") - -# Setup a loop and write the summaries to disk -N = 400 -for step in range(N): - k_val = step/float(N) - summ = sess.run(summaries, feed_dict={k: k_val}) - writer.add_summary(summ, global_step=step) -``` - -You already remember our "moving mean" normal distribution from the example -above. Now we also have a "shrinking variance" distribution. 
Side-by-side, they -look like this: -![](https://www.tensorflow.org/images/tensorboard/histogram_dashboard/6_two_distributions.png) - -When we concatenate them, we get a chart that clearly reveals the divergent, -bimodal structure: -![](https://www.tensorflow.org/images/tensorboard/histogram_dashboard/7_bimodal.png) - -## Some more distributions - -Just for fun, let's generate and visualize a few more distributions, and then -combine them all into one chart. Here's the code we'll use: - -```python -import tensorflow as tf - -k = tf.placeholder(tf.float32) - -# Make a normal distribution, with a shifting mean -mean_moving_normal = tf.random_normal(shape=[1000], mean=(5*k), stddev=1) -# Record that distribution into a histogram summary -tf.summary.histogram("normal/moving_mean", mean_moving_normal) - -# Make a normal distribution with shrinking variance -variance_shrinking_normal = tf.random_normal(shape=[1000], mean=0, stddev=1-(k)) -# Record that distribution too -tf.summary.histogram("normal/shrinking_variance", variance_shrinking_normal) - -# Let's combine both of those distributions into one dataset -normal_combined = tf.concat([mean_moving_normal, variance_shrinking_normal], 0) -# We add another histogram summary to record the combined distribution -tf.summary.histogram("normal/bimodal", normal_combined) - -# Add a gamma distribution -gamma = tf.random_gamma(shape=[1000], alpha=k) -tf.summary.histogram("gamma", gamma) - -# And a poisson distribution -poisson = tf.random_poisson(shape=[1000], lam=k) -tf.summary.histogram("poisson", poisson) - -# And a uniform distribution -uniform = tf.random_uniform(shape=[1000], maxval=k*10) -tf.summary.histogram("uniform", uniform) - -# Finally, combine everything together! -all_distributions = [mean_moving_normal, variance_shrinking_normal, - gamma, poisson, uniform] -all_combined = tf.concat(all_distributions, 0) -tf.summary.histogram("all_combined", all_combined) - -summaries = tf.summary.merge_all() - -# Setup a session and summary writer -sess = tf.Session() -writer = tf.summary.FileWriter("/tmp/histogram_example") - -# Setup a loop and write the summaries to disk -N = 400 -for step in range(N): - k_val = step/float(N) - summ = sess.run(summaries, feed_dict={k: k_val}) - writer.add_summary(summ, global_step=step) -``` -### Gamma Distribution -![](https://www.tensorflow.org/images/tensorboard/histogram_dashboard/8_gamma.png) - -### Uniform Distribution -![](https://www.tensorflow.org/images/tensorboard/histogram_dashboard/9_uniform.png) - -### Poisson Distribution -![](https://www.tensorflow.org/images/tensorboard/histogram_dashboard/10_poisson.png) -The poisson distribution is defined over the integers. So, all of the values -being generated are perfect integers. The histogram compression moves the data -into floating-point bins, causing the visualization to show little -bumps over the integer values rather than perfect spikes. - -### All Together Now -Finally, we can concatenate all of the data into one funny-looking curve. -![](https://www.tensorflow.org/images/tensorboard/histogram_dashboard/11_all_combined.png) - diff --git a/tensorflow/docs_src/programmers_guide/tensors.md b/tensorflow/docs_src/programmers_guide/tensors.md deleted file mode 100644 index 1248c3cabe..0000000000 --- a/tensorflow/docs_src/programmers_guide/tensors.md +++ /dev/null @@ -1,330 +0,0 @@ -# Tensors - -TensorFlow, as the name indicates, is a framework to define and run computations -involving tensors. 
A **tensor** is a generalization of vectors and matrices to -potentially higher dimensions. Internally, TensorFlow represents tensors as -n-dimensional arrays of base datatypes. - -When writing a TensorFlow program, the main object you manipulate and pass -around is the `tf.Tensor`. A `tf.Tensor` object represents a partially defined -computation that will eventually produce a value. TensorFlow programs work by -first building a graph of `tf.Tensor` objects, detailing how each tensor is -computed based on the other available tensors and then by running parts of this -graph to achieve the desired results. - -A `tf.Tensor` has the following properties: - - * a data type (`float32`, `int32`, or `string`, for example) - * a shape - - -Each element in the Tensor has the same data type, and the data type is always -known. The shape (that is, the number of dimensions it has and the size of each -dimension) might be only partially known. Most operations produce tensors of -fully-known shapes if the shapes of their inputs are also fully known, but in -some cases it's only possible to find the shape of a tensor at graph execution -time. - -Some types of tensors are special, and these will be covered in other -units of the Programmer's guide. The main ones are: - - * `tf.Variable` - * `tf.constant` - * `tf.placeholder` - * `tf.SparseTensor` - -With the exception of `tf.Variable`, the value of a tensor is immutable, which -means that in the context of a single execution tensors only have a single -value. However, evaluating the same tensor twice can return different values; -for example that tensor can be the result of reading data from disk, or -generating a random number. - -## Rank - -The **rank** of a `tf.Tensor` object is its number of dimensions. Synonyms for -rank include **order** or **degree** or **n-dimension**. -Note that rank in TensorFlow is not the same as matrix rank in mathematics. -As the following table shows, each rank in TensorFlow corresponds to a -different mathematical entity: - -Rank | Math entity ---- | --- -0 | Scalar (magnitude only) -1 | Vector (magnitude and direction) -2 | Matrix (table of numbers) -3 | 3-Tensor (cube of numbers) -n | n-Tensor (you get the idea) - - -### Rank 0 - -The following snippet demonstrates creating a few rank 0 variables: - -```python -mammal = tf.Variable("Elephant", tf.string) -ignition = tf.Variable(451, tf.int16) -floating = tf.Variable(3.14159265359, tf.float64) -its_complicated = tf.Variable(12.3 - 4.85j, tf.complex64) -``` - -Note: A string is treated as a single item in TensorFlow, not as a sequence of -characters. It is possible to have scalar strings, vectors of strings, etc. - -### Rank 1 - -To create a rank 1 `tf.Tensor` object, you can pass a list of items as the -initial value. 
For example: - -```python -mystr = tf.Variable(["Hello"], tf.string) -cool_numbers = tf.Variable([3.14159, 2.71828], tf.float32) -first_primes = tf.Variable([2, 3, 5, 7, 11], tf.int32) -its_very_complicated = tf.Variable([12.3 - 4.85j, 7.5 - 6.23j], tf.complex64) -``` - - -### Higher ranks - -A rank 2 `tf.Tensor` object consists of at least one row and at least -one column: - -```python -mymat = tf.Variable([[7],[11]], tf.int16) -myxor = tf.Variable([[False, True],[True, False]], tf.bool) -linear_squares = tf.Variable([[4], [9], [16], [25]], tf.int32) -squarish_squares = tf.Variable([ [4, 9], [16, 25] ], tf.int32) -rank_of_squares = tf.rank(squarish_squares) -mymatC = tf.Variable([[7],[11]], tf.int32) -``` - -Higher-rank Tensors, similarly, consist of an n-dimensional array. For example, -during image processing, many tensors of rank 4 are used, with dimensions -corresponding to example-in-batch, image width, image height, and color channel. - -``` python -my_image = tf.zeros([10, 299, 299, 3]) # batch x height x width x color -``` - -### Getting a `tf.Tensor` object's rank - -To determine the rank of a `tf.Tensor` object, call the `tf.rank` method. -For example, the following method programmatically determines the rank -of the `tf.Tensor` defined in the previous section: - -```python -r = tf.rank(my_image) -# After the graph runs, r will hold the value 4. -``` - -### Referring to `tf.Tensor` slices - -Since a `tf.Tensor` is an n-dimensional array of cells, to access a single cell -in a `tf.Tensor` you need to specify n indices. - -For a rank 0 tensor (a scalar), no indices are necessary, since it is already a -single number. - -For a rank 1 tensor (a vector), passing a single index allows you to access a -number: - -```python -my_scalar = my_vector[2] -``` - -Note that the index passed inside the `[]` can itself be a scalar `tf.Tensor`, if -you want to dynamically choose an element from the vector. - -For tensors of rank 2 or higher, the situation is more interesting. For a -`tf.Tensor` of rank 2, passing two numbers returns a scalar, as expected: - - -```python -my_scalar = my_matrix[1, 2] -``` - - -Passing a single number, however, returns a subvector of a matrix, as follows: - - -```python -my_row_vector = my_matrix[2] -my_column_vector = my_matrix[:, 3] -``` - -The `:` notation is python slicing syntax for "leave this dimension alone". This -is useful in higher-rank Tensors, as it allows you to access its subvectors, -submatrices, and even other subtensors. - - -## Shape - -The **shape** of a tensor is the number of elements in each dimension. -TensorFlow automatically infers shapes during graph construction. These inferred -shapes might have known or unknown rank. If the rank is known, the sizes of each -dimension might be known or unknown. - -The TensorFlow documentation uses three notational conventions to describe -tensor dimensionality: rank, shape, and dimension number. The following table -shows how these relate to one another: - -Rank | Shape | Dimension number | Example ---- | --- | --- | --- -0 | [] | 0-D | A 0-D tensor. A scalar. -1 | [D0] | 1-D | A 1-D tensor with shape [5]. -2 | [D0, D1] | 2-D | A 2-D tensor with shape [3, 4]. -3 | [D0, D1, D2] | 3-D | A 3-D tensor with shape [1, 4, 3]. -n | [D0, D1, ... Dn-1] | n-D | A tensor with shape [D0, D1, ... Dn-1]. - -Shapes can be represented via Python lists / tuples of ints, or with the -@{tf.TensorShape}. - -### Getting a `tf.Tensor` object's shape - -There are two ways of accessing the shape of a `tf.Tensor`. 
While building the -graph, it is often useful to ask what is already known about a tensor's -shape. This can be done by reading the `shape` property of a `tf.Tensor` object. -This method returns a `TensorShape` object, which is a convenient way of -representing partially-specified shapes (since, when building the graph, not all -shapes will be fully known). - -It is also possible to get a `tf.Tensor` that will represent the fully-defined -shape of another `tf.Tensor` at runtime. This is done by calling the `tf.shape` -operation. This way, you can build a graph that manipulates the shapes of -tensors by building other tensors that depend on the dynamic shape of the input -`tf.Tensor`. - -For example, here is how to make a vector of zeros with the same size as the -number of columns in a given matrix: - -``` python -zeros = tf.zeros(my_matrix.shape[1]) -``` - -### Changing the shape of a `tf.Tensor` - -The **number of elements** of a tensor is the product of the sizes of all its -shapes. The number of elements of a scalar is always `1`. Since there are often -many different shapes that have the same number of elements, it's often -convenient to be able to change the shape of a `tf.Tensor`, keeping its elements -fixed. This can be done with `tf.reshape`. - -The following examples demonstrate how to reshape tensors: - -```python -rank_three_tensor = tf.ones([3, 4, 5]) -matrix = tf.reshape(rank_three_tensor, [6, 10]) # Reshape existing content into - # a 6x10 matrix -matrixB = tf.reshape(matrix, [3, -1]) # Reshape existing content into a 3x20 - # matrix. -1 tells reshape to calculate - # the size of this dimension. -matrixAlt = tf.reshape(matrixB, [4, 3, -1]) # Reshape existing content into a - #4x3x5 tensor - -# Note that the number of elements of the reshaped Tensors has to match the -# original number of elements. Therefore, the following example generates an -# error because no possible value for the last dimension will match the number -# of elements. -yet_another = tf.reshape(matrixAlt, [13, 2, -1]) # ERROR! -``` - -## Data types - -In addition to dimensionality, Tensors have a data type. Refer to the -`tf.DataType` page in the programmer's guide for a full list of the data types. - -It is not possible to have a `tf.Tensor` with more than one data type. It is -possible, however, to serialize arbitrary data structures as `string`s and store -those in `tf.Tensor`s. - -It is possible to cast `tf.Tensor`s from one datatype to another using -`tf.cast`: - -``` python -# Cast a constant integer tensor into floating point. -float_tensor = tf.cast(tf.constant([1, 2, 3]), dtype=tf.float32) -``` - -To inspect a `tf.Tensor`'s data type use the `Tensor.dtype` property. - -When creating a `tf.Tensor` from a python object you may optionally specify the -datatype. If you don't, TensorFlow chooses a datatype that can represent your -data. TensorFlow converts Python integers to `tf.int32` and python floating -point numbers to `tf.float32`. Otherwise TensorFlow uses the same rules numpy -uses when converting to arrays. - -## Evaluating Tensors - -Once the computation graph has been built, you can run the computation that -produces a particular `tf.Tensor` and fetch the value assigned to it. This is -often useful for debugging as well as being required for much of TensorFlow to -work. - -The simplest way to evaluate a Tensor is using the `Tensor.eval` method. 
For -example: - -```python -constant = tf.constant([1, 2, 3]) -tensor = constant * constant -print(tensor.eval()) -``` - -The `eval` method only works when a default `tf.Session` is active (see -Graphs and Sessions for more information). - -`Tensor.eval` returns a numpy array with the same contents as the tensor. - -Sometimes it is not possible to evaluate a `tf.Tensor` with no context because -its value might depend on dynamic information that is not available. For -example, tensors that depend on `placeholder`s can't be evaluated without -providing a value for the `placeholder`. - -``` python -p = tf.placeholder(tf.float32) -t = p + 1.0 -t.eval() # This will fail, since the placeholder did not get a value. -t.eval(feed_dict={p:2.0}) # This will succeed because we're feeding a value - # to the placeholder. -``` - -Note that it is possible to feed any `tf.Tensor`, not just placeholders. - -Other model constructs might make evaluating a `tf.Tensor` -complicated. TensorFlow can't directly evaluate `tf.Tensor`s defined inside -functions or inside control flow constructs. If a `tf.Tensor` depends on a value -from a queue, evaluating the `tf.Tensor` will only work once something has been -enqueued; otherwise, evaluating it will hang. When working with queues, remember -to call `tf.train.start_queue_runners` before evaluating any `tf.Tensor`s. - -## Printing Tensors - -For debugging purposes you might want to print the value of a `tf.Tensor`. While - @{$debugger$tfdbg} provides advanced debugging support, TensorFlow also has an - operation to directly print the value of a `tf.Tensor`. - -Note that you rarely want to use the following pattern when printing a -`tf.Tensor`: - -``` python -t = <> -print(t) # This will print the symbolic tensor when the graph is being built. - # This tensor does not have a value in this context. -``` - -This code prints the `tf.Tensor` object (which represents deferred computation) -and not its value. Instead, TensorFlow provides the `tf.Print` operation, which -returns its first tensor argument unchanged while printing the set of -`tf.Tensor`s it is passed as the second argument. - -To correctly use `tf.Print` its return value must be used. See the example below - -``` python -t = <> -tf.Print(t, [t]) # This does nothing -t = tf.Print(t, [t]) # Here we are using the value returned by tf.Print -result = t + 1 # Now when result is evaluated the value of `t` will be printed. -``` - -When you evaluate `result` you will evaluate everything `result` depends -upon. Since `result` depends upon `t`, and evaluating `t` has the side effect of -printing its input (the old value of `t`), `t` gets printed. - diff --git a/tensorflow/docs_src/programmers_guide/using_gpu.md b/tensorflow/docs_src/programmers_guide/using_gpu.md deleted file mode 100644 index c429ca4750..0000000000 --- a/tensorflow/docs_src/programmers_guide/using_gpu.md +++ /dev/null @@ -1,215 +0,0 @@ -# Using GPUs - -## Supported devices - -On a typical system, there are multiple computing devices. In TensorFlow, the -supported device types are `CPU` and `GPU`. They are represented as `strings`. -For example: - -* `"/cpu:0"`: The CPU of your machine. -* `"/device:GPU:0"`: The GPU of your machine, if you have one. -* `"/device:GPU:1"`: The second GPU of your machine, etc. - -If a TensorFlow operation has both CPU and GPU implementations, the GPU devices -will be given priority when the operation is assigned to a device. For example, -`matmul` has both CPU and GPU kernels. 
On a system with devices `cpu:0` and -`gpu:0`, `gpu:0` will be selected to run `matmul`. - -## Logging Device placement - -To find out which devices your operations and tensors are assigned to, create -the session with `log_device_placement` configuration option set to `True`. - -```python -# Creates a graph. -a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3], name='a') -b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2], name='b') -c = tf.matmul(a, b) -# Creates a session with log_device_placement set to True. -sess = tf.Session(config=tf.ConfigProto(log_device_placement=True)) -# Runs the op. -print(sess.run(c)) -``` - -You should see the following output: - -``` -Device mapping: -/job:localhost/replica:0/task:0/device:GPU:0 -> device: 0, name: Tesla K40c, pci bus -id: 0000:05:00.0 -b: /job:localhost/replica:0/task:0/device:GPU:0 -a: /job:localhost/replica:0/task:0/device:GPU:0 -MatMul: /job:localhost/replica:0/task:0/device:GPU:0 -[[ 22. 28.] - [ 49. 64.]] - -``` - -## Manual device placement - -If you would like a particular operation to run on a device of your choice -instead of what's automatically selected for you, you can use `with tf.device` -to create a device context such that all the operations within that context will -have the same device assignment. - -```python -# Creates a graph. -with tf.device('/cpu:0'): - a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3], name='a') - b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2], name='b') -c = tf.matmul(a, b) -# Creates a session with log_device_placement set to True. -sess = tf.Session(config=tf.ConfigProto(log_device_placement=True)) -# Runs the op. -print(sess.run(c)) -``` - -You will see that now `a` and `b` are assigned to `cpu:0`. Since a device was -not explicitly specified for the `MatMul` operation, the TensorFlow runtime will -choose one based on the operation and available devices (`gpu:0` in this -example) and automatically copy tensors between devices if required. - -``` -Device mapping: -/job:localhost/replica:0/task:0/device:GPU:0 -> device: 0, name: Tesla K40c, pci bus -id: 0000:05:00.0 -b: /job:localhost/replica:0/task:0/cpu:0 -a: /job:localhost/replica:0/task:0/cpu:0 -MatMul: /job:localhost/replica:0/task:0/device:GPU:0 -[[ 22. 28.] - [ 49. 64.]] -``` - -## Allowing GPU memory growth - -By default, TensorFlow maps nearly all of the GPU memory of all GPUs (subject to -[`CUDA_VISIBLE_DEVICES`](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars)) -visible to the process. This is done to more efficiently use the relatively -precious GPU memory resources on the devices by reducing [memory -fragmentation](https://en.wikipedia.org/wiki/Fragmentation_\(computing\)). - -In some cases it is desirable for the process to only allocate a subset of the -available memory, or to only grow the memory usage as is needed by the process. -TensorFlow provides two Config options on the Session to control this. - -The first is the `allow_growth` option, which attempts to allocate only as much -GPU memory based on runtime allocations: it starts out allocating very little -memory, and as Sessions get run and more GPU memory is needed, we extend the GPU -memory region needed by the TensorFlow process. Note that we do not release -memory, since that can lead to even worse memory fragmentation. To turn this -option on, set the option in the ConfigProto by: - -```python -config = tf.ConfigProto() -config.gpu_options.allow_growth = True -session = tf.Session(config=config, ...) 
-``` - -The second method is the `per_process_gpu_memory_fraction` option, which -determines the fraction of the overall amount of memory that each visible GPU -should be allocated. For example, you can tell TensorFlow to only allocate 40% -of the total memory of each GPU by: - -```python -config = tf.ConfigProto() -config.gpu_options.per_process_gpu_memory_fraction = 0.4 -session = tf.Session(config=config, ...) -``` - -This is useful if you want to truly bound the amount of GPU memory available to -the TensorFlow process. - -## Using a single GPU on a multi-GPU system - -If you have more than one GPU in your system, the GPU with the lowest ID will be -selected by default. If you would like to run on a different GPU, you will need -to specify the preference explicitly: - -```python -# Creates a graph. -with tf.device('/device:GPU:2'): - a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3], name='a') - b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2], name='b') - c = tf.matmul(a, b) -# Creates a session with log_device_placement set to True. -sess = tf.Session(config=tf.ConfigProto(log_device_placement=True)) -# Runs the op. -print(sess.run(c)) -``` - -If the device you have specified does not exist, you will get -`InvalidArgumentError`: - -``` -InvalidArgumentError: Invalid argument: Cannot assign a device to node 'b': -Could not satisfy explicit device specification '/device:GPU:2' - [[Node: b = Const[dtype=DT_FLOAT, value=Tensor, _device="/device:GPU:2"]()]] -``` - -If you would like TensorFlow to automatically choose an existing and supported -device to run the operations in case the specified one doesn't exist, you can -set `allow_soft_placement` to `True` in the configuration option when creating -the session. - -```python -# Creates a graph. -with tf.device('/device:GPU:2'): - a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3], name='a') - b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2], name='b') - c = tf.matmul(a, b) -# Creates a session with allow_soft_placement and log_device_placement set -# to True. -sess = tf.Session(config=tf.ConfigProto( - allow_soft_placement=True, log_device_placement=True)) -# Runs the op. -print(sess.run(c)) -``` - -## Using multiple GPUs - -If you would like to run TensorFlow on multiple GPUs, you can construct your -model in a multi-tower fashion where each tower is assigned to a different GPU. -For example: - -``` python -# Creates a graph. -c = [] -for d in ['/device:GPU:2', '/device:GPU:3']: - with tf.device(d): - a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3]) - b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2]) - c.append(tf.matmul(a, b)) -with tf.device('/cpu:0'): - sum = tf.add_n(c) -# Creates a session with log_device_placement set to True. -sess = tf.Session(config=tf.ConfigProto(log_device_placement=True)) -# Runs the op. -print(sess.run(sum)) -``` - -You will see the following output. 
- -``` -Device mapping: -/job:localhost/replica:0/task:0/device:GPU:0 -> device: 0, name: Tesla K20m, pci bus -id: 0000:02:00.0 -/job:localhost/replica:0/task:0/device:GPU:1 -> device: 1, name: Tesla K20m, pci bus -id: 0000:03:00.0 -/job:localhost/replica:0/task:0/device:GPU:2 -> device: 2, name: Tesla K20m, pci bus -id: 0000:83:00.0 -/job:localhost/replica:0/task:0/device:GPU:3 -> device: 3, name: Tesla K20m, pci bus -id: 0000:84:00.0 -Const_3: /job:localhost/replica:0/task:0/device:GPU:3 -Const_2: /job:localhost/replica:0/task:0/device:GPU:3 -MatMul_1: /job:localhost/replica:0/task:0/device:GPU:3 -Const_1: /job:localhost/replica:0/task:0/device:GPU:2 -Const: /job:localhost/replica:0/task:0/device:GPU:2 -MatMul: /job:localhost/replica:0/task:0/device:GPU:2 -AddN: /job:localhost/replica:0/task:0/cpu:0 -[[ 44. 56.] - [ 98. 128.]] -``` - -The @{$deep_cnn$cifar10 tutorial} is a good example -demonstrating how to do training with multiple GPUs. diff --git a/tensorflow/docs_src/programmers_guide/using_tpu.md b/tensorflow/docs_src/programmers_guide/using_tpu.md deleted file mode 100644 index 44aabf0557..0000000000 --- a/tensorflow/docs_src/programmers_guide/using_tpu.md +++ /dev/null @@ -1,395 +0,0 @@ -# Using TPUs - -This document walks through the principal TensorFlow APIs necessary to make -effective use of a [Cloud TPU](https://cloud.google.com/tpu/), and highlights -the differences between regular TensorFlow usage, and usage on a TPU. - -This doc is aimed at users who: - -* Are familiar with TensorFlow's `Estimator` and `Dataset` APIs -* Have maybe [tried out a Cloud TPU](https://cloud.google.com/tpu/docs/quickstart) - using an existing model. -* Have, perhaps, skimmed the code of an example TPU model - [[1]](https://github.com/tensorflow/models/blob/master/official/mnist/mnist_tpu.py) - [[2]](https://github.com/tensorflow/tpu/tree/master/models). -* Are interested in porting an existing `Estimator` model to - run on Cloud TPUs - -## TPUEstimator - -@{tf.estimator.Estimator$Estimators} are TensorFlow's model-level abstraction. -Standard `Estimators` can drive models on CPU and GPUs. You must use -@{tf.contrib.tpu.TPUEstimator} to drive a model on TPUs. - -Refer to TensorFlow's Getting Started section for an introduction to the basics -of using a @{$premade_estimators$pre-made `Estimator`}, and -@{$custom_estimators$custom `Estimator`s}. - -The `TPUEstimator` class differs somewhat from the `Estimator` class. - -The simplest way to maintain a model that can be run both on CPU/GPU or on a -Cloud TPU is to define the model's inference phase (from inputs to predictions) -outside of the `model_fn`. Then maintain separate implementations of the -`Estimator` setup and `model_fn`, both wrapping this inference step. For an -example of this pattern compare the `mnist.py` and `mnist_tpu.py` implementation in -[tensorflow/models](https://github.com/tensorflow/models/tree/master/official/mnist). - -### Running a `TPUEstimator` locally - -To create a standard `Estimator` you call the constructor, and pass it a -`model_fn`, for example: - -``` -my_estimator = tf.estimator.Estimator( - model_fn=my_model_fn) -``` - -The changes required to use a @{tf.contrib.tpu.TPUEstimator} on your local -machine are relatively minor. The constructor requires two additional arguments. 
-You should set the `use_tpu` argument to `False`, and pass a
-@{tf.contrib.tpu.RunConfig} as the `config` argument, as shown below:
-
-``` python
-my_tpu_estimator = tf.contrib.tpu.TPUEstimator(
-    model_fn=my_model_fn,
-    config=tf.contrib.tpu.RunConfig(),
-    use_tpu=False)
-```
-
-Just this simple change will allow you to run a `TPUEstimator` locally.
-The majority of example TPU models can be run in this local mode,
-by setting the command line flags as follows:
-
-
-```
-$> python mnist_tpu.py --use_tpu=false --master=''
-```
-
-Note: This `use_tpu=False` argument is useful for trying out the `TPUEstimator`
-API. It is not meant to be a complete TPU compatibility test. Successfully
-running a model locally in a `TPUEstimator` does not guarantee that it will
-work on a TPU.
-
-
-### Building a `tpu.RunConfig`
-
-While the default `RunConfig` is sufficient for local training, these settings
-cannot be ignored in real usage.
-
-A more typical setup for a `RunConfig`, one that can be switched to use a Cloud
-TPU, might be as follows:
-
-``` python
-import tempfile
-import subprocess
-
-class FLAGS(object):
-  use_tpu=False
-  tpu_name=None
-  # Use a local temporary path for the `model_dir`
-  model_dir = tempfile.mkdtemp()
-  # Number of training steps to run on the Cloud TPU before returning control.
-  iterations = 50
-  # A single Cloud TPU has 8 shards.
-  num_shards = 8
-
-if FLAGS.use_tpu:
-  my_project = subprocess.check_output([
-      'gcloud', 'config', 'get-value', 'project'])
-  my_zone = subprocess.check_output([
-      'gcloud', 'config', 'get-value', 'compute/zone'])
-  tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
-      tpu_names=[FLAGS.tpu_name],
-      zone=my_zone,
-      project=my_project)
-  master = tpu_cluster_resolver.get_master()
-else:
-  master = ''
-
-my_tpu_run_config = tf.contrib.tpu.RunConfig(
-    master=master,
-    evaluation_master=master,
-    model_dir=FLAGS.model_dir,
-    session_config=tf.ConfigProto(
-        allow_soft_placement=True, log_device_placement=True),
-    tpu_config=tf.contrib.tpu.TPUConfig(FLAGS.iterations,
-                                        FLAGS.num_shards),
-)
-```
-
-Then you must pass the @{tf.contrib.tpu.RunConfig} to the constructor:
-
-``` python
-my_tpu_estimator = tf.contrib.tpu.TPUEstimator(
-    model_fn=my_model_fn,
-    config=my_tpu_run_config,
-    use_tpu=FLAGS.use_tpu)
-```
-
-Typically the `FLAGS` would be set by command line arguments. To switch from
-training locally to training on a Cloud TPU you would need to:
-
-* Set `FLAGS.use_tpu` to `True`
-* Set `FLAGS.tpu_name` so the `tf.contrib.cluster_resolver.TPUClusterResolver` can find it
-* Set `FLAGS.model_dir` to a Google Cloud Storage bucket URL (`gs://`).
-
-
-## Optimizer
-
-When training on a Cloud TPU you **must** wrap the optimizer in a
-@{tf.contrib.tpu.CrossShardOptimizer}, which uses an `allreduce` to aggregate
-gradients and broadcast the result to each shard (each TPU core).
-
-The `CrossShardOptimizer` is not compatible with local training.
-So, to have the same code run both locally and on a Cloud TPU, add lines like
-the following:
-
-``` python
-optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
-if FLAGS.use_tpu:
-  optimizer = tf.contrib.tpu.CrossShardOptimizer(optimizer)
-```
-
-If you prefer to avoid a global `FLAGS` variable in your model code, one
-approach is to set the optimizer as one of the `Estimator`'s params,
-as follows:
-
-``` python
-my_tpu_estimator = tf.contrib.tpu.TPUEstimator(
-    model_fn=my_model_fn,
-    config=my_tpu_run_config,
-    use_tpu=FLAGS.use_tpu,
-    params={'optimizer': optimizer})
-```
-
-## Model Function
-
-This section details the changes you must make to the model function
-(`model_fn()`) to make it `TPUEstimator` compatible.
-
-### Static shapes
-
-During regular usage TensorFlow attempts to determine the shapes of each
-`tf.Tensor` during graph construction. During execution any unknown shape
-dimensions are determined dynamically;
-see @{$programmers_guide/tensors#shape$Tensor Shapes} for more details.
-
-To run on Cloud TPUs TensorFlow models are compiled using @{$xla$XLA}.
-XLA uses a similar system for determining shapes at compile time. XLA requires
-that all tensor dimensions be statically defined at compile time. All shapes
-must evaluate to a constant, and not depend on external data, or stateful
-operations like variables or a random number generator.
-
-
-### Summaries
-
-Remove any use of `tf.summary` from your model.
-
-@{$summaries_and_tensorboard$TensorBoard summaries} are a great way to see inside
-your model. A minimal set of basic summaries is automatically recorded by the
-`TPUEstimator` to `event` files in the `model_dir`. Custom summaries, however,
-are currently unsupported when training on a Cloud TPU. So while the
-`TPUEstimator` will still run locally with summaries, it will fail if used on a
-TPU.
-
-### Metrics
-
-Build your evaluation metrics dictionary in a stand-alone `metric_fn`.
-
-Evaluation metrics are an essential part of training a model. These are fully
-supported on Cloud TPUs, but with a slightly different syntax.
-
-A standard @{tf.metrics} function returns two tensors. The first is the running
-average of the metric value, while the second updates the running average and
-returns the value for the current batch:
-
-```
-running_average, current_batch = tf.metrics.accuracy(labels, predictions)
-```
-
-In a standard `Estimator` you create a dictionary of these pairs, and return it
-as part of the `EstimatorSpec`.
-
-```python
-my_metrics = {'accuracy': tf.metrics.accuracy(labels, predictions)}
-
-return tf.estimator.EstimatorSpec(
-  ...
-  eval_metric_ops=my_metrics
-)
-```
-
-In a `TPUEstimator` you instead pass a function (which returns a metrics
-dictionary) and a list of argument tensors, as shown below:
-
-```python
-def my_metric_fn(labels, predictions):
-  return {'accuracy': tf.metrics.accuracy(labels, predictions)}
-
-return tf.contrib.tpu.TPUEstimatorSpec(
-  ...
-  eval_metrics=(my_metric_fn, [labels, predictions])
-)
-```
-
-### Use `TPUEstimatorSpec`
-
-`TPUEstimatorSpec` does not support hooks, and requires function wrappers for
-some fields.
-
-An `Estimator`'s `model_fn` must return an `EstimatorSpec`. An `EstimatorSpec`
-is a simple structure of named fields containing all the `tf.Tensors` of the
-model that the `Estimator` may need to interact with.
-
-`TPUEstimators` use a @{tf.contrib.tpu.TPUEstimatorSpec}.
-There are a few differences between it and a standard
-@{tf.estimator.EstimatorSpec}:
-
-* The `eval_metric_ops` must be wrapped in a `metric_fn`; this field is
-  renamed `eval_metrics` ([see above](#metrics)).
-* The @{tf.train.SessionRunHook$hooks} are unsupported, so these fields are
-  omitted.
-* The @{tf.train.Scaffold$`scaffold`}, if used, must also be wrapped in a
-  function. This field is renamed to `scaffold_fn`.
-
-`Scaffold` and `Hooks` are for advanced usage, and can typically be omitted.
-
-## Input functions
-
-Input functions work largely unchanged, since they run on the host computer, not
-on the Cloud TPU itself. This section explains the two necessary adjustments.
-
-### Params argument
-
-The `input_fn` for a standard `Estimator` _can_ include a
-`params` argument; the `input_fn` for a `TPUEstimator` *must* include a
-`params` argument. This is necessary to allow the estimator to set the batch
-size for each replica of the input stream. So the minimum signature for an
-`input_fn` for a `TPUEstimator` is:
-
-```
-def my_input_fn(params):
-  pass
-```
-
-Here `params['batch_size']` will contain the batch size.
-
-### Static shapes and batch size
-
-The input pipeline generated by your `input_fn` is run on the CPU, so it is mostly
-free from the strict static shape requirements imposed by the XLA/TPU environment.
-The one requirement is that the batches of data fed from your input pipeline to
-the TPU have a static shape, as determined by the standard TensorFlow shape
-inference algorithm. Intermediate tensors are free to have dynamic shapes.
-If shape inference has failed, but the shape is known, it is possible to
-impose the correct shape using the `set_shape()` method.
-
-In the example below the shape inference algorithm fails, but the shape is then
-corrected using `set_shape`:
-
-```
->>> x = tf.zeros(tf.constant([1,2,3])+1)
->>> x.shape
-TensorShape([Dimension(None), Dimension(None), Dimension(None)])
-
->>> x.set_shape([2,3,4])
-```
-
-In many cases the batch size is the only unknown dimension.
-
-A typical input pipeline, using `tf.data`, will usually produce batches of a
-fixed size. The last batch of a finite `Dataset`, however, is typically smaller,
-containing just the remaining elements. Since a `Dataset` does not know its own
-length or finiteness, the standard @{tf.data.Dataset.batch$`batch`} method
-cannot determine on its own whether every batch will have a fixed size:
-
-```
->>> params = {'batch_size': 32}
->>> ds = tf.data.Dataset.from_tensors([0, 1, 2])
->>> ds = ds.repeat().batch(params['batch_size'])
->>> ds
-<BatchDataset shapes: (?, 3), types: tf.int32>
-```
-
-The most straightforward fix is to
-@{tf.data.Dataset.apply$apply} @{tf.contrib.data.batch_and_drop_remainder}
-as follows:
-
-```
->>> params = {'batch_size': 32}
->>> ds = tf.data.Dataset.from_tensors([0, 1, 2])
->>> ds = ds.repeat().apply(
-...     tf.contrib.data.batch_and_drop_remainder(params['batch_size']))
->>> ds
-<_RestructuredDataset shapes: (32, 3), types: tf.int32>
-```
-
-The one downside to this approach is that, as the name implies, this batching
-method throws out any fractional batch at the end of the dataset. This is fine
-for an infinitely repeating dataset being used for training, but could be a
-problem if you want to train for an exact number of epochs.
-
-To do an exact one epoch of _evaluation_ you can work around this by manually
-padding the final batch to the full batch size, and giving the padding entries
-zero weight when creating your `tf.metrics`, as sketched below.
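The following is a minimal sketch of that padding approach, not part of the `TPUEstimator` API: the helper name, the assumption of rank-2 float `features` with a statically known feature dimension, and rank-1 integer `labels` are all illustrative.

```python
import tensorflow as tf

def pad_to_full_batch(features, labels, batch_size):
  """Pads a possibly short final batch up to `batch_size` rows."""
  num_features = features.shape[1]   # static feature dimension, assumed known
  num_real = tf.shape(labels)[0]     # dynamic number of real examples
  pad = batch_size - num_real        # rows of padding needed (0 for full batches)
  # Weight 1.0 for every real example, 0.0 for every padding row.
  weights = tf.pad(tf.ones_like(labels, dtype=tf.float32), [[0, pad]])
  features = tf.pad(features, [[0, pad], [0, 0]])
  labels = tf.pad(labels, [[0, pad]])
  # Re-impose the static shapes required by the XLA/TPU environment.
  features.set_shape([batch_size, num_features])
  labels.set_shape([batch_size])
  weights.set_shape([batch_size])
  return features, labels, weights

def my_metric_fn(labels, predictions, weights):
  # Padding rows carry zero weight, so they do not affect the metric.
  return {'accuracy': tf.metrics.accuracy(labels, predictions, weights=weights)}
```

The `weights` tensor would then be threaded through the `model_fn` and listed as an additional argument tensor, for example `eval_metrics=(my_metric_fn, [labels, predictions, weights])`.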
-
-## Datasets
-
-Efficient use of the `tf.data.Dataset` API is critical when using a Cloud
-TPU, as it is impossible to make use of the Cloud TPU unless you can feed it
-data quickly enough. See @{$datasets_performance} for details on dataset
-performance.
-
-For all but the simplest experimentation (using
-@{tf.data.Dataset.from_tensor_slices} or other in-graph data) you will need to
-store all data files read by the `TPUEstimator`'s `Dataset` in Google Cloud
-Storage buckets.
-
-For most use cases, we recommend converting your data into `TFRecord`
-format and using a @{tf.data.TFRecordDataset} to read it. This, however, is not
-a hard requirement and you can use other dataset readers
-(`FixedLengthRecordDataset` or `TextLineDataset`) if you prefer.
-
-Small datasets can be loaded entirely into memory using
-@{tf.data.Dataset.cache}.
-
-Regardless of the data format used, it is strongly recommended that you
-@{$performance_guide#use_large_files$use large files}, on the order of
-100MB. This is especially important in this networked setting as the overhead
-of opening a file is significantly higher.
-
-It is also important, regardless of the type of reader used, to enable buffering
-using the `buffer_size` argument to the constructor. This argument is specified
-in bytes. A minimum of a few MB (`buffer_size=8*1024*1024`) is recommended so
-that data is available when needed.
-
-The TPU-demos repo includes
-[a script](https://github.com/tensorflow/tpu/blob/master/tools/datasets/imagenet_to_gcs.py)
-for downloading the ImageNet dataset and converting it to an appropriate format.
-This, together with the ImageNet
-[models](https://github.com/tensorflow/tpu/tree/master/models)
-included in the repo, demonstrates all of these best practices.
-
-
-## What Next
-
-For details on how to actually set up and run a Cloud TPU see:
-
- * [Google Cloud TPU Documentation](https://cloud.google.com/tpu/docs/)
-
-This document is by no means exhaustive. The best source of more detail on how
-to make a Cloud TPU compatible model is the set of example models published in:
-
- * The [TPU Demos Repository.](https://github.com/tensorflow/tpu)
-
-For more information about tuning TensorFlow code for performance see:
-
- * The @{$performance$Performance Section.}
-
diff --git a/tensorflow/docs_src/programmers_guide/variables.md b/tensorflow/docs_src/programmers_guide/variables.md
deleted file mode 100644
index cd8c4b5b9a..0000000000
--- a/tensorflow/docs_src/programmers_guide/variables.md
+++ /dev/null
@@ -1,319 +0,0 @@
-# Variables
-
-A TensorFlow **variable** is the best way to represent shared, persistent state
-manipulated by your program.
-
-Variables are manipulated via the `tf.Variable` class. A `tf.Variable`
-represents a tensor whose value can be changed by running ops on it. Unlike
-`tf.Tensor` objects, a `tf.Variable` exists outside the context of a single
-`session.run` call.
-
-Internally, a `tf.Variable` stores a persistent tensor. Specific ops allow you
-to read and modify the values of this tensor. These modifications are visible
-across multiple `tf.Session`s, so multiple workers can see the same values for a
-`tf.Variable`.
-
-## Creating a Variable
-
-The best way to create a variable is to call the `tf.get_variable`
-function. This function requires you to specify the Variable's name. This name
-will be used by other replicas to access the same variable, as well as to name
-this variable's value when checkpointing and exporting models.
`tf.get_variable` -also allows you to reuse a previously created variable of the same name, making it -easy to define models which reuse layers. - -To create a variable with `tf.get_variable`, simply provide the name and shape - -``` python -my_variable = tf.get_variable("my_variable", [1, 2, 3]) -``` - -This creates a variable named "my_variable" which is a three-dimensional tensor -with shape `[1, 2, 3]`. This variable will, by default, have the `dtype` -`tf.float32` and its initial value will be randomized via -`tf.glorot_uniform_initializer`. - -You may optionally specify the `dtype` and initializer to `tf.get_variable`. For -example: - -``` python -my_int_variable = tf.get_variable("my_int_variable", [1, 2, 3], dtype=tf.int32, - initializer=tf.zeros_initializer) -``` - -TensorFlow provides many convenient initializers. Alternatively, you may -initialize a `tf.Variable` to have the value of a `tf.Tensor`. For example: - -``` python -other_variable = tf.get_variable("other_variable", dtype=tf.int32, - initializer=tf.constant([23, 42])) -``` - -Note that when the initializer is a `tf.Tensor` you should not specify the -variable's shape, as the shape of the initializer tensor will be used. - - - -### Variable collections - -Because disconnected parts of a TensorFlow program might want to create -variables, it is sometimes useful to have a single way to access all of -them. For this reason TensorFlow provides **collections**, which are named lists -of tensors or other objects, such as `tf.Variable` instances. - -By default every `tf.Variable` gets placed in the following two collections: - - * `tf.GraphKeys.GLOBAL_VARIABLES` --- variables that can be shared across - multiple devices, - * `tf.GraphKeys.TRAINABLE_VARIABLES` --- variables for which TensorFlow will - calculate gradients. - -If you don't want a variable to be trainable, add it to the -`tf.GraphKeys.LOCAL_VARIABLES` collection instead. For example, the following -snippet demonstrates how to add a variable named `my_local` to this collection: - -``` python -my_local = tf.get_variable("my_local", shape=(), -collections=[tf.GraphKeys.LOCAL_VARIABLES]) -``` - -Alternatively, you can specify `trainable=False` as an argument to -`tf.get_variable`: - -``` python -my_non_trainable = tf.get_variable("my_non_trainable", - shape=(), - trainable=False) -``` - - -You can also use your own collections. Any string is a valid collection name, -and there is no need to explicitly create a collection. To add a variable (or -any other object) to a collection after creating the variable, call -`tf.add_to_collection`. For example, the following code adds an existing -variable named `my_local` to a collection named `my_collection_name`: - -``` python -tf.add_to_collection("my_collection_name", my_local) -``` - -And to retrieve a list of all the variables (or other objects) you've placed in -a collection you can use: - -``` python -tf.get_collection("my_collection_name") -``` - -### Device placement - -Just like any other TensorFlow operation, you can place variables on particular -devices. For example, the following snippet creates a variable named `v` and -places it on the second GPU device: - -``` python -with tf.device("/device:GPU:1"): - v = tf.get_variable("v", [1]) -``` - -It is particularly important for variables to be in the correct device in -distributed settings. 
Accidentally putting variables on workers instead of -parameter servers, for example, can severely slow down training or, in the worst -case, let each worker blithely forge ahead with its own independent copy of each -variable. For this reason we provide @{tf.train.replica_device_setter}, which -can automatically place variables in parameter servers. For example: - -``` python -cluster_spec = { - "ps": ["ps0:2222", "ps1:2222"], - "worker": ["worker0:2222", "worker1:2222", "worker2:2222"]} -with tf.device(tf.train.replica_device_setter(cluster=cluster_spec)): - v = tf.get_variable("v", shape=[20, 20]) # this variable is placed - # in the parameter server - # by the replica_device_setter -``` - -## Initializing variables - -Before you can use a variable, it must be initialized. If you are programming in -the low-level TensorFlow API (that is, you are explicitly creating your own -graphs and sessions), you must explicitly initialize the variables. Most -high-level frameworks such as `tf.contrib.slim`, `tf.estimator.Estimator` and -`Keras` automatically initialize variables for you before training a model. - -Explicit initialization is otherwise useful because it allows you not to rerun -potentially expensive initializers when reloading a model from a checkpoint as -well as allowing determinism when randomly-initialized variables are shared in a -distributed setting. - -To initialize all trainable variables in one go, before training starts, call -`tf.global_variables_initializer()`. This function returns a single operation -responsible for initializing all variables in the -`tf.GraphKeys.GLOBAL_VARIABLES` collection. Running this operation initializes -all variables. For example: - -``` python -session.run(tf.global_variables_initializer()) -# Now all variables are initialized. -``` - -If you do need to initialize variables yourself, you can run the variable's -initializer operation. For example: - -``` python -session.run(my_variable.initializer) -``` - - -You can also ask which variables have still not been initialized. For example, -the following code prints the names of all variables which have not yet been -initialized: - -``` python -print(session.run(tf.report_uninitialized_variables())) -``` - - -Note that by default `tf.global_variables_initializer` does not specify the -order in which variables are initialized. Therefore, if the initial value of a -variable depends on another variable's value, it's likely that you'll get an -error. Any time you use the value of a variable in a context in which not all -variables are initialized (say, if you use a variable's value while initializing -another variable), it is best to use `variable.initialized_value()` instead of -`variable`: - -``` python -v = tf.get_variable("v", shape=(), initializer=tf.zeros_initializer()) -w = tf.get_variable("w", initializer=v.initialized_value() + 1) -``` - -## Using variables - -To use the value of a `tf.Variable` in a TensorFlow graph, simply treat it like -a normal `tf.Tensor`: - -``` python -v = tf.get_variable("v", shape=(), initializer=tf.zeros_initializer()) -w = v + 1 # w is a tf.Tensor which is computed based on the value of v. - # Any time a variable is used in an expression it gets automatically - # converted to a tf.Tensor representing its value. -``` - -To assign a value to a variable, use the methods `assign`, `assign_add`, and -friends in the `tf.Variable` class. 
For example, here is how you can call these -methods: - -``` python -v = tf.get_variable("v", shape=(), initializer=tf.zeros_initializer()) -assignment = v.assign_add(1) -tf.global_variables_initializer().run() -sess.run(assignment) # or assignment.op.run(), or assignment.eval() -``` - -Most TensorFlow optimizers have specialized ops that efficiently update the -values of variables according to some gradient descent-like algorithm. See -@{tf.train.Optimizer} for an explanation of how to use optimizers. - -Because variables are mutable it's sometimes useful to know what version of a -variable's value is being used at any point in time. To force a re-read of the -value of a variable after something has happened, you can use -`tf.Variable.read_value`. For example: - -``` python -v = tf.get_variable("v", shape=(), initializer=tf.zeros_initializer()) -assignment = v.assign_add(1) -with tf.control_dependencies([assignment]): - w = v.read_value() # w is guaranteed to reflect v's value after the - # assign_add operation. -``` - - -## Sharing variables - -TensorFlow supports two ways of sharing variables: - - * Explicitly passing `tf.Variable` objects around. - * Implicitly wrapping `tf.Variable` objects within `tf.variable_scope` objects. - -While code which explicitly passes variables around is very clear, it is -sometimes convenient to write TensorFlow functions that implicitly use -variables in their implementations. Most of the functional layers from -`tf.layers` use this approach, as well as all `tf.metrics`, and a few other -library utilities. - -Variable scopes allow you to control variable reuse when calling functions which -implicitly create and use variables. They also allow you to name your variables -in a hierarchical and understandable way. - -For example, let's say we write a function to create a convolutional / relu -layer: - -```python -def conv_relu(input, kernel_shape, bias_shape): - # Create variable named "weights". - weights = tf.get_variable("weights", kernel_shape, - initializer=tf.random_normal_initializer()) - # Create variable named "biases". - biases = tf.get_variable("biases", bias_shape, - initializer=tf.constant_initializer(0.0)) - conv = tf.nn.conv2d(input, weights, - strides=[1, 1, 1, 1], padding='SAME') - return tf.nn.relu(conv + biases) -``` - -This function uses short names `weights` and `biases`, which is good for -clarity. In a real model, however, we want many such convolutional layers, and -calling this function repeatedly would not work: - -``` python -input1 = tf.random_normal([1,10,10,32]) -input2 = tf.random_normal([1,20,20,32]) -x = conv_relu(input1, kernel_shape=[5, 5, 32, 32], bias_shape=[32]) -x = conv_relu(x, kernel_shape=[5, 5, 32, 32], bias_shape = [32]) # This fails. -``` - -Since the desired behavior is unclear (create new variables or reuse the -existing ones?) TensorFlow will fail. Calling `conv_relu` in different scopes, -however, clarifies that we want to create new variables: - -```python -def my_image_filter(input_images): - with tf.variable_scope("conv1"): - # Variables created here will be named "conv1/weights", "conv1/biases". - relu1 = conv_relu(input_images, [5, 5, 32, 32], [32]) - with tf.variable_scope("conv2"): - # Variables created here will be named "conv2/weights", "conv2/biases". - return conv_relu(relu1, [5, 5, 32, 32], [32]) -``` - -If you do want the variables to be shared, you have two options. 
First, you can -create a scope with the same name using `reuse=True`: - -``` python -with tf.variable_scope("model"): - output1 = my_image_filter(input1) -with tf.variable_scope("model", reuse=True): - output2 = my_image_filter(input2) - -``` - -You can also call `scope.reuse_variables()` to trigger a reuse: - -``` python -with tf.variable_scope("model") as scope: - output1 = my_image_filter(input1) - scope.reuse_variables() - output2 = my_image_filter(input2) - -``` - -Since depending on exact string names of scopes can feel dangerous, it's also -possible to initialize a variable scope based on another one: - -``` python -with tf.variable_scope("model") as scope: - output1 = my_image_filter(input1) -with tf.variable_scope(scope, reuse=True): - output2 = my_image_filter(input2) - -``` - diff --git a/tensorflow/docs_src/programmers_guide/version_compat.md b/tensorflow/docs_src/programmers_guide/version_compat.md deleted file mode 100644 index 72e427c5f8..0000000000 --- a/tensorflow/docs_src/programmers_guide/version_compat.md +++ /dev/null @@ -1,319 +0,0 @@ -# TensorFlow Version Compatibility - -This document is for users who need backwards compatibility across different -versions of TensorFlow (either for code or data), and for developers who want -to modify TensorFlow while preserving compatibility. - -## Semantic Versioning 2.0 - -TensorFlow follows Semantic Versioning 2.0 ([semver](http://semver.org)) for its -public API. Each release version of TensorFlow has the form `MAJOR.MINOR.PATCH`. -For example, TensorFlow version 1.2.3 has `MAJOR` version 1, `MINOR` version 2, -and `PATCH` version 3. Changes to each number have the following meaning: - -* **MAJOR**: Potentially backwards incompatible changes. Code and data that - worked with a previous major release will not necessarily work with the new - release. However, in some cases existing TensorFlow graphs and checkpoints - may be migratable to the newer release; see - [Compatibility of graphs and checkpoints](#compatibility_of_graphs_and_checkpoints) - for details on data compatibility. - -* **MINOR**: Backwards compatible features, speed improvements, etc. Code and - data that worked with a previous minor release *and* which depends only on the - public API will continue to work unchanged. For details on what is and is - not the public API, see [What is covered](#what_is_covered). - -* **PATCH**: Backwards compatible bug fixes. - -For example, release 1.0.0 introduced backwards *incompatible* changes from -release 0.12.1. However, release 1.1.1 was backwards *compatible* with release -1.0.0. - -## What is covered - -Only the public APIs of TensorFlow are backwards compatible across minor and -patch versions. The public APIs consist of - -* All the documented [Python](../api_docs/python) functions and classes in the - `tensorflow` module and its submodules, except for - * functions and classes in `tf.contrib` - * functions and classes whose names start with `_` (as these are private) - Note that the code in the `examples/` and `tools/` directories is not - reachable through the `tensorflow` Python module and is thus not covered by - the compatibility guarantee. - - If a symbol is available through the `tensorflow` Python module or its - submodules, but is not documented, then it is **not** considered part of the - public API. - -* The [C API](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/c/c_api.h). 
- -* The following protocol buffer files: - * [`attr_value`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/attr_value.proto) - * [`config`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/protobuf/config.proto) - * [`event`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/util/event.proto) - * [`graph`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/graph.proto) - * [`op_def`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/op_def.proto) - * [`reader_base`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/reader_base.proto) - * [`summary`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/summary.proto) - * [`tensor`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/tensor.proto) - * [`tensor_shape`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/tensor_shape.proto) - * [`types`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/types.proto) - - -## What is *not* covered - -Some API functions are explicitly marked as "experimental" and can change in -backward incompatible ways between minor releases. These include: - -* **Experimental APIs**: The @{tf.contrib} module and its submodules in Python - and any functions in the C API or fields in protocol buffers that are - explicitly commented as being experimental. In particular, any field in a - protocol buffer which is called "experimental" and all its fields and - submessages can change at any time. - -* **Other languages**: TensorFlow APIs in languages other than Python and C, - such as: - - - @{$cc/guide$C++} (exposed through header files in - [`tensorflow/cc`](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/cc)). - - [Java](../api_docs/java/reference/org/tensorflow/package-summary), - - [Go](https://godoc.org/github.com/tensorflow/tensorflow/tensorflow/go) - -* **Details of composite ops:** Many public functions in Python expand to - several primitive ops in the graph, and these details will be part of any - graphs saved to disk as `GraphDef`s. These details may change for - minor releases. In particular, regressions tests that check for exact - matching between graphs are likely to break across minor releases, even - though the behavior of the graph should be unchanged and existing - checkpoints will still work. - -* **Floating point numerical details:** The specific floating point values - computed by ops may change at any time. Users should rely only on - approximate accuracy and numerical stability, not on the specific bits - computed. Changes to numerical formulas in minor and patch releases should - result in comparable or improved accuracy, with the caveat that in machine - learning improved accuracy of specific formulas may result in decreased - accuracy for the overall system. - -* **Random numbers:** The specific random numbers computed by the - @{$python/constant_op#Random_Tensors$random ops} may change at any time. - Users should rely only on approximately correct distributions and - statistical strength, not the specific bits computed. However, we will make - changes to random bits rarely (or perhaps never) for patch releases. We - will, of course, document all such changes. - -* **Version skew in distributed Tensorflow:** Running two different versions - of TensorFlow in a single cluster is unsupported. 
There are no guarantees - about backwards compatibility of the wire protocol. - -* **Bugs:** We reserve the right to make backwards incompatible behavior - (though not API) changes if the current implementation is clearly broken, - that is, if it contradicts the documentation or if a well-known and - well-defined intended behavior is not properly implemented due to a bug. - For example, if an optimizer claims to implement a well-known optimization - algorithm but does not match that algorithm due to a bug, then we will fix - the optimizer. Our fix may break code relying on the wrong behavior for - convergence. We will note such changes in the release notes. - -* **Error messages:** We reserve the right to change the text of error - messages. In addition, the type of an error may change unless the type is - specified in the documentation. For example, a function documented to - raise an `InvalidArgument` exception will continue to - raise `InvalidArgument`, but the human-readable message contents can change. - -## Compatibility of graphs and checkpoints - -You'll sometimes need to preserve graphs and checkpoints. -Graphs describe the data flow of ops to be run during training and -inference, and checkpoints contain the saved tensor values of variables in a -graph. - -Many TensorFlow users save graphs and trained models to disk for -later evaluation or additional training, but end up running their saved graphs -or models on a later release. In compliance with semver, any graph or checkpoint -written out with one version of TensorFlow can be loaded and evaluated with a -later version of TensorFlow with the same major release. However, we will -endeavor to preserve backwards compatibility even across major releases when -possible, so that the serialized files are usable over long periods of time. - - -Graphs are serialized via the `GraphDef` protocol buffer. To facilitate (rare) -backwards incompatible changes to graphs, each `GraphDef` has a version number -separate from the TensorFlow version. For example, `GraphDef` version 17 -deprecated the `inv` op in favor of `reciprocal`. The semantics are: - -* Each version of TensorFlow supports an interval of `GraphDef` versions. This - interval will be constant across patch releases, and will only grow across - minor releases. Dropping support for a `GraphDef` version will only occur - for a major release of TensorFlow. - -* Newly created graphs are assigned the latest `GraphDef` version number. - -* If a given version of TensorFlow supports the `GraphDef` version of a graph, - it will load and evaluate with the same behavior as the TensorFlow version - used to generate it (except for floating point numerical details and random - numbers), regardless of the major version of TensorFlow. In particular, all - checkpoint files will be compatible. - -* If the `GraphDef` *upper* bound is increased to X in a (minor) release, there - will be at least six months before the *lower* bound is increased to X. For - example (we're using hypothetical version numbers here): - * TensorFlow 1.2 might support `GraphDef` versions 4 to 7. - * TensorFlow 1.3 could add `GraphDef` version 8 and support versions 4 to 8. - * At least six months later, TensorFlow 2.0.0 could drop support for - versions 4 to 7, leaving version 8 only. - -Finally, when support for a `GraphDef` version is dropped, we will attempt to -provide tools for automatically converting graphs to a newer supported -`GraphDef` version. 
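As a concrete illustration of the version numbers involved, the following is a minimal sketch that prints the `GraphDef` versions supported by the running TensorFlow binary and the `producer` version recorded in a previously serialized graph; the `/tmp/my_graph.pb` path is hypothetical.

```python
import tensorflow as tf
from tensorflow.core.framework import graph_pb2

# Versions supported by the installed TensorFlow binary.
print('TensorFlow release:', tf.VERSION)
print('GraphDef version written by this binary:', tf.GRAPH_DEF_VERSION)
print('Oldest producer this binary accepts:', tf.GRAPH_DEF_VERSION_MIN_PRODUCER)
print('Minimum consumer for graphs it writes:', tf.GRAPH_DEF_VERSION_MIN_CONSUMER)

# Version information recorded in a serialized graph when it was produced.
graph_def = graph_pb2.GraphDef()
with tf.gfile.GFile('/tmp/my_graph.pb', 'rb') as f:  # hypothetical path
  graph_def.ParseFromString(f.read())
print('Graph was produced at GraphDef version:', graph_def.versions.producer)
```

Loading succeeds only when the recorded `producer` version falls inside the interval supported by the consuming binary, which is the compatibility check described above.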
- -## Graph and checkpoint compatibility when extending TensorFlow - -This section is relevant only when making incompatible changes to the `GraphDef` -format, such as when adding ops, removing ops, or changing the functionality -of existing ops. The previous section should suffice for most users. - -### Backward and partial forward compatibility - -Our versioning scheme has three requirements: - -* **Backward compatibility** to support loading graphs and checkpoints - created with older versions of TensorFlow. -* **Forward compatibility** to support scenarios where the producer of a - graph or checkpoint is upgraded to a newer version of TensorFlow before - the consumer. -* Enable evolving TensorFlow in incompatible ways. For example, removing ops, - adding attributes, and removing attributes. - -Note that while the `GraphDef` version mechanism is separate from the TensorFlow -version, backwards incompatible changes to the `GraphDef` format are still -restricted by Semantic Versioning. This means functionality can only be removed -or changed between `MAJOR` versions of TensorFlow (such as `1.7` to `2.0`). -Additionally, forward compatibility is enforced within Patch releases (`1.x.1` -to `1.x.2` for example). - -To achieve backward and forward compatibility and to know when to enforce changes -in formats, graphs and checkpoints have metadata that describes when they -were produced. The sections below detail the TensorFlow implementation and -guidelines for evolving `GraphDef` versions. - -### Independent data version schemes - -There are different data versions for graphs and checkpoints. The two data -formats evolve at different rates from each other and also at different rates -from TensorFlow. Both versioning systems are defined in -[`core/public/version.h`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/public/version.h). -Whenever a new version is added, a note is added to the header detailing what -changed and the date. - -### Data, producers, and consumers - -We distinguish between the following kinds of data version information: -* **producers**: binaries that produce data. Producers have a version - (`producer`) and a minimum consumer version that they are compatible with - (`min_consumer`). -* **consumers**: binaries that consume data. Consumers have a version - (`consumer`) and a minimum producer version that they are compatible with - (`min_producer`). - -Each piece of versioned data has a [`VersionDef -versions`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/versions.proto) -field which records the `producer` that made the data, the `min_consumer` -that it is compatible with, and a list of `bad_consumers` versions that are -disallowed. - -By default, when a producer makes some data, the data inherits the producer's -`producer` and `min_consumer` versions. `bad_consumers` can be set if specific -consumer versions are known to contain bugs and must be avoided. 
A consumer can -accept a piece of data if the following are all true: - -* `consumer` >= data's `min_consumer` -* data's `producer` >= consumer's `min_producer` -* `consumer` not in data's `bad_consumers` - -Since both producers and consumers come from the same TensorFlow code base, -[`core/public/version.h`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/public/version.h) -contains a main data version which is treated as either `producer` or -`consumer` depending on context and both `min_consumer` and `min_producer` -(needed by producers and consumers, respectively). Specifically, - -* For `GraphDef` versions, we have `TF_GRAPH_DEF_VERSION`, - `TF_GRAPH_DEF_VERSION_MIN_CONSUMER`, and - `TF_GRAPH_DEF_VERSION_MIN_PRODUCER`. -* For checkpoint versions, we have `TF_CHECKPOINT_VERSION`, - `TF_CHECKPOINT_VERSION_MIN_CONSUMER`, and - `TF_CHECKPOINT_VERSION_MIN_PRODUCER`. - -### Add a new attribute with default to an existing op - -Following the guidance below gives you forward compatibility only if the set of -ops has not changed: - -1. If forward compatibility is desired, set `strip_default_attrs` to `True` - while exporting the model using either the - @{tf.saved_model.builder.SavedModelBuilder.add_meta_graph_and_variables$`add_meta_graph_and_variables`} - and @{tf.saved_model.builder.SavedModelBuilder.add_meta_graph$`add_meta_graph`} - methods of the `SavedModelBuilder` class, or - @{tf.estimator.Estimator.export_savedmodel$`Estimator.export_savedmodel`} -2. This strips off the default valued attributes at the time of - producing/exporting the models. This makes sure that the exported - @{tf.MetaGraphDef} does not contain the new op-attribute when the default - value is used. -3. Having this control could allow out-of-date consumers (for example, serving - binaries that lag behind training binaries) to continue loading the models - and prevent interruptions in model serving. - -### Evolving GraphDef versions - -This section explains how to use this versioning mechanism to make different -types of changes to the `GraphDef` format. - -#### Add an op - -Add the new op to both consumers and producers at the same time, and do not -change any `GraphDef` versions. This type of change is automatically -backward compatible, and does not impact forward compatibility plan since -existing producer scripts will not suddenly use the new functionality. - -#### Add an op and switch existing Python wrappers to use it - -1. Implement new consumer functionality and increment the `GraphDef` version. -2. If it is possible to make the wrappers use the new functionality only in - cases that did not work before, the wrappers can be updated now. -3. Change Python wrappers to use the new functionality. Do not increment - `min_consumer`, since models that do not use this op should not break. - -#### Remove or restrict an op's functionality - -1. Fix all producer scripts (not TensorFlow itself) to not use the banned op or - functionality. -2. Increment the `GraphDef` version and implement new consumer functionality - that bans the removed op or functionality for GraphDefs at the new version - and above. If possible, make TensorFlow stop producing `GraphDefs` with the - banned functionality. To do so, add the - [`REGISTER_OP(...).Deprecated(deprecated_at_version, - message)`](https://github.com/tensorflow/tensorflow/blob/b289bc7a50fc0254970c60aaeba01c33de61a728/tensorflow/core/ops/array_ops.cc#L1009). -3. Wait for a major release for backward compatibility purposes. -4. 
Increase `min_producer` to the GraphDef version from (2) and remove the - functionality entirely. - -#### Change an op's functionality - -1. Add a new similar op named `SomethingV2` or similar and go through the - process of adding it and switching existing Python wrappers to use it, which - may take three weeks if forward compatibility is desired. -2. Remove the old op (Can only take place with a major version change due to - backward compatibility). -3. Increase `min_consumer` to rule out consumers with the old op, add back the - old op as an alias for `SomethingV2`, and go through the process to switch - existing Python wrappers to use it. -4. Go through the process to remove `SomethingV2`. - -#### Ban a single unsafe consumer version - -1. Bump the `GraphDef` version and add the bad version to `bad_consumers` for - all new GraphDefs. If possible, add to `bad_consumers` only for GraphDefs - which contain a certain op or similar. -2. If existing consumers have the bad version, push them out as soon as - possible. diff --git a/tensorflow/docs_src/tutorials/deep_cnn.md b/tensorflow/docs_src/tutorials/deep_cnn.md index 6a4c9a9b07..44a32d9d1d 100644 --- a/tensorflow/docs_src/tutorials/deep_cnn.md +++ b/tensorflow/docs_src/tutorials/deep_cnn.md @@ -268,7 +268,7 @@ in `cifar10_input.py`. `cifar10_train.py` periodically @{tf.train.Saver$saves} all model parameters in -@{$programmers_guide/saved_model$checkpoint files} +@{$guide/saved_model$checkpoint files} but it does *not* evaluate the model. The checkpoint file will be used by `cifar10_eval.py` to measure the predictive performance (see [Evaluating a Model](#evaluating-a-model) below). diff --git a/tensorflow/docs_src/tutorials/layers.md b/tensorflow/docs_src/tutorials/layers.md index 0f17899dae..212e337637 100644 --- a/tensorflow/docs_src/tutorials/layers.md +++ b/tensorflow/docs_src/tutorials/layers.md @@ -627,7 +627,7 @@ operation earlier when we generated the probabilities in `cnn_model_fn`. > argument, TensorFlow will assign a default name. A couple easy ways to > discover the names applied to operations are to visualize your graph on > @{$graph_viz$TensorBoard}) or to enable the -> @{$programmers_guide/debugger$TensorFlow Debugger (tfdbg)}. +> @{$guide/debugger$TensorFlow Debugger (tfdbg)}. Next, we create the `LoggingTensorHook`, passing `tensors_to_log` to the `tensors` argument. We set `every_n_iter=50`, which specifies that probabilities diff --git a/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py b/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py index 307eede5c0..7402247448 100644 --- a/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py +++ b/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py @@ -17,7 +17,7 @@ This version is like fully_connected_feed.py but uses data converted to a TFRecords file containing tf.train.Example protocol buffers. See: -https://www.tensorflow.org/programmers_guide/reading_data#reading_from_files +https://www.tensorflow.org/guide/reading_data#reading_from_files for context. YOU MUST run convert_to_records before running this (but you only need to diff --git a/tensorflow/java/README.md b/tensorflow/java/README.md index 2f1ce253b2..c7382ff231 100644 --- a/tensorflow/java/README.md +++ b/tensorflow/java/README.md @@ -1,7 +1,7 @@ # TensorFlow for Java > *WARNING*: The TensorFlow Java API is not currently covered by the TensorFlow -> [API stability guarantees](https://www.tensorflow.org/programmers_guide/version_semantics). 
+> [API stability guarantees](https://www.tensorflow.org/guide/version_semantics). > > For using TensorFlow on Android refer instead to > [contrib/android](https://www.tensorflow.org/code/tensorflow/contrib/android), @@ -23,8 +23,7 @@ native libraries will need to be built from source. 2. Setup the environment to build TensorFlow from source code ([Linux](https://www.tensorflow.org/install/install_sources#PrepareLinux) - or [Mac OS - X](https://www.tensorflow.org/install/install_sources#PrepareMac)). + or [macOS](https://www.tensorflow.org/install/install_sources#PrepareMac)). If you'd like to skip reading those details and do not care about GPU support, try the following: diff --git a/tensorflow/java/src/main/java/org/tensorflow/package-info.java b/tensorflow/java/src/main/java/org/tensorflow/package-info.java index 521c5c610c..f353ee3145 100644 --- a/tensorflow/java/src/main/java/org/tensorflow/package-info.java +++ b/tensorflow/java/src/main/java/org/tensorflow/package-info.java @@ -17,7 +17,7 @@ limitations under the License. * Defines classes to build, save, load and execute TensorFlow models. * *

WARNING: The API is currently experimental and is not covered by TensorFlow API stability + * href="https://www.tensorflow.org/guide/version_semantics">API stability * guarantees. See README.md for installation * instructions. diff --git a/tensorflow/python/data/__init__.py b/tensorflow/python/data/__init__.py index 7efe0948e7..3b9bf2469e 100644 --- a/tensorflow/python/data/__init__.py +++ b/tensorflow/python/data/__init__.py @@ -14,7 +14,7 @@ # ============================================================================== """`tf.data.Dataset` API for input pipelines. -See the @{$datasets$Importing Data} Programmer's Guide for an overview. +See @{$guide/datasets$Importing Data} for an overview. """ from __future__ import absolute_import diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index 6f9b12b123..0e020d86d0 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -212,6 +212,13 @@ class Dataset(object): def from_tensors(tensors): """Creates a `Dataset` with a single element, comprising the given tensors. + Note that if `tensors` contains a NumPy array, and eager execution is not + enabled, the values will be embedded in the graph as one or more + @{tf.constant} operations. For large datasets (> 1 GB), this can waste + memory and run into byte limits of graph serialization. If tensors contains + one or more large NumPy arrays, consider the alternative described in + @{$guide/datasets#consuming_numpy_arrays$this guide}. + Args: tensors: A nested structure of tensors. @@ -224,6 +231,13 @@ class Dataset(object): def from_tensor_slices(tensors): """Creates a `Dataset` whose elements are slices of the given tensors. + Note that if `tensors` contains a NumPy array, and eager execution is not + enabled, the values will be embedded in the graph as one or more + @{tf.constant} operations. For large datasets (> 1 GB), this can waste + memory and run into byte limits of graph serialization. If tensors contains + one or more large NumPy arrays, consider the alternative described in + @{$guide/datasets#consuming_numpy_arrays$this guide}. + Args: tensors: A nested structure of tensors, each having the same size in the 0th dimension. diff --git a/tensorflow/python/debug/BUILD b/tensorflow/python/debug/BUILD index 09062abd74..2d261f9be7 100644 --- a/tensorflow/python/debug/BUILD +++ b/tensorflow/python/debug/BUILD @@ -5,7 +5,7 @@ # # ":debug_py": Public Python methods and classes of tfdbg. # For API documentation, see https://www.tensorflow.org/api_docs/python/tfdbg -# For a user interface walkthrough, see https://www.tensorflow.org/programmers_guide/debugger +# For a user interface walkthrough, see https://www.tensorflow.org/guide/debugger # ":grpc_debug_server": Server interface for grpc:// debug URLs. package( diff --git a/tensorflow/python/debug/README.md b/tensorflow/python/debug/README.md index 269bbb19bd..9c16af4d79 100644 --- a/tensorflow/python/debug/README.md +++ b/tensorflow/python/debug/README.md @@ -28,7 +28,7 @@ models: * Easy access through session wrappers * Easy integration with common high-level APIs, such as - [TensorFlow Estimators](https://www.tensorflow.org/programmers_guide/estimators) and + [TensorFlow Estimators](https://www.tensorflow.org/guide/estimators) and [Keras](https://keras.io/) * Inspection of runtime tensor values and node connections * Conditional breaking after runs that generate tensors satisfying given @@ -43,7 +43,7 @@ models: ## How to use TFDBG? 
-* For a walkthrough of TFDBG command-line interface, see https://www.tensorflow.org/programmers_guide/debugger. +* For a walkthrough of TFDBG command-line interface, see https://www.tensorflow.org/guide/debugger. * For information on the web GUI of TFDBG (TensorBoard Debugger Plugin), see [this README](https://github.com/tensorflow/tensorboard/blob/master/tensorboard/plugins/debugger/README.md). * For programmatic use of the API of TFDBG, see https://www.tensorflow.org/api_docs/python/tfdbg. diff --git a/tensorflow/python/debug/examples/README.md b/tensorflow/python/debug/examples/README.md index cb4d484092..3b431e04dc 100644 --- a/tensorflow/python/debug/examples/README.md +++ b/tensorflow/python/debug/examples/README.md @@ -3,7 +3,7 @@ Hi, there! The documentation of **TensorFlow Debugger (tfdbg)** has moved. See the source version at -[this new location](../../../docs_src/programmers_guide/debugger.md). +[this new location](../../../docs_src/guide/debugger.md). See the public website version at -[https://www.tensorflow.org/programmers_guide/debugger](https://www.tensorflow.org/programmers_guide/debugger). +[https://www.tensorflow.org/guide/debugger](https://www.tensorflow.org/guide/debugger). diff --git a/tensorflow/python/estimator/keras.py b/tensorflow/python/estimator/keras.py index 2f439f765e..6856b8b5a9 100644 --- a/tensorflow/python/estimator/keras.py +++ b/tensorflow/python/estimator/keras.py @@ -455,7 +455,7 @@ def model_to_estimator(keras_model=None, """Constructs an `Estimator` instance from given keras model. For usage example, please see - @{$programmers_guide/estimators$creating_estimators_from_keras_models}. + @{$guide/estimators$creating_estimators_from_keras_models}. Args: keras_model: A compiled Keras model object. This argument is mutually diff --git a/tensorflow/python/ops/script_ops.py b/tensorflow/python/ops/script_ops.py index 16c73213d5..f8df9b2c78 100644 --- a/tensorflow/python/ops/script_ops.py +++ b/tensorflow/python/ops/script_ops.py @@ -267,7 +267,7 @@ def eager_py_func(func, inp, Tout, name=None): or print statements as desired, and wrap those functions in `tf.contrib.eager.py_func`. - For more information on eager execution, see @{$programmers_guide/eager}. + For more information on eager execution, see @{$guide/eager}. `tf.contrib.eager.py_func` is similar in spirit to @{tf.py_func}, but unlike the latter, the former lets you use TensorFlow operations in the wrapped diff --git a/tensorflow/python/tools/saved_model_cli.py b/tensorflow/python/tools/saved_model_cli.py index 5b9d25d449..38fed5335e 100644 --- a/tensorflow/python/tools/saved_model_cli.py +++ b/tensorflow/python/tools/saved_model_cli.py @@ -15,7 +15,7 @@ """Command-line interface to inspect and execute a graph in a SavedModel. 
For detailed usages and examples, please refer to: -https://www.tensorflow.org/programmers_guide/saved_model_cli +https://www.tensorflow.org/guide/saved_model_cli """ @@ -720,7 +720,7 @@ def create_parser(): '\'input4_key=[{"id":[26],"weights":[0.5, 0.5]}]\' \\\n' ' --outdir=/out\n\n' 'For more information about input file format, please see:\n' - 'https://www.tensorflow.org/programmers_guide/saved_model_cli\n') + 'https://www.tensorflow.org/guide/saved_model_cli\n') parser_run = subparsers.add_parser( 'run', description=run_msg, formatter_class=argparse.RawTextHelpFormatter) parser_run.add_argument( diff --git a/third_party/examples/eager/spinn/README.md b/third_party/examples/eager/spinn/README.md index fbb1fde837..e2fd8009a0 100644 --- a/third_party/examples/eager/spinn/README.md +++ b/third_party/examples/eager/spinn/README.md @@ -22,7 +22,7 @@ Other eager execution examples can be found under [tensorflow/contrib/eager/pyth - [`data.py`](../../../../tensorflow/contrib/eager/python/examples/spinn/data.py): Pipeline for loading and preprocessing the [SNLI](https://nlp.stanford.edu/projects/snli/) data and [GloVe](https://nlp.stanford.edu/projects/glove/) word embedding, written - using the [`tf.data`](https://www.tensorflow.org/programmers_guide/datasets) + using the [`tf.data`](https://www.tensorflow.org/guide/datasets) API. - [`spinn.py`](./spinn.py): Model definition and training routines. This example illustrates how one might perform the following actions with -- cgit v1.2.3