145 files changed, 11318 insertions, 179 deletions
diff --git a/README.md b/README.md
index dd407a3184..be29f134d2 100644
--- a/README.md
+++ b/README.md
@@ -31,8 +31,8 @@ and discussion.**
 
 People who are a little bit adventurous can also try our nightly binaries:
 
-* Linux CPU only: [Python 2](http://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.7.1-cp27-none-linux_x86_64.whl) ([build history](http://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/)) / [Python 3](http://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.7.1-py3-none-any.whl) ([build history](http://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/))
-* Linux GPU: [Python 2](http://ci.tensorflow.org/view/Nightly/job/nigntly-matrix-linux-gpu/TF_BUILD_CONTAINER_TYPE=GPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.7.1-py2-none-any.whl) ([build history](http://ci.tensorflow.org/view/Nightly/job/nigntly-matrix-linux-gpu/TF_BUILD_CONTAINER_TYPE=GPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-slave/)) / [Python 3](http://ci.tensorflow.org/view/Nightly/job/nigntly-matrix-linux-gpu/TF_BUILD_CONTAINER_TYPE=GPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.7.1-py3-none-any.whl) ([build history](http://ci.tensorflow.org/view/Nightly/job/nigntly-matrix-linux-gpu/TF_BUILD_CONTAINER_TYPE=GPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-slave/))
+* Linux CPU only: [Python 2](http://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.7.1-cp27-none-linux_x86_64.whl) ([build history](http://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/)) / [Python 3](http://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.7.1-cp34-cp34m-linux_x86_64.whl) ([build history](http://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/))
+* Linux GPU: [Python 2](http://ci.tensorflow.org/view/Nightly/job/nigntly-matrix-linux-gpu/TF_BUILD_CONTAINER_TYPE=GPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.7.1-cp27-none-linux_x86_64.whl) ([build history](http://ci.tensorflow.org/view/Nightly/job/nigntly-matrix-linux-gpu/TF_BUILD_CONTAINER_TYPE=GPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-slave/)) / [Python 3](http://ci.tensorflow.org/view/Nightly/job/nigntly-matrix-linux-gpu/TF_BUILD_CONTAINER_TYPE=GPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.7.1-cp34-cp34m-linux_x86_64.whl) ([build history](http://ci.tensorflow.org/view/Nightly/job/nigntly-matrix-linux-gpu/TF_BUILD_CONTAINER_TYPE=GPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-slave/))
 * Mac CPU only: [Python 2](http://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=mac-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.7.1-py2-none-any.whl) ([build history](http://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=mac-slave/)) / [Python 3](http://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=mac-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.7.1-py3-none-any.whl) ([build history](http://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=mac-slave/))
 * [Android](http://ci.tensorflow.org/view/Nightly/job/nightly-matrix-android/TF_BUILD_CONTAINER_TYPE=ANDROID,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=NO_PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=android-slave/lastSuccessfulBuild/artifact/bazel-out/local_linux/bin/tensorflow/examples/android/tensorflow_demo.apk) ([build history](http://ci.tensorflow.org/view/Nightly/job/nightly-matrix-android/TF_BUILD_CONTAINER_TYPE=ANDROID,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=NO_PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=android-slave/))
 
diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD
index 80ad8bc29a..708cfddefc 100644
--- a/tensorflow/contrib/BUILD
+++ b/tensorflow/contrib/BUILD
@@ -18,6 +18,7 @@ py_library(
         "//tensorflow/contrib/layers:layers_py",
         "//tensorflow/contrib/linear_optimizer:sdca_ops_py",
         "//tensorflow/contrib/lookup:lookup_py",
+        "//tensorflow/contrib/skflow",
         "//tensorflow/contrib/testing:testing_py",
         "//tensorflow/contrib/util:util_py",
     ],
diff --git a/tensorflow/contrib/cmake/README.md b/tensorflow/contrib/cmake/README.md
index 18d535faea..a2fbc11383 100644
--- a/tensorflow/contrib/cmake/README.md
+++ b/tensorflow/contrib/cmake/README.md
@@ -1,12 +1,38 @@
 This directory contains *CMake* files that can be used to build TensorFlow
 core library.
 
+
+Current Status
+--------------
+
+CMake build is not yet ready for general usage!
+
+We are actively working on CMake support. Please help us improve it.
+Pull requests are welcome!
+
+
+Linux CMake + Docker (very simple)
+----------------------------------
+
+```bash
+git clone --recursive https://github.com/tensorflow/tensorflow.git
+cd tensorflow
+tensorflow/tools/ci_build/ci_build.sh CPU tensorflow/tools/ci_build/builds/cmake.sh
+```
+
+That's it. Dependencies included. Otherwise read the rest of this readme...
+
+
+Prerequisites
+=============
+
 You need to have [CMake](http://www.cmake.org) and [Git](http://git-scm.com)
 installed on your computer before proceeding.
 
 Most of the instructions will be given to the *Сommand Prompt*, but the same
 actions can be performed using appropriate GUI tools.
 
+
 Environment Setup
 =================
 
diff --git a/tensorflow/contrib/lookup/lookup_ops_test.py b/tensorflow/contrib/lookup/lookup_ops_test.py
index 645cd7a82b..4715039313 100644
--- a/tensorflow/contrib/lookup/lookup_ops_test.py
+++ b/tensorflow/contrib/lookup/lookup_ops_test.py
@@ -284,7 +284,7 @@ class IndexToStringTest(tf.test.TestCase):
       self.assertRaises(tf.OpError, feats.eval)
       tf.initialize_all_tables().run()
 
-      self.assertAllEqual(("brain", "salad", "surgery", "UNK"), feats.eval())
+      self.assertAllEqual((b"brain", b"salad", b"surgery", b"UNK"), feats.eval())
 
   def test_duplicate_entries(self):
     with self.test_session():
@@ -293,12 +293,12 @@ class IndexToStringTest(tf.test.TestCase):
       feats = tf.contrib.lookup.index_to_string(indices,
                                                 mapping=mapping_strings)
       tf.initialize_all_tables().run()
-      self.assertAllEqual(("hello", "hello", "UNK"), feats.eval())
+      self.assertAllEqual((b"hello", b"hello", b"UNK"), feats.eval())
 
       self.assertRaises(tf.OpError, tf.initialize_all_tables().run)
 
   def test_index_to_string_with_default_value(self):
-    default_value = "NONE"
+    default_value = b"NONE"
     with self.test_session():
       mapping_strings = tf.constant(["brain", "salad", "surgery"])
       indices = tf.constant([1, 2, 4], tf.int64)
@@ -308,7 +308,7 @@ class IndexToStringTest(tf.test.TestCase):
       self.assertRaises(tf.OpError, feats.eval)
 
       tf.initialize_all_tables().run()
-      self.assertAllEqual(("salad", "surgery", default_value), feats.eval())
+      self.assertAllEqual((b"salad", b"surgery", default_value), feats.eval())
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/contrib/skflow/BUILD b/tensorflow/contrib/skflow/BUILD
new file mode 100644
index 0000000000..5fed607395
--- /dev/null
+++ b/tensorflow/contrib/skflow/BUILD
@@ -0,0 +1,205 @@
+# Description:
+#   contains Scikit Flow sub-project with high level tensorflow API.
+
+licenses(["notice"])  # Apache 2.0
+
+exports_files(["LICENSE"])
+
+package(default_visibility = ["//tensorflow:__subpackages__"])
+
+py_library(
+    name = "skflow",
+    srcs = glob([
+        "python/skflow/**/*.py",
+    ]),
+    srcs_version = "PY2AND3",
+    deps = ["//tensorflow/python:framework"],
+)
+
+py_test(
+    name = "test_base",
+    srcs = ["python/skflow/tests/test_base.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":skflow",
+        "//tensorflow:tensorflow_py",
+        "//tensorflow/python:framework_test_lib",
+    ],
+)
+
+py_test(
+    name = "test_custom_decay",
+    srcs = ["python/skflow/tests/test_custom_decay.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":skflow",
+        "//tensorflow:tensorflow_py",
+        "//tensorflow/python:framework_test_lib",
+    ],
+)
+
+py_test(
+    name = "test_data_feeder",
+    srcs = ["python/skflow/tests/test_data_feeder.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":skflow",
+        "//tensorflow:tensorflow_py",
+        "//tensorflow/python:framework_test_lib",
+    ],
+)
+
+py_test(
+    name = "test_estimators",
+    srcs = ["python/skflow/tests/test_estimators.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":skflow",
+        "//tensorflow:tensorflow_py",
+        "//tensorflow/python:framework_test_lib",
+    ],
+)
+
+py_test(
+    name = "test_grid_search",
+    srcs = ["python/skflow/tests/test_grid_search.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":skflow",
+        "//tensorflow:tensorflow_py",
+        "//tensorflow/python:framework_test_lib",
+    ],
+)
+
+py_test(
+    name = "test_io",
+    srcs = ["python/skflow/tests/test_io.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":skflow",
+        "//tensorflow:tensorflow_py",
+        "//tensorflow/python:framework_test_lib",
+    ],
+)
+
+py_test(
+    name = "test_multioutput",
+    srcs = ["python/skflow/tests/test_multioutput.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":skflow",
+        "//tensorflow:tensorflow_py",
+        "//tensorflow/python:framework_test_lib",
+    ],
+)
+
+py_test(
+    name = "test_nonlinear",
+    srcs = ["python/skflow/tests/test_nonlinear.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":skflow",
+        "//tensorflow:tensorflow_py",
+        "//tensorflow/python:framework_test_lib",
+    ],
+)
+
+py_test(
+    name = "test_regression",
+    srcs = ["python/skflow/tests/test_regression.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":skflow",
+        "//tensorflow:tensorflow_py",
+        "//tensorflow/python:framework_test_lib",
+    ],
+)
+
+py_test(
+    name = "test_saver",
+    srcs = ["python/skflow/tests/test_saver.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":skflow",
+        "//tensorflow:tensorflow_py",
+        "//tensorflow/python:framework_test_lib",
+    ],
+)
+
+py_test(
+    name = "test_ops",
+    srcs = ["python/skflow/ops/tests/test_ops.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":skflow",
+        "//tensorflow:tensorflow_py",
+        "//tensorflow/python:framework_test_lib",
+    ],
+)
+
+py_test(
+    name = "test_dropout_ops",
+    srcs = ["python/skflow/ops/tests/test_dropout_ops.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":skflow",
+        "//tensorflow:tensorflow_py",
+        "//tensorflow/python:framework_test_lib",
+    ],
+)
+
+py_test(
+    name = "test_seq2seq_ops",
+    srcs = ["python/skflow/ops/tests/test_seq2seq_ops.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":skflow",
+        "//tensorflow:tensorflow_py",
+        "//tensorflow/python:framework_test_lib",
+    ],
+)
+
+py_test(
+    name = "test_categorical",
+    srcs = ["python/skflow/preprocessing/tests/test_categorical.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":skflow",
+        "//tensorflow:tensorflow_py",
+        "//tensorflow/python:framework_test_lib",
+    ],
+)
+
+py_test(
+    name = "test_categorical_vocabulary",
+    srcs = ["python/skflow/preprocessing/tests/test_categorical_vocabulary.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":skflow",
+        "//tensorflow:tensorflow_py",
+        "//tensorflow/python:framework_test_lib",
+    ],
+)
+
+py_test(
+    name = "test_text",
+    srcs = ["python/skflow/preprocessing/tests/test_text.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":skflow",
+        "//tensorflow:tensorflow_py",
+        "//tensorflow/python:framework_test_lib",
+    ],
+)
+
+filegroup(
+    name = "all_files",
+    srcs = glob(
+        ["**/*"],
+        exclude = [
+            "**/METADATA",
+            "**/OWNERS",
+        ],
+    ),
+    visibility = ["//tensorflow:__subpackages__"],
+)
diff --git a/tensorflow/contrib/skflow/__init__.py b/tensorflow/contrib/skflow/__init__.py
new file mode 100644
index 0000000000..91aef73431
--- /dev/null
+++ b/tensorflow/contrib/skflow/__init__.py
@@ -0,0 +1,20 @@
+# Copyright 2016 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from __future__ import division, print_function, absolute_import
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from python import *
diff --git a/tensorflow/contrib/skflow/g3doc/api_docs/python/estimators.md b/tensorflow/contrib/skflow/g3doc/api_docs/python/estimators.md
new file mode 100644
index 0000000000..a5c962d0e2
--- /dev/null
+++ b/tensorflow/contrib/skflow/g3doc/api_docs/python/estimators.md
@@ -0,0 +1,2481 @@
+---
+---
+<!-- This file is machine generated: DO NOT EDIT! -->
+
+# Estimators
+[TOC]
+
+Scikit Flow Estimators.
+
+## Other Functions and Classes
+- - -
+
+### `class skflow.TensorFlowClassifier` {#TensorFlowClassifier}
+
+TensorFlow Linear Classifier model.
+- - -
+
+#### `skflow.TensorFlowClassifier.__init__(n_classes, tf_master='', batch_size=32, steps=200, optimizer='SGD', learning_rate=0.1, class_weight=None, tf_random_seed=42, continue_training=False, config_addon=None, verbose=1, max_to_keep=5, keep_checkpoint_every_n_hours=10000)` {#TensorFlowClassifier.__init__}
+
+
+
+
+- - -
+
+#### `skflow.TensorFlowClassifier.bias_` {#TensorFlowClassifier.bias_}
+
+Returns bias of the linear classifier.
+
+
+- - -
+
+#### `skflow.TensorFlowClassifier.fit(X, y, monitor=None, logdir=None)` {#TensorFlowClassifier.fit}
+
+Builds a neural network model given provided `model_fn` and training
+data X and y.
+
+Note: when called for the first time, constructs the graph and initializes
+variables. On consecutive calls it will continue training the same model.
+This logic follows partial_fit() interface in scikit-learn.
+
+To restart learning, create new estimator.
+
+##### Args:
+
+
+*  <b>`X`</b>: matrix or tensor of shape [n_samples, n_features...]. Can be
+    iterator that returns arrays of features. The training input
+    samples for fitting the model.
+*  <b>`y`</b>: vector or matrix [n_samples] or [n_samples, n_outputs]. Can be
+    iterator that returns array of targets. The training target values
+    (class labels in classification, real numbers in regression).
+*  <b>`monitor`</b>: Monitor object to print training progress and invoke early stopping
+*  <b>`logdir`</b>: the directory to save the log file that can be used for
+    optional visualization.
+
+##### Returns:
+
+    Returns self.
+
+
+- - -
+
+#### `skflow.TensorFlowClassifier.get_params(deep=True)` {#TensorFlowClassifier.get_params}
+
+Get parameters for this estimator.
+
+Parameters
+----------
+deep: boolean, optional
+    If True, will return the parameters for this estimator and
+    contained subobjects that are estimators.
+
+Returns
+-------
+params : mapping of string to any
+    Parameter names mapped to their values.
+
+
+- - -
+
+#### `skflow.TensorFlowClassifier.get_tensor(name)` {#TensorFlowClassifier.get_tensor}
+
+Returns tensor by name.
+
+##### Args:
+
+
+*  <b>`name`</b>: string, name of the tensor.
+
+##### Returns:
+
+    Tensor.
+
+
+- - -
+
+#### `skflow.TensorFlowClassifier.get_tensor_value(name)` {#TensorFlowClassifier.get_tensor_value}
+
+Returns value of the tensor given by name.
+
+##### Args:
+
+
+*  <b>`name`</b>: string, name of the tensor.
+
+##### Returns:
+
+    Numpy array - value of the tensor.
+
+
+- - -
+
+#### `skflow.TensorFlowClassifier.partial_fit(X, y)` {#TensorFlowClassifier.partial_fit}
+
+Incremental fit on a batch of samples.
+
+This method is expected to be called several times consecutively
+on different or the same chunks of the dataset. This either can
+implement iterative training or out-of-core/online training.
+
+This is especially useful when the whole dataset is too big to
+fit in memory at the same time. Or when model is taking long time
+to converge, and you want to split up training into subparts.
+
+##### Args:
+
+
+*  <b>`X`</b>: matrix or tensor of shape [n_samples, n_features...]. Can be
+    iterator that returns arrays of features. The training input
+    samples for fitting the model.
+*  <b>`y`</b>: vector or matrix [n_samples] or [n_samples, n_outputs]. Can be
+    iterator that returns array of targets. The training target values
+    (class label in classification, real numbers in regression).
+
+##### Returns:
+
+    Returns self.
+
+
+- - -
+
+#### `skflow.TensorFlowClassifier.predict(X, axis=1, batch_size=-1)` {#TensorFlowClassifier.predict}
+
+Predict class or regression for X.
+
+For a classification model, the predicted class for each sample in X is
+returned. For a regression model, the predicted value based on X is
+returned.
+
+##### Args:
+
+
+*  <b>`X`</b>: array-like matrix, [n_samples, n_features...] or iterator.
+*  <b>`axis`</b>: Which axis to argmax for classification.
+          By default axis 1 (next after batch) is used.
+          Use 2 for sequence predictions.
+*  <b>`batch_size`</b>: If test set is too big, use batch size to split
+                it into mini batches. By default full dataset is used.
+
+##### Returns:
+
+
+*  <b>`y`</b>: array of shape [n_samples]. The predicted classes or predicted
+    value.
+
+
+- - -
+
+#### `skflow.TensorFlowClassifier.predict_proba(X, batch_size=-1)` {#TensorFlowClassifier.predict_proba}
+
+Predict class probability of the input samples X.
+
+##### Args:
+
+
+*  <b>`X`</b>: array-like matrix, [n_samples, n_features...] or iterator.
+*  <b>`batch_size`</b>: If test set is too big, use batch size to split
+                it into mini batches. By default full dataset is used.
+
+##### Returns:
+
+
+*  <b>`y`</b>: array of shape [n_samples, n_classes]. The predicted
+    probabilities for each class.
+
+
+- - -
+
+#### `skflow.TensorFlowClassifier.restore(cls, path, config_addon=None)` {#TensorFlowClassifier.restore}
+
+Restores model from given path.
+
+##### Args:
+
+
+*  <b>`path`</b>: Path to the checkpoints and other model information.
+*  <b>`config_addon`</b>: ConfigAddon object that controls the configurations of the session,
+        e.g. num_cores, gpu_memory_fraction, etc. This is allowed to be reconfigured.
+
+##### Returns:
+
+    Estimator, object of the subclass of TensorFlowEstimator.
+
+
+- - -
+
+#### `skflow.TensorFlowClassifier.save(path)` {#TensorFlowClassifier.save}
+
+Saves checkpoints and graph to given path.
+
+##### Args:
+
+
+*  <b>`path`</b>: Folder to save model to.
+
+
+- - -
+
+#### `skflow.TensorFlowClassifier.score(X, y, sample_weight=None)` {#TensorFlowClassifier.score}
+
+Returns the mean accuracy on the given test data and labels.
+
+In multi-label classification, this is the subset accuracy
+which is a harsh metric since you require for each sample that
+each label set be correctly predicted.
+
+Parameters
+----------
+X : array-like, shape = (n_samples, n_features)
+    Test samples.
+
+y : array-like, shape = (n_samples) or (n_samples, n_outputs)
+    True labels for X.
+
+sample_weight : array-like, shape = [n_samples], optional
+    Sample weights.
+
+Returns
+-------
+score : float
+    Mean accuracy of self.predict(X) wrt. y.
+
+
+- - -
+
+#### `skflow.TensorFlowClassifier.set_params(**params)` {#TensorFlowClassifier.set_params}
+
+Set the parameters of this estimator.
+
+The method works on simple estimators as well as on nested objects
+(such as pipelines). The former have parameters of the form
+``<component>__<parameter>`` so that it's possible to update each
+component of a nested object.
+
+Returns
+-------
+self
+
+
+- - -
+
+#### `skflow.TensorFlowClassifier.weights_` {#TensorFlowClassifier.weights_}
+
+Returns weights of the linear classifier.
+
+
+
+- - -
+
+### `class skflow.TensorFlowDNNClassifier` {#TensorFlowDNNClassifier}
+
+TensorFlow DNN Classifier model.
+
+Parameters:
+    hidden_units: List of hidden units per layer.
+    n_classes: Number of classes in the target.
+    tf_master: TensorFlow master. Empty string is default for local.
+    batch_size: Mini batch size.
+    steps: Number of steps to run over data.
+    optimizer: Optimizer name (or class), for example "SGD", "Adam",
+               "Adagrad".
+    learning_rate: If this is constant float value, no decay function is used.
+        Instead, a customized decay function can be passed that accepts
+        global_step as parameter and returns a Tensor.
+        e.g. exponential decay function:
+        def exp_decay(global_step):
+            return tf.train.exponential_decay(
+                learning_rate=0.1, global_step=global_step,
+                decay_steps=2, decay_rate=0.001)
+    class_weight: None or list of n_classes floats. Weight associated with
+                 classes for loss computation. If not given, all classes are supposed to have
+                 weight one.
+    tf_random_seed: Random seed for TensorFlow initializers.
+        Setting this value allows consistency between reruns.
+    continue_training: when continue_training is True, once initialized
+        model will be continually trained on every call of fit.
+    config_addon: ConfigAddon object that controls the configurations of the session,
+        e.g. num_cores, gpu_memory_fraction, etc.
+    max_to_keep: The maximum number of recent checkpoint files to keep.
+        As new files are created, older files are deleted.
+        If None or 0, all checkpoint files are kept.
+        Defaults to 5 (that is, the 5 most recent checkpoint files are kept.)
+    keep_checkpoint_every_n_hours: Number of hours between each checkpoint
+        to be saved. The default value of 10,000 hours effectively disables the feature.
+- - -
+
+#### `skflow.TensorFlowDNNClassifier.__init__(hidden_units, n_classes, tf_master='', batch_size=32, steps=200, optimizer='SGD', learning_rate=0.1, class_weight=None, tf_random_seed=42, continue_training=False, config_addon=None, verbose=1, max_to_keep=5, keep_checkpoint_every_n_hours=10000)` {#TensorFlowDNNClassifier.__init__}
+
+
+
+
+- - -
+
+#### `skflow.TensorFlowDNNClassifier.bias_` {#TensorFlowDNNClassifier.bias_}
+
+Returns bias of the DNN's bias layers.
+
+
+- - -
+
+#### `skflow.TensorFlowDNNClassifier.fit(X, y, monitor=None, logdir=None)` {#TensorFlowDNNClassifier.fit}
+
+Builds a neural network model given provided `model_fn` and training
+data X and y.
+
+Note: when called for the first time, constructs the graph and initializes
+variables. On consecutive calls it will continue training the same model.
+This logic follows partial_fit() interface in scikit-learn.
+
+To restart learning, create new estimator.
+
+##### Args:
+
+
+*  <b>`X`</b>: matrix or tensor of shape [n_samples, n_features...]. Can be
+    iterator that returns arrays of features. The training input
+    samples for fitting the model.
+*  <b>`y`</b>: vector or matrix [n_samples] or [n_samples, n_outputs]. Can be
+    iterator that returns array of targets. The training target values
+    (class labels in classification, real numbers in regression).
+*  <b>`monitor`</b>: Monitor object to print training progress and invoke early stopping
+*  <b>`logdir`</b>: the directory to save the log file that can be used for
+    optional visualization.
+
+##### Returns:
+
+    Returns self.
+
+
+- - -
+
+#### `skflow.TensorFlowDNNClassifier.get_params(deep=True)` {#TensorFlowDNNClassifier.get_params}
+
+Get parameters for this estimator.
+
+Parameters
+----------
+deep: boolean, optional
+    If True, will return the parameters for this estimator and
+    contained subobjects that are estimators.
+
+Returns
+-------
+params : mapping of string to any
+    Parameter names mapped to their values.
+
+
+- - -
+
+#### `skflow.TensorFlowDNNClassifier.get_tensor(name)` {#TensorFlowDNNClassifier.get_tensor}
+
+Returns tensor by name.
+
+##### Args:
+
+
+*  <b>`name`</b>: string, name of the tensor.
+
+##### Returns:
+
+    Tensor.
+
+
+- - -
+
+#### `skflow.TensorFlowDNNClassifier.get_tensor_value(name)` {#TensorFlowDNNClassifier.get_tensor_value}
+
+Returns value of the tensor given by name.
+
+##### Args:
+
+
+*  <b>`name`</b>: string, name of the tensor.
+
+##### Returns:
+
+    Numpy array - value of the tensor.
+
+
+- - -
+
+#### `skflow.TensorFlowDNNClassifier.partial_fit(X, y)` {#TensorFlowDNNClassifier.partial_fit}
+
+Incremental fit on a batch of samples.
+
+This method is expected to be called several times consecutively
+on different or the same chunks of the dataset. This either can
+implement iterative training or out-of-core/online training.
+
+This is especially useful when the whole dataset is too big to
+fit in memory at the same time. Or when model is taking long time
+to converge, and you want to split up training into subparts.
+
+##### Args:
+
+
+*  <b>`X`</b>: matrix or tensor of shape [n_samples, n_features...]. Can be
+    iterator that returns arrays of features. The training input
+    samples for fitting the model.
+*  <b>`y`</b>: vector or matrix [n_samples] or [n_samples, n_outputs]. Can be
+    iterator that returns array of targets. The training target values
+    (class label in classification, real numbers in regression).
+
+##### Returns:
+
+    Returns self.
+
+
+- - -
+
+#### `skflow.TensorFlowDNNClassifier.predict(X, axis=1, batch_size=-1)` {#TensorFlowDNNClassifier.predict}
+
+Predict class or regression for X.
+
+For a classification model, the predicted class for each sample in X is
+returned. For a regression model, the predicted value based on X is
+returned.
+
+##### Args:
+
+
+*  <b>`X`</b>: array-like matrix, [n_samples, n_features...] or iterator.
+*  <b>`axis`</b>: Which axis to argmax for classification.
+          By default axis 1 (next after batch) is used.
+          Use 2 for sequence predictions.
+*  <b>`batch_size`</b>: If test set is too big, use batch size to split
+                it into mini batches. By default full dataset is used.
+
+##### Returns:
+
+
+*  <b>`y`</b>: array of shape [n_samples]. The predicted classes or predicted
+    value.
+
+
+- - -
+
+#### `skflow.TensorFlowDNNClassifier.predict_proba(X, batch_size=-1)` {#TensorFlowDNNClassifier.predict_proba}
+
+Predict class probability of the input samples X.
+
+##### Args:
+
+
+*  <b>`X`</b>: array-like matrix, [n_samples, n_features...] or iterator.
+*  <b>`batch_size`</b>: If test set is too big, use batch size to split
+                it into mini batches. By default full dataset is used.
+
+##### Returns:
+
+
+*  <b>`y`</b>: array of shape [n_samples, n_classes]. The predicted
+    probabilities for each class.
+
+
+- - -
+
+#### `skflow.TensorFlowDNNClassifier.restore(cls, path, config_addon=None)` {#TensorFlowDNNClassifier.restore}
+
+Restores model from given path.
+
+##### Args:
+
+
+*  <b>`path`</b>: Path to the checkpoints and other model information.
+*  <b>`config_addon`</b>: ConfigAddon object that controls the configurations of the session,
+        e.g. num_cores, gpu_memory_fraction, etc. This is allowed to be reconfigured.
+
+##### Returns:
+
+    Estimator, object of the subclass of TensorFlowEstimator.
+
+
+- - -
+
+#### `skflow.TensorFlowDNNClassifier.save(path)` {#TensorFlowDNNClassifier.save}
+
+Saves checkpoints and graph to given path.
+
+##### Args:
+
+
+*  <b>`path`</b>: Folder to save model to.
+
+
+- - -
+
+#### `skflow.TensorFlowDNNClassifier.score(X, y, sample_weight=None)` {#TensorFlowDNNClassifier.score}
+
+Returns the mean accuracy on the given test data and labels.
+
+In multi-label classification, this is the subset accuracy
+which is a harsh metric since you require for each sample that
+each label set be correctly predicted.
+
+Parameters
+----------
+X : array-like, shape = (n_samples, n_features)
+    Test samples.
+
+y : array-like, shape = (n_samples) or (n_samples, n_outputs)
+    True labels for X.
+
+sample_weight : array-like, shape = [n_samples], optional
+    Sample weights.
+
+Returns
+-------
+score : float
+    Mean accuracy of self.predict(X) wrt. y.
+
+
+- - -
+
+#### `skflow.TensorFlowDNNClassifier.set_params(**params)` {#TensorFlowDNNClassifier.set_params}
+
+Set the parameters of this estimator.
+
+The method works on simple estimators as well as on nested objects
+(such as pipelines). The former have parameters of the form
+``<component>__<parameter>`` so that it's possible to update each
+component of a nested object.
+
+Returns
+-------
+self
+
+
+- - -
+
+#### `skflow.TensorFlowDNNClassifier.weights_` {#TensorFlowDNNClassifier.weights_}
+
+Returns weights of the DNN weight layers.
+
+
+
+- - -
+
+### `class skflow.TensorFlowDNNRegressor` {#TensorFlowDNNRegressor}
+
+TensorFlow DNN Regressor model.
+
+Parameters:
+    hidden_units: List of hidden units per layer.
+    tf_master: TensorFlow master. Empty string is default for local.
+    batch_size: Mini batch size.
+    steps: Number of steps to run over data.
+    optimizer: Optimizer name (or class), for example "SGD", "Adam",
+               "Adagrad".
+    learning_rate: If this is constant float value, no decay function is used.
+        Instead, a customized decay function can be passed that accepts
+        global_step as parameter and returns a Tensor.
+        e.g. exponential decay function:
+        def exp_decay(global_step):
+            return tf.train.exponential_decay(
+                learning_rate=0.1, global_step=global_step,
+                decay_steps=2, decay_rate=0.001)
+    tf_random_seed: Random seed for TensorFlow initializers.
+        Setting this value allows consistency between reruns.
+    continue_training: when continue_training is True, once initialized
+        model will be continually trained on every call of fit.
+    config_addon: ConfigAddon object that controls the configurations of the session,
+        e.g. num_cores, gpu_memory_fraction, etc.
+    verbose: Controls the verbosity, possible values:
+             0: the algorithm and debug information is muted.
+             1: trainer prints the progress.
+             2: log device placement is printed.
+    max_to_keep: The maximum number of recent checkpoint files to keep.
+        As new files are created, older files are deleted.
+        If None or 0, all checkpoint files are kept.
+        Defaults to 5 (that is, the 5 most recent checkpoint files are kept.)
+    keep_checkpoint_every_n_hours: Number of hours between each checkpoint
+        to be saved. The default value of 10,000 hours effectively disables the feature.
+- - -
+
+#### `skflow.TensorFlowDNNRegressor.__init__(hidden_units, n_classes=0, tf_master='', batch_size=32, steps=200, optimizer='SGD', learning_rate=0.1, tf_random_seed=42, continue_training=False, config_addon=None, verbose=1, max_to_keep=5, keep_checkpoint_every_n_hours=10000)` {#TensorFlowDNNRegressor.__init__}
+
+
+
+
+- - -
+
+#### `skflow.TensorFlowDNNRegressor.bias_` {#TensorFlowDNNRegressor.bias_}
+
+Returns bias of the DNN's bias layers.
+
+
+- - -
+
+#### `skflow.TensorFlowDNNRegressor.fit(X, y, monitor=None, logdir=None)` {#TensorFlowDNNRegressor.fit}
+
+Builds a neural network model given provided `model_fn` and training
+data X and y.
+
+Note: the first call constructs the graph and initializes
+variables. Subsequent calls continue training the same model.
+This logic follows partial_fit() interface in scikit-learn.
+
+To restart learning, create new estimator.
+
+##### Args:
+
+
+*  <b>`X`</b>: matrix or tensor of shape [n_samples, n_features...]. Can be
+    iterator that returns arrays of features. The training input
+    samples for fitting the model.
+*  <b>`y`</b>: vector or matrix [n_samples] or [n_samples, n_outputs]. Can be
+    iterator that returns array of targets. The training target values
+    (class labels in classification, real numbers in regression).
+*  <b>`monitor`</b>: Monitor object to print training progress and invoke early stopping
+*  <b>`logdir`</b>: the directory to save the log file that can be used for
+    optional visualization.
+
+##### Returns:
+
+    Returns self.
+
+
+- - -
+
+#### `skflow.TensorFlowDNNRegressor.get_params(deep=True)` {#TensorFlowDNNRegressor.get_params}
+
+Get parameters for this estimator.
+
+Parameters
+----------
+deep: boolean, optional
+    If True, will return the parameters for this estimator and
+    contained subobjects that are estimators.
+
+Returns
+-------
+params : mapping of string to any
+    Parameter names mapped to their values.
+
+
+- - -
+
+#### `skflow.TensorFlowDNNRegressor.get_tensor(name)` {#TensorFlowDNNRegressor.get_tensor}
+
+Returns tensor by name.
+
+##### Args:
+
+
+*  <b>`name`</b>: string, name of the tensor.
+
+##### Returns:
+
+    Tensor.
+
+
+- - -
+
+#### `skflow.TensorFlowDNNRegressor.get_tensor_value(name)` {#TensorFlowDNNRegressor.get_tensor_value}
+
+Returns value of the tensor given by name.
+
+##### Args:
+
+
+*  <b>`name`</b>: string, name of the tensor.
+
+##### Returns:
+
+    Numpy array - value of the tensor.
+
+
+- - -
+
+#### `skflow.TensorFlowDNNRegressor.partial_fit(X, y)` {#TensorFlowDNNRegressor.partial_fit}
+
+Incremental fit on a batch of samples.
+
+This method is expected to be called several times consecutively
+on different or the same chunks of the dataset. This either can
+implement iterative training or out-of-core/online training.
+
+This is especially useful when the whole dataset is too big to
+fit in memory at the same time, or when the model takes a long time
+to converge and you want to split up training into subparts.
+
+##### Args:
+
+
+*  <b>`X`</b>: matrix or tensor of shape [n_samples, n_features...]. Can be
+    iterator that returns arrays of features. The training input
+    samples for fitting the model.
+*  <b>`y`</b>: vector or matrix [n_samples] or [n_samples, n_outputs]. Can be
+    iterator that returns array of targets. The training target values
+    (class label in classification, real numbers in regression).
+
+##### Returns:
+
+    Returns self.
+
+
+- - -
+
+#### `skflow.TensorFlowDNNRegressor.predict(X, axis=1, batch_size=-1)` {#TensorFlowDNNRegressor.predict}
+
+Predict class or regression for X.
+
+For a classification model, the predicted class for each sample in X is
+returned. For a regression model, the predicted value based on X is
+returned.
+
+##### Args:
+
+
+*  <b>`X`</b>: array-like matrix, [n_samples, n_features...] or iterator.
+*  <b>`axis`</b>: Which axis to argmax for classification.
+          By default axis 1 (next after batch) is used.
+          Use 2 for sequence predictions.
+*  <b>`batch_size`</b>: If test set is too big, use batch size to split
+                it into mini batches. By default full dataset is used.
+
+##### Returns:
+
+
+*  <b>`y`</b>: array of shape [n_samples]. The predicted classes or predicted
+    value.
+
+
+- - -
+
+#### `skflow.TensorFlowDNNRegressor.predict_proba(X, batch_size=-1)` {#TensorFlowDNNRegressor.predict_proba}
+
+Predict class probability of the input samples X.
+
+##### Args:
+
+
+*  <b>`X`</b>: array-like matrix, [n_samples, n_features...] or iterator.
+*  <b>`batch_size`</b>: If test set is too big, use batch size to split
+                it into mini batches. By default full dataset is used.
+
+##### Returns:
+
+
+*  <b>`y`</b>: array of shape [n_samples, n_classes]. The predicted
+    probabilities for each class.
+
+
+- - -
+
+#### `skflow.TensorFlowDNNRegressor.restore(cls, path, config_addon=None)` {#TensorFlowDNNRegressor.restore}
+
+Restores model from given path.
+
+##### Args:
+
+
+*  <b>`path`</b>: Path to the checkpoints and other model information.
+*  <b>`config_addon`</b>: ConfigAddon object that controls the configurations of the session,
+        e.g. num_cores, gpu_memory_fraction, etc. This is allowed to be reconfigured.
+
+##### Returns:
+
+    Estimator, object of the subclass of TensorFlowEstimator.
+
+
+- - -
+
+#### `skflow.TensorFlowDNNRegressor.save(path)` {#TensorFlowDNNRegressor.save}
+
+Saves checkpoints and graph to given path.
+
+##### Args:
+
+
+*  <b>`path`</b>: Folder to save model to.
+
+
+- - -
+
+#### `skflow.TensorFlowDNNRegressor.score(X, y, sample_weight=None)` {#TensorFlowDNNRegressor.score}
+
+Returns the coefficient of determination R^2 of the prediction.
+
+The coefficient R^2 is defined as (1 - u/v), where u is the residual
+sum of squares ((y_true - y_pred) ** 2).sum() and v is the total
+sum of squares ((y_true - y_true.mean()) ** 2).sum().
+Best possible score is 1.0 and it can be negative (because the
+model can be arbitrarily worse). A constant model that always
+predicts the expected value of y, disregarding the input features,
+would get a R^2 score of 0.0.
+
+Parameters
+----------
+X : array-like, shape = (n_samples, n_features)
+    Test samples.
+
+y : array-like, shape = (n_samples) or (n_samples, n_outputs)
+    True values for X.
+
+sample_weight : array-like, shape = [n_samples], optional
+    Sample weights.
+
+Returns
+-------
+score : float
+    R^2 of self.predict(X) wrt. y.
+
+
+- - -
+
+#### `skflow.TensorFlowDNNRegressor.set_params(**params)` {#TensorFlowDNNRegressor.set_params}
+
+Set the parameters of this estimator.
+
+The method works on simple estimators as well as on nested objects
+(such as pipelines). The former have parameters of the form
+``<component>__<parameter>`` so that it's possible to update each
+component of a nested object.
+
+Returns
+-------
+self
+
+
+- - -
+
+#### `skflow.TensorFlowDNNRegressor.weights_` {#TensorFlowDNNRegressor.weights_}
+
+Returns weights of the DNN weight layers.
+
+
+
+- - -
+
+### `class skflow.TensorFlowEstimator` {#TensorFlowEstimator}
+
+Base class for all TensorFlow estimators.
+
+Parameters:
+    model_fn: Model function, that takes input X, y tensors and outputs
+              prediction and loss tensors.
+    n_classes: Number of classes in the target.
+    tf_master: TensorFlow master. Empty string is default for local.
+    batch_size: Mini batch size.
+    steps: Number of steps to run over data.
+    optimizer: Optimizer name (or class), for example "SGD", "Adam",
+               "Adagrad".
+    learning_rate: If this is constant float value, no decay function is used.
+        Instead, a customized decay function can be passed that accepts
+        global_step as parameter and returns a Tensor.
+        e.g. exponential decay function:
+        def exp_decay(global_step):
+            return tf.train.exponential_decay(
+                learning_rate=0.1, global_step=global_step,
+                decay_steps=2, decay_rate=0.001)
+    class_weight: None or list of n_classes floats. Weight associated with
+                 classes for loss computation. If not given, all classes are supposed to have
+                 weight one.
+    tf_random_seed: Random seed for TensorFlow initializers.
+        Setting this value allows consistency between reruns.
+    continue_training: when continue_training is True, once initialized
+        model will be continually trained on every call of fit.
+    config_addon: ConfigAddon object that controls the configurations of the session,
+        e.g. num_cores, gpu_memory_fraction, etc.
+    verbose: Controls the verbosity, possible values:
+             0: the algorithm and debug information is muted.
+             1: trainer prints the progress.
+             2: log device placement is printed.
+    max_to_keep: The maximum number of recent checkpoint files to keep.
+        As new files are created, older files are deleted.
+        If None or 0, all checkpoint files are kept.
+        Defaults to 5 (that is, the 5 most recent checkpoint files are kept.)
+    keep_checkpoint_every_n_hours: Number of hours between each checkpoint
+        to be saved. The default value of 10,000 hours effectively disables the feature.
+- - -
+
+#### `skflow.TensorFlowEstimator.__init__(model_fn, n_classes, tf_master='', batch_size=32, steps=200, optimizer='SGD', learning_rate=0.1, class_weight=None, tf_random_seed=42, continue_training=False, config_addon=None, verbose=1, max_to_keep=5, keep_checkpoint_every_n_hours=10000)` {#TensorFlowEstimator.__init__}
+
+
+
+
+- - -
+
+#### `skflow.TensorFlowEstimator.fit(X, y, monitor=None, logdir=None)` {#TensorFlowEstimator.fit}
+
+Builds a neural network model given provided `model_fn` and training
+data X and y.
+
+Note: the first call constructs the graph and initializes
+variables. Subsequent calls continue training the same model.
+This logic follows partial_fit() interface in scikit-learn.
+
+To restart learning, create new estimator.
+
+##### Args:
+
+
+*  <b>`X`</b>: matrix or tensor of shape [n_samples, n_features...]. Can be
+    iterator that returns arrays of features. The training input
+    samples for fitting the model.
+*  <b>`y`</b>: vector or matrix [n_samples] or [n_samples, n_outputs]. Can be
+    iterator that returns array of targets. The training target values
+    (class labels in classification, real numbers in regression).
+*  <b>`monitor`</b>: Monitor object to print training progress and invoke early stopping
+*  <b>`logdir`</b>: the directory to save the log file that can be used for
+    optional visualization.
+
+##### Returns:
+
+    Returns self.
+
+
+- - -
+
+#### `skflow.TensorFlowEstimator.get_params(deep=True)` {#TensorFlowEstimator.get_params}
+
+Get parameters for this estimator.
+
+Parameters
+----------
+deep: boolean, optional
+    If True, will return the parameters for this estimator and
+    contained subobjects that are estimators.
+
+Returns
+-------
+params : mapping of string to any
+    Parameter names mapped to their values.
+
+
+- - -
+
+#### `skflow.TensorFlowEstimator.get_tensor(name)` {#TensorFlowEstimator.get_tensor}
+
+Returns tensor by name.
+
+##### Args:
+
+
+*  <b>`name`</b>: string, name of the tensor.
+
+##### Returns:
+
+    Tensor.
+
+
+- - -
+
+#### `skflow.TensorFlowEstimator.get_tensor_value(name)` {#TensorFlowEstimator.get_tensor_value}
+
+Returns value of the tensor given by name.
+
+##### Args:
+
+
+*  <b>`name`</b>: string, name of the tensor.
+
+##### Returns:
+
+    Numpy array - value of the tensor.
+
+
+- - -
+
+#### `skflow.TensorFlowEstimator.partial_fit(X, y)` {#TensorFlowEstimator.partial_fit}
+
+Incremental fit on a batch of samples.
+
+This method is expected to be called several times consecutively
+on different or the same chunks of the dataset. This either can
+implement iterative training or out-of-core/online training.
+
+This is especially useful when the whole dataset is too big to
+fit in memory at the same time, or when the model takes a long time
+to converge and you want to split up training into subparts.
+
+##### Args:
+
+
+*  <b>`X`</b>: matrix or tensor of shape [n_samples, n_features...]. Can be
+    iterator that returns arrays of features. The training input
+    samples for fitting the model.
+*  <b>`y`</b>: vector or matrix [n_samples] or [n_samples, n_outputs]. Can be
+    iterator that returns array of targets. The training target values
+    (class label in classification, real numbers in regression).
+
+##### Returns:
+
+    Returns self.
+
+
+- - -
+
+#### `skflow.TensorFlowEstimator.predict(X, axis=1, batch_size=-1)` {#TensorFlowEstimator.predict}
+
+Predict class or regression for X.
+
+For a classification model, the predicted class for each sample in X is
+returned. For a regression model, the predicted value based on X is
+returned.
+
+##### Args:
+
+
+*  <b>`X`</b>: array-like matrix, [n_samples, n_features...] or iterator.
+*  <b>`axis`</b>: Which axis to argmax for classification.
+          By default axis 1 (next after batch) is used.
+          Use 2 for sequence predictions.
+*  <b>`batch_size`</b>: If test set is too big, use batch size to split
+                it into mini batches. By default full dataset is used.
+
+##### Returns:
+
+
+*  <b>`y`</b>: array of shape [n_samples]. The predicted classes or predicted
+    value.
+
+
+- - -
+
+#### `skflow.TensorFlowEstimator.predict_proba(X, batch_size=-1)` {#TensorFlowEstimator.predict_proba}
+
+Predict class probability of the input samples X.
+
+##### Args:
+
+
+*  <b>`X`</b>: array-like matrix, [n_samples, n_features...] or iterator.
+*  <b>`batch_size`</b>: If test set is too big, use batch size to split
+                it into mini batches. By default full dataset is used.
+
+##### Returns:
+
+
+*  <b>`y`</b>: array of shape [n_samples, n_classes]. The predicted
+    probabilities for each class.
+
+
+- - -
+
+#### `skflow.TensorFlowEstimator.restore(cls, path, config_addon=None)` {#TensorFlowEstimator.restore}
+
+Restores model from given path.
+
+##### Args:
+
+
+*  <b>`path`</b>: Path to the checkpoints and other model information.
+*  <b>`config_addon`</b>: ConfigAddon object that controls the configurations of the session,
+        e.g. num_cores, gpu_memory_fraction, etc. This is allowed to be reconfigured.
+
+##### Returns:
+
+    Estimator, object of the subclass of TensorFlowEstimator.
+
+
+- - -
+
+#### `skflow.TensorFlowEstimator.save(path)` {#TensorFlowEstimator.save}
+
+Saves checkpoints and graph to given path.
+
+##### Args:
+
+
+*  <b>`path`</b>: Folder to save model to.
+
+
+- - -
+
+#### `skflow.TensorFlowEstimator.set_params(**params)` {#TensorFlowEstimator.set_params}
+
+Set the parameters of this estimator.
+
+The method works on simple estimators as well as on nested objects
+(such as pipelines). The former have parameters of the form
+``<component>__<parameter>`` so that it's possible to update each
+component of a nested object.
+
+Returns
+-------
+self
+
+
+
+- - -
+
+### `class skflow.TensorFlowLinearClassifier` {#TensorFlowLinearClassifier}
+
+TensorFlow Linear Classifier model.
+- - -
+
+#### `skflow.TensorFlowLinearClassifier.__init__(n_classes, tf_master='', batch_size=32, steps=200, optimizer='SGD', learning_rate=0.1, class_weight=None, tf_random_seed=42, continue_training=False, config_addon=None, verbose=1, max_to_keep=5, keep_checkpoint_every_n_hours=10000)` {#TensorFlowLinearClassifier.__init__}
+
+
+
+
+- - -
+
+#### `skflow.TensorFlowLinearClassifier.bias_` {#TensorFlowLinearClassifier.bias_}
+
+Returns bias of the linear classifier.
+
+
+- - -
+
+#### `skflow.TensorFlowLinearClassifier.fit(X, y, monitor=None, logdir=None)` {#TensorFlowLinearClassifier.fit}
+
+Builds a neural network model given provided `model_fn` and training
+data X and y.
+
+Note: the first call constructs the graph and initializes
+variables. Subsequent calls continue training the same model.
+This logic follows partial_fit() interface in scikit-learn.
+
+To restart learning, create new estimator.
+
+##### Args:
+
+
+*  <b>`X`</b>: matrix or tensor of shape [n_samples, n_features...]. Can be
+    iterator that returns arrays of features. The training input
+    samples for fitting the model.
+*  <b>`y`</b>: vector or matrix [n_samples] or [n_samples, n_outputs]. Can be
+    iterator that returns array of targets. The training target values
+    (class labels in classification, real numbers in regression).
+*  <b>`monitor`</b>: Monitor object to print training progress and invoke early stopping
+*  <b>`logdir`</b>: the directory to save the log file that can be used for
+    optional visualization.
+
+##### Returns:
+
+    Returns self.
+
+
+- - -
+
+#### `skflow.TensorFlowLinearClassifier.get_params(deep=True)` {#TensorFlowLinearClassifier.get_params}
+
+Get parameters for this estimator.
+
+Parameters
+----------
+deep: boolean, optional
+    If True, will return the parameters for this estimator and
+    contained subobjects that are estimators.
+
+Returns
+-------
+params : mapping of string to any
+    Parameter names mapped to their values.
+
+
+- - -
+
+#### `skflow.TensorFlowLinearClassifier.get_tensor(name)` {#TensorFlowLinearClassifier.get_tensor}
+
+Returns tensor by name.
+
+##### Args:
+
+
+*  <b>`name`</b>: string, name of the tensor.
+
+##### Returns:
+
+    Tensor.
+
+
+- - -
+
+#### `skflow.TensorFlowLinearClassifier.get_tensor_value(name)` {#TensorFlowLinearClassifier.get_tensor_value}
+
+Returns value of the tensor given by name.
+
+##### Args:
+
+
+*  <b>`name`</b>: string, name of the tensor.
+
+##### Returns:
+
+    Numpy array - value of the tensor.
+
+
+- - -
+
+#### `skflow.TensorFlowLinearClassifier.partial_fit(X, y)` {#TensorFlowLinearClassifier.partial_fit}
+
+Incremental fit on a batch of samples.
+
+This method is expected to be called several times consecutively
+on different or the same chunks of the dataset. This either can
+implement iterative training or out-of-core/online training.
+
+This is especially useful when the whole dataset is too big to
+fit in memory at the same time, or when the model takes a long time
+to converge and you want to split up training into subparts.
+
+##### Args:
+
+
+*  <b>`X`</b>: matrix or tensor of shape [n_samples, n_features...]. Can be
+    iterator that returns arrays of features. The training input
+    samples for fitting the model.
+*  <b>`y`</b>: vector or matrix [n_samples] or [n_samples, n_outputs]. Can be
+    iterator that returns array of targets. The training target values
+    (class label in classification, real numbers in regression).
+
+##### Returns:
+
+    Returns self.
+
+
+- - -
+
+#### `skflow.TensorFlowLinearClassifier.predict(X, axis=1, batch_size=-1)` {#TensorFlowLinearClassifier.predict}
+
+Predict class or regression for X.
+
+For a classification model, the predicted class for each sample in X is
+returned. For a regression model, the predicted value based on X is
+returned.
+
+##### Args:
+
+
+*  <b>`X`</b>: array-like matrix, [n_samples, n_features...] or iterator.
+*  <b>`axis`</b>: Which axis to argmax for classification.
+          By default axis 1 (next after batch) is used.
+          Use 2 for sequence predictions.
+*  <b>`batch_size`</b>: If test set is too big, use batch size to split
+                it into mini batches. By default full dataset is used.
+
+##### Returns:
+
+
+*  <b>`y`</b>: array of shape [n_samples]. The predicted classes or predicted
+    value.
+
+
+- - -
+
+#### `skflow.TensorFlowLinearClassifier.predict_proba(X, batch_size=-1)` {#TensorFlowLinearClassifier.predict_proba}
+
+Predict class probability of the input samples X.
+
+##### Args:
+
+
+*  <b>`X`</b>: array-like matrix, [n_samples, n_features...] or iterator.
+*  <b>`batch_size`</b>: If test set is too big, use batch size to split
+                it into mini batches. By default full dataset is used.
+
+##### Returns:
+
+
+*  <b>`y`</b>: array of shape [n_samples, n_classes]. The predicted
+    probabilities for each class.
+
+
+- - -
+
+#### `skflow.TensorFlowLinearClassifier.restore(cls, path, config_addon=None)` {#TensorFlowLinearClassifier.restore}
+
+Restores model from given path.
+
+##### Args:
+
+
+*  <b>`path`</b>: Path to the checkpoints and other model information.
+*  <b>`config_addon`</b>: ConfigAddon object that controls the configurations of the session,
+        e.g. num_cores, gpu_memory_fraction, etc. This is allowed to be reconfigured.
+
+##### Returns:
+
+    Estimator, object of the subclass of TensorFlowEstimator.
+
+
+- - -
+
+#### `skflow.TensorFlowLinearClassifier.save(path)` {#TensorFlowLinearClassifier.save}
+
+Saves checkpoints and graph to given path.
+
+##### Args:
+
+
+*  <b>`path`</b>: Folder to save model to.
+
+
+- - -
+
+#### `skflow.TensorFlowLinearClassifier.score(X, y, sample_weight=None)` {#TensorFlowLinearClassifier.score}
+
+Returns the mean accuracy on the given test data and labels.
+
+In multi-label classification, this is the subset accuracy
+which is a harsh metric since you require for each sample that
+each label set be correctly predicted.
+
+Parameters
+----------
+X : array-like, shape = (n_samples, n_features)
+    Test samples.
+
+y : array-like, shape = (n_samples) or (n_samples, n_outputs)
+    True labels for X.
+
+sample_weight : array-like, shape = [n_samples], optional
+    Sample weights.
+
+Returns
+-------
+score : float
+    Mean accuracy of self.predict(X) wrt. y.
+
+
+- - -
+
+#### `skflow.TensorFlowLinearClassifier.set_params(**params)` {#TensorFlowLinearClassifier.set_params}
+
+Set the parameters of this estimator.
+
+The method works on simple estimators as well as on nested objects
+(such as pipelines). The former have parameters of the form
+``<component>__<parameter>`` so that it's possible to update each
+component of a nested object.
+
+Returns
+-------
+self
+
+
+- - -
+
+#### `skflow.TensorFlowLinearClassifier.weights_` {#TensorFlowLinearClassifier.weights_}
+
+Returns weights of the linear classifier.
+
+
+
+- - -
+
+### `class skflow.TensorFlowLinearRegressor` {#TensorFlowLinearRegressor}
+
+TensorFlow Linear Regression model.
+- - -
+
+#### `skflow.TensorFlowLinearRegressor.__init__(n_classes=0, tf_master='', batch_size=32, steps=200, optimizer='SGD', learning_rate=0.1, tf_random_seed=42, continue_training=False, config_addon=None, verbose=1, max_to_keep=5, keep_checkpoint_every_n_hours=10000)` {#TensorFlowLinearRegressor.__init__}
+
+
+
+
+- - -
+
+#### `skflow.TensorFlowLinearRegressor.bias_` {#TensorFlowLinearRegressor.bias_}
+
+Returns bias of the linear regression.
+
+
+- - -
+
+#### `skflow.TensorFlowLinearRegressor.fit(X, y, monitor=None, logdir=None)` {#TensorFlowLinearRegressor.fit}
+
+Builds a neural network model given provided `model_fn` and training
+data X and y.
+
+Note: the first call constructs the graph and initializes
+variables. Subsequent calls continue training the same model.
+This logic follows partial_fit() interface in scikit-learn.
+
+To restart learning, create new estimator.
+
+##### Args:
+
+
+*  <b>`X`</b>: matrix or tensor of shape [n_samples, n_features...]. Can be
+    iterator that returns arrays of features. The training input
+    samples for fitting the model.
+*  <b>`y`</b>: vector or matrix [n_samples] or [n_samples, n_outputs]. Can be
+    iterator that returns array of targets. The training target values
+    (class labels in classification, real numbers in regression).
+*  <b>`monitor`</b>: Monitor object to print training progress and invoke early stopping
+*  <b>`logdir`</b>: the directory to save the log file that can be used for
+    optional visualization.
+
+##### Returns:
+
+    Returns self.
+
+
+- - -
+
+#### `skflow.TensorFlowLinearRegressor.get_params(deep=True)` {#TensorFlowLinearRegressor.get_params}
+
+Get parameters for this estimator.
+
+Parameters
+----------
+deep: boolean, optional
+    If True, will return the parameters for this estimator and
+    contained subobjects that are estimators.
+
+Returns
+-------
+params : mapping of string to any
+    Parameter names mapped to their values.
+
+
+- - -
+
+#### `skflow.TensorFlowLinearRegressor.get_tensor(name)` {#TensorFlowLinearRegressor.get_tensor}
+
+Returns tensor by name.
+
+##### Args:
+
+
+*  <b>`name`</b>: string, name of the tensor.
+
+##### Returns:
+
+    Tensor.
+
+
+- - -
+
+#### `skflow.TensorFlowLinearRegressor.get_tensor_value(name)` {#TensorFlowLinearRegressor.get_tensor_value}
+
+Returns value of the tensor given by name.
+
+##### Args:
+
+
+*  <b>`name`</b>: string, name of the tensor.
+
+##### Returns:
+
+    Numpy array - value of the tensor.
+
+
+- - -
+
+#### `skflow.TensorFlowLinearRegressor.partial_fit(X, y)` {#TensorFlowLinearRegressor.partial_fit}
+
+Incremental fit on a batch of samples.
+
+This method is expected to be called several times consecutively
+on different or the same chunks of the dataset. This either can
+implement iterative training or out-of-core/online training.
+
+This is especially useful when the whole dataset is too big to
+fit in memory at the same time, or when the model takes a long time
+to converge and you want to split up training into subparts.
+
+##### Args:
+
+
+*  <b>`X`</b>: matrix or tensor of shape [n_samples, n_features...]. Can be
+    iterator that returns arrays of features. The training input
+    samples for fitting the model.
+*  <b>`y`</b>: vector or matrix [n_samples] or [n_samples, n_outputs]. Can be
+    iterator that returns array of targets. The training target values
+    (class label in classification, real numbers in regression).
+
+##### Returns:
+
+    Returns self.
+
+
+- - -
+
+#### `skflow.TensorFlowLinearRegressor.predict(X, axis=1, batch_size=-1)` {#TensorFlowLinearRegressor.predict}
+
+Predict class or regression for X.
+
+For a classification model, the predicted class for each sample in X is
+returned. For a regression model, the predicted value based on X is
+returned.
+
+##### Args:
+
+
+*  <b>`X`</b>: array-like matrix, [n_samples, n_features...] or iterator.
+*  <b>`axis`</b>: Which axis to argmax for classification.
+          By default axis 1 (next after batch) is used.
+          Use 2 for sequence predictions.
+*  <b>`batch_size`</b>: If test set is too big, use batch size to split
+                it into mini batches. By default full dataset is used.
+
+##### Returns:
+
+
+*  <b>`y`</b>: array of shape [n_samples]. The predicted classes or predicted
+    value.
+
+
+- - -
+
+#### `skflow.TensorFlowLinearRegressor.predict_proba(X, batch_size=-1)` {#TensorFlowLinearRegressor.predict_proba}
+
+Predict class probability of the input samples X.
+
+##### Args:
+
+
+*  <b>`X`</b>: array-like matrix, [n_samples, n_features...] or iterator.
+*  <b>`batch_size`</b>: If test set is too big, use batch size to split
+                it into mini batches. By default full dataset is used.
+
+##### Returns:
+
+
+*  <b>`y`</b>: array of shape [n_samples, n_classes]. The predicted
+    probabilities for each class.
+
+
+- - -
+
+#### `skflow.TensorFlowLinearRegressor.restore(cls, path, config_addon=None)` {#TensorFlowLinearRegressor.restore}
+
+Restores model from given path.
+
+##### Args:
+
+
+*  <b>`path`</b>: Path to the checkpoints and other model information.
+*  <b>`config_addon`</b>: ConfigAddon object that controls the configurations of the session,
+        e.g. num_cores, gpu_memory_fraction, etc. This is allowed to be reconfigured.
+
+##### Returns:
+
+    Estimator, object of the subclass of TensorFlowEstimator.
+
+
+- - -
+
+#### `skflow.TensorFlowLinearRegressor.save(path)` {#TensorFlowLinearRegressor.save}
+
+Saves checkpoints and graph to given path.
+
+##### Args:
+
+
+*  <b>`path`</b>: Folder to save model to.
+
+
+- - -
+
+#### `skflow.TensorFlowLinearRegressor.score(X, y, sample_weight=None)` {#TensorFlowLinearRegressor.score}
+
+Returns the coefficient of determination R^2 of the prediction.
+
+The coefficient R^2 is defined as (1 - u/v), where u is the residual
+sum of squares ((y_true - y_pred) ** 2).sum() and v is the total
+sum of squares ((y_true - y_true.mean()) ** 2).sum().
+Best possible score is 1.0 and it can be negative (because the
+model can be arbitrarily worse). A constant model that always
+predicts the expected value of y, disregarding the input features,
+would get a R^2 score of 0.0.
+
+Parameters
+----------
+X : array-like, shape = (n_samples, n_features)
+    Test samples.
+
+y : array-like, shape = (n_samples) or (n_samples, n_outputs)
+    True values for X.
+
+sample_weight : array-like, shape = [n_samples], optional
+    Sample weights.
+
+Returns
+-------
+score : float
+    R^2 of self.predict(X) wrt. y.
+
+
+- - -
+
+#### `skflow.TensorFlowLinearRegressor.set_params(**params)` {#TensorFlowLinearRegressor.set_params}
+
+Set the parameters of this estimator.
+
+The method works on simple estimators as well as on nested objects
+(such as pipelines). The former have parameters of the form
+``<component>__<parameter>`` so that it's possible to update each
+component of a nested object.
+
+Returns
+-------
+self
+
+
+- - -
+
+#### `skflow.TensorFlowLinearRegressor.weights_` {#TensorFlowLinearRegressor.weights_}
+
+Returns weights of the linear regression.
+
+
+
+- - -
+
+### `class skflow.TensorFlowRNNClassifier` {#TensorFlowRNNClassifier}
+
+TensorFlow RNN Classifier model.
+
+Parameters:
+    rnn_size: The size for rnn cell, e.g. size of your word embeddings.
+    cell_type: The type of rnn cell, including rnn, gru, and lstm.
+    num_layers: The number of layers of the rnn model.
+    input_op_fn: Function that will transform the input tensor, such as
+                 creating word embeddings, byte list, etc. This takes
+                 an argument X for input and returns transformed X.
+    bidirectional: boolean, Whether this is a bidirectional rnn.
+    sequence_length: If sequence_length is provided, dynamic calculation is performed.
+             This saves computational time when unrolling past max sequence length.
+    initial_state: An initial state for the RNN. This must be a tensor of appropriate type
+                   and shape [batch_size x cell.state_size].
+    n_classes: Number of classes in the target.
+    tf_master: TensorFlow master. Empty string is default for local.
+    batch_size: Mini batch size.
+    steps: Number of steps to run over data.
+    optimizer: Optimizer name (or class), for example "SGD", "Adam",
+               "Adagrad".
+    learning_rate: If this is constant float value, no decay function is used.
+        Instead, a customized decay function can be passed that accepts
+        global_step as parameter and returns a Tensor.
+        e.g. exponential decay function:
+        def exp_decay(global_step):
+            return tf.train.exponential_decay(
+                learning_rate=0.1, global_step=global_step,
+                decay_steps=2, decay_rate=0.001)
+    class_weight: None or list of n_classes floats. Weight associated with
+                 classes for loss computation. If not given, all classes are supposed to have
+                 weight one.
+    tf_random_seed: Random seed for TensorFlow initializers.
+        Setting this value, allows consistency between reruns.
+    continue_training: when continue_training is True, once initialized
+        model will be continually trained on every call of fit.
+    num_cores: Number of cores to be used. (default: 4)
+    max_to_keep: The maximum number of recent checkpoint files to keep.
+        As new files are created, older files are deleted.
+        If None or 0, all checkpoint files are kept.
+        Defaults to 5 (that is, the 5 most recent checkpoint files are kept.)
+    keep_checkpoint_every_n_hours: Number of hours between each checkpoint
+        to be saved. The default value of 10,000 hours effectively disables the feature.
+- - -
+
+#### `skflow.TensorFlowRNNClassifier.__init__(rnn_size, n_classes, cell_type='gru', num_layers=1, input_op_fn=null_input_op_fn, initial_state=None, bidirectional=False, sequence_length=None, tf_master='', batch_size=32, steps=50, optimizer='SGD', learning_rate=0.1, class_weight=None, tf_random_seed=42, continue_training=False, config_addon=None, verbose=1, max_to_keep=5, keep_checkpoint_every_n_hours=10000)` {#TensorFlowRNNClassifier.__init__}
+
+
+
+
+- - -
+
+#### `skflow.TensorFlowRNNClassifier.bias_` {#TensorFlowRNNClassifier.bias_}
+
+Returns bias of the rnn layer.
+
+
+- - -
+
+#### `skflow.TensorFlowRNNClassifier.fit(X, y, monitor=None, logdir=None)` {#TensorFlowRNNClassifier.fit}
+
+Builds a neural network model given provided `model_fn` and training
+data X and y.
+
+Note: the first call constructs the graph and initializes
+variables. Subsequent calls continue training the same model.
+This logic follows partial_fit() interface in scikit-learn.
+
+To restart learning, create new estimator.
+
+##### Args:
+
+
+*  <b>`X`</b>: matrix or tensor of shape [n_samples, n_features...]. Can be
+    iterator that returns arrays of features. The training input
+    samples for fitting the model.
+*  <b>`y`</b>: vector or matrix [n_samples] or [n_samples, n_outputs]. Can be
+    iterator that returns array of targets. The training target values
+    (class labels in classification, real numbers in regression).
+*  <b>`monitor`</b>: Monitor object to print training progress and invoke early stopping
+*  <b>`logdir`</b>: the directory to save the log file that can be used for
+    optional visualization.
+
+##### Returns:
+
+    Returns self.
+
+
+- - -
+
+#### `skflow.TensorFlowRNNClassifier.get_params(deep=True)` {#TensorFlowRNNClassifier.get_params}
+
+Get parameters for this estimator.
+
+Parameters
+----------
+deep: boolean, optional
+    If True, will return the parameters for this estimator and
+    contained subobjects that are estimators.
+
+Returns
+-------
+params : mapping of string to any
+    Parameter names mapped to their values.
+
+
+- - -
+
+#### `skflow.TensorFlowRNNClassifier.get_tensor(name)` {#TensorFlowRNNClassifier.get_tensor}
+
+Returns tensor by name.
+
+##### Args:
+
+
+*  <b>`name`</b>: string, name of the tensor.
+
+##### Returns:
+
+    Tensor.
+
+
+- - -
+
+#### `skflow.TensorFlowRNNClassifier.get_tensor_value(name)` {#TensorFlowRNNClassifier.get_tensor_value}
+
+Returns value of the tensor given by name.
+
+##### Args:
+
+
+*  <b>`name`</b>: string, name of the tensor.
+
+##### Returns:
+
+    Numpy array - value of the tensor.
+
+
+- - -
+
+#### `skflow.TensorFlowRNNClassifier.partial_fit(X, y)` {#TensorFlowRNNClassifier.partial_fit}
+
+Incremental fit on a batch of samples.
+
+This method is expected to be called several times consecutively
+on different or the same chunks of the dataset. This either can
+implement iterative training or out-of-core/online training.
+
+This is especially useful when the whole dataset is too big to
+fit in memory at the same time. Or when model is taking long time
+to converge, and you want to split up training into subparts.
+
+##### Args:
+
+
+*  <b>`X`</b>: matrix or tensor of shape [n_samples, n_features...]. Can be
+    iterator that returns arrays of features. The training input
+    samples for fitting the model.
+*  <b>`y`</b>: vector or matrix [n_samples] or [n_samples, n_outputs]. Can be
+    iterator that returns array of targets. The training target values
+    (class label in classification, real numbers in regression).
+
+##### Returns:
+
+    Returns self.
+
+
+- - -
+
+#### `skflow.TensorFlowRNNClassifier.predict(X, axis=1, batch_size=-1)` {#TensorFlowRNNClassifier.predict}
+
+Predict class or regression for X.
+
+For a classification model, the predicted class for each sample in X is
+returned. For a regression model, the predicted value based on X is
+returned.
+
+##### Args:
+
+
+*  <b>`X`</b>: array-like matrix, [n_samples, n_features...] or iterator.
+*  <b>`axis`</b>: Which axis to argmax for classification.
+          By default axis 1 (next after batch) is used.
+          Use 2 for sequence predictions.
+*  <b>`batch_size`</b>: If test set is too big, use batch size to split
+                it into mini batches. By default full dataset is used.
+
+##### Returns:
+
+
+*  <b>`y`</b>: array of shape [n_samples]. The predicted classes or predicted
+    value.
+
+
+- - -
+
+#### `skflow.TensorFlowRNNClassifier.predict_proba(X, batch_size=-1)` {#TensorFlowRNNClassifier.predict_proba}
+
+Predict class probability of the input samples X.
+
+##### Args:
+
+
+*  <b>`X`</b>: array-like matrix, [n_samples, n_features...] or iterator.
+*  <b>`batch_size`</b>: If test set is too big, use batch size to split
+                it into mini batches. By default full dataset is used.
+
+##### Returns:
+
+
+*  <b>`y`</b>: array of shape [n_samples, n_classes]. The predicted
+    probabilities for each class.
+
+
+- - -
+
+#### `skflow.TensorFlowRNNClassifier.restore(cls, path, config_addon=None)` {#TensorFlowRNNClassifier.restore}
+
+Restores model from given path.
+
+##### Args:
+
+
+*  <b>`path`</b>: Path to the checkpoints and other model information.
+*  <b>`config_addon`</b>: ConfigAddon object that controls the configurations of the session,
+        e.g. num_cores, gpu_memory_fraction, etc. This is allowed to be reconfigured.
+
+##### Returns:
+
+    Estimator, object of the subclass of TensorFlowEstimator.
+
+
+- - -
+
+#### `skflow.TensorFlowRNNClassifier.save(path)` {#TensorFlowRNNClassifier.save}
+
+Saves checkpoints and graph to given path.
+
+##### Args:
+
+
+*  <b>`path`</b>: Folder to save model to.
+
+
+- - -
+
+#### `skflow.TensorFlowRNNClassifier.score(X, y, sample_weight=None)` {#TensorFlowRNNClassifier.score}
+
+Returns the mean accuracy on the given test data and labels.
+
+In multi-label classification, this is the subset accuracy
+which is a harsh metric since you require for each sample that
+each label set be correctly predicted.
+
+Parameters
+----------
+X : array-like, shape = (n_samples, n_features)
+    Test samples.
+
+y : array-like, shape = (n_samples) or (n_samples, n_outputs)
+    True labels for X.
+
+sample_weight : array-like, shape = [n_samples], optional
+    Sample weights.
+
+Returns
+-------
+score : float
+    Mean accuracy of self.predict(X) wrt. y.
+
+
+- - -
+
+#### `skflow.TensorFlowRNNClassifier.set_params(**params)` {#TensorFlowRNNClassifier.set_params}
+
+Set the parameters of this estimator.
+
+The method works on simple estimators as well as on nested objects
+(such as pipelines). The former have parameters of the form
+``<component>__<parameter>`` so that it's possible to update each
+component of a nested object.
+
+Returns
+-------
+self
+
+
+- - -
+
+#### `skflow.TensorFlowRNNClassifier.weights_` {#TensorFlowRNNClassifier.weights_}
+
+Returns weights of the rnn layer.
+
+
+
+- - -
+
+### `class skflow.TensorFlowRNNRegressor` {#TensorFlowRNNRegressor}
+
+TensorFlow RNN Regressor model.
+
+Parameters:
+    rnn_size: The size for rnn cell, e.g. size of your word embeddings.
+    cell_type: The type of rnn cell, including rnn, gru, and lstm.
+    num_layers: The number of layers of the rnn model.
+    input_op_fn: Function that will transform the input tensor, such as
+                 creating word embeddings, byte list, etc. This takes
+                 an argument X for input and returns transformed X.
+    bidirectional: boolean, Whether this is a bidirectional rnn.
+    sequence_length: If sequence_length is provided, dynamic calculation is performed.
+             This saves computational time when unrolling past max sequence length.
+    initial_state: An initial state for the RNN. This must be a tensor of appropriate type
+                   and shape [batch_size x cell.state_size].
+    tf_master: TensorFlow master. Empty string is default for local.
+    batch_size: Mini batch size.
+    steps: Number of steps to run over data.
+    optimizer: Optimizer name (or class), for example "SGD", "Adam",
+               "Adagrad".
+    learning_rate: If this is constant float value, no decay function is used.
+        Instead, a customized decay function can be passed that accepts
+        global_step as parameter and returns a Tensor.
+        e.g. exponential decay function:
+        def exp_decay(global_step):
+            return tf.train.exponential_decay(
+                learning_rate=0.1, global_step=global_step,
+                decay_steps=2, decay_rate=0.001)
+    tf_random_seed: Random seed for TensorFlow initializers.
+        Setting this value, allows consistency between reruns.
+    continue_training: when continue_training is True, once initialized
+        model will be continually trained on every call of fit.
+    num_cores: Number of cores to be used. (default: 4)
+    verbose: Controls the verbosity, possible values:
+             0: the algorithm and debug information is muted.
+             1: trainer prints the progress.
+             2: log device placement is printed.
+    max_to_keep: The maximum number of recent checkpoint files to keep.
+        As new files are created, older files are deleted.
+        If None or 0, all checkpoint files are kept.
+        Defaults to 5 (that is, the 5 most recent checkpoint files are kept.)
+    keep_checkpoint_every_n_hours: Number of hours between each checkpoint
+        to be saved. The default value of 10,000 hours effectively disables the feature.
+- - -
+
+#### `skflow.TensorFlowRNNRegressor.__init__(rnn_size, cell_type='gru', num_layers=1, input_op_fn=null_input_op_fn, initial_state=None, bidirectional=False, sequence_length=None, n_classes=0, tf_master='', batch_size=32, steps=50, optimizer='SGD', learning_rate=0.1, tf_random_seed=42, continue_training=False, config_addon=None, verbose=1, max_to_keep=5, keep_checkpoint_every_n_hours=10000)` {#TensorFlowRNNRegressor.__init__}
+
+
+
+
+- - -
+
+#### `skflow.TensorFlowRNNRegressor.bias_` {#TensorFlowRNNRegressor.bias_}
+
+Returns bias of the rnn layer.
+
+
+- - -
+
+#### `skflow.TensorFlowRNNRegressor.fit(X, y, monitor=None, logdir=None)` {#TensorFlowRNNRegressor.fit}
+
+Builds a neural network model given provided `model_fn` and training
+data X and y.
+
+Note: the first call constructs the graph and initializes
+variables. Subsequent calls continue training the same model.
+This logic follows partial_fit() interface in scikit-learn.
+
+To restart learning, create new estimator.
+
+##### Args:
+
+
+*  <b>`X`</b>: matrix or tensor of shape [n_samples, n_features...]. Can be
+    iterator that returns arrays of features. The training input
+    samples for fitting the model.
+*  <b>`y`</b>: vector or matrix [n_samples] or [n_samples, n_outputs]. Can be
+    iterator that returns array of targets. The training target values
+    (class labels in classification, real numbers in regression).
+*  <b>`monitor`</b>: Monitor object to print training progress and invoke early stopping
+*  <b>`logdir`</b>: the directory to save the log file that can be used for
+    optional visualization.
+
+##### Returns:
+
+    Returns self.
+
+
+- - -
+
+#### `skflow.TensorFlowRNNRegressor.get_params(deep=True)` {#TensorFlowRNNRegressor.get_params}
+
+Get parameters for this estimator.
+
+Parameters
+----------
+deep: boolean, optional
+    If True, will return the parameters for this estimator and
+    contained subobjects that are estimators.
+
+Returns
+-------
+params : mapping of string to any
+    Parameter names mapped to their values.
+
+
+- - -
+
+#### `skflow.TensorFlowRNNRegressor.get_tensor(name)` {#TensorFlowRNNRegressor.get_tensor}
+
+Returns tensor by name.
+
+##### Args:
+
+
+*  <b>`name`</b>: string, name of the tensor.
+
+##### Returns:
+
+    Tensor.
+
+
+- - -
+
+#### `skflow.TensorFlowRNNRegressor.get_tensor_value(name)` {#TensorFlowRNNRegressor.get_tensor_value}
+
+Returns value of the tensor given by name.
+
+##### Args:
+
+
+*  <b>`name`</b>: string, name of the tensor.
+
+##### Returns:
+
+    Numpy array - value of the tensor.
+
+
+- - -
+
+#### `skflow.TensorFlowRNNRegressor.partial_fit(X, y)` {#TensorFlowRNNRegressor.partial_fit}
+
+Incremental fit on a batch of samples.
+
+This method is expected to be called several times consecutively
+on different or the same chunks of the dataset. This either can
+implement iterative training or out-of-core/online training.
+
+This is especially useful when the whole dataset is too big to
+fit in memory at the same time. Or when model is taking long time
+to converge, and you want to split up training into subparts.
+
+##### Args:
+
+
+*  <b>`X`</b>: matrix or tensor of shape [n_samples, n_features...]. Can be
+    iterator that returns arrays of features. The training input
+    samples for fitting the model.
+*  <b>`y`</b>: vector or matrix [n_samples] or [n_samples, n_outputs]. Can be
+    iterator that returns array of targets. The training target values
+    (class label in classification, real numbers in regression).
+
+##### Returns:
+
+    Returns self.
+
+
+- - -
+
+#### `skflow.TensorFlowRNNRegressor.predict(X, axis=1, batch_size=-1)` {#TensorFlowRNNRegressor.predict}
+
+Predict class or regression for X.
+
+For a classification model, the predicted class for each sample in X is
+returned. For a regression model, the predicted value based on X is
+returned.
+
+##### Args:
+
+
+*  <b>`X`</b>: array-like matrix, [n_samples, n_features...] or iterator.
+*  <b>`axis`</b>: Which axis to argmax for classification.
+          By default axis 1 (next after batch) is used.
+          Use 2 for sequence predictions.
+*  <b>`batch_size`</b>: If test set is too big, use batch size to split
+                it into mini batches. By default full dataset is used.
+
+##### Returns:
+
+
+*  <b>`y`</b>: array of shape [n_samples]. The predicted classes or predicted
+    value.
+
+
+- - -
+
+#### `skflow.TensorFlowRNNRegressor.predict_proba(X, batch_size=-1)` {#TensorFlowRNNRegressor.predict_proba}
+
+Predict class probability of the input samples X.
+
+##### Args:
+
+
+*  <b>`X`</b>: array-like matrix, [n_samples, n_features...] or iterator.
+*  <b>`batch_size`</b>: If test set is too big, use batch size to split
+                it into mini batches. By default full dataset is used.
+
+##### Returns:
+
+
+*  <b>`y`</b>: array of shape [n_samples, n_classes]. The predicted
+    probabilities for each class.
+
+
+- - -
+
+#### `skflow.TensorFlowRNNRegressor.restore(cls, path, config_addon=None)` {#TensorFlowRNNRegressor.restore}
+
+Restores model from given path.
+
+##### Args:
+
+
+*  <b>`path`</b>: Path to the checkpoints and other model information.
+*  <b>`config_addon`</b>: ConfigAddon object that controls the configurations of the session,
+        e.g. num_cores, gpu_memory_fraction, etc. This is allowed to be reconfigured.
+
+##### Returns:
+
+    Estimator, object of the subclass of TensorFlowEstimator.
+
+
+- - -
+
+#### `skflow.TensorFlowRNNRegressor.save(path)` {#TensorFlowRNNRegressor.save}
+
+Saves checkpoints and graph to given path.
+
+##### Args:
+
+
+*  <b>`path`</b>: Folder to save model to.
+
+
+- - -
+
+#### `skflow.TensorFlowRNNRegressor.score(X, y, sample_weight=None)` {#TensorFlowRNNRegressor.score}
+
+Returns the coefficient of determination R^2 of the prediction.
+
+The coefficient R^2 is defined as (1 - u/v), where u is the residual
+sum of squares ((y_true - y_pred) ** 2).sum() and v is the total
+sum of squares ((y_true - y_true.mean()) ** 2).sum().
+Best possible score is 1.0 and it can be negative (because the
+model can be arbitrarily worse). A constant model that always
+predicts the expected value of y, disregarding the input features,
+would get a R^2 score of 0.0.
+
+Parameters
+----------
+X : array-like, shape = (n_samples, n_features)
+    Test samples.
+
+y : array-like, shape = (n_samples) or (n_samples, n_outputs)
+    True values for X.
+
+sample_weight : array-like, shape = [n_samples], optional
+    Sample weights.
+
+Returns
+-------
+score : float
+    R^2 of self.predict(X) wrt. y.
+
+
+- - -
+
+#### `skflow.TensorFlowRNNRegressor.set_params(**params)` {#TensorFlowRNNRegressor.set_params}
+
+Set the parameters of this estimator.
+
+The method works on simple estimators as well as on nested objects
+(such as pipelines). The former have parameters of the form
+``<component>__<parameter>`` so that it's possible to update each
+component of a nested object.
+
+Returns
+-------
+self
+
+
+- - -
+
+#### `skflow.TensorFlowRNNRegressor.weights_` {#TensorFlowRNNRegressor.weights_}
+
+Returns weights of the rnn layer.
+
+
+
+- - -
+
+### `class skflow.TensorFlowRegressor` {#TensorFlowRegressor}
+
+TensorFlow Linear Regression model.
+- - -
+
+#### `skflow.TensorFlowRegressor.__init__(n_classes=0, tf_master='', batch_size=32, steps=200, optimizer='SGD', learning_rate=0.1, tf_random_seed=42, continue_training=False, config_addon=None, verbose=1, max_to_keep=5, keep_checkpoint_every_n_hours=10000)` {#TensorFlowRegressor.__init__}
+
+
+
+
+- - -
+
+#### `skflow.TensorFlowRegressor.bias_` {#TensorFlowRegressor.bias_}
+
+Returns bias of the linear regression.
+
+
+- - -
+
+#### `skflow.TensorFlowRegressor.fit(X, y, monitor=None, logdir=None)` {#TensorFlowRegressor.fit}
+
+Builds a neural network model given provided `model_fn` and training
+data X and y.
+
+Note: the first call constructs the graph and initializes
+variables. Subsequent calls continue training the same model.
+This logic follows partial_fit() interface in scikit-learn.
+
+To restart learning, create new estimator.
+
+##### Args:
+
+
+*  <b>`X`</b>: matrix or tensor of shape [n_samples, n_features...]. Can be
+    iterator that returns arrays of features. The training input
+    samples for fitting the model.
+*  <b>`y`</b>: vector or matrix [n_samples] or [n_samples, n_outputs]. Can be
+    iterator that returns array of targets. The training target values
+    (class labels in classification, real numbers in regression).
+*  <b>`monitor`</b>: Monitor object to print training progress and invoke early stopping
+*  <b>`logdir`</b>: the directory to save the log file that can be used for
+    optional visualization.
+
+##### Returns:
+
+    Returns self.
+
+
+- - -
+
+#### `skflow.TensorFlowRegressor.get_params(deep=True)` {#TensorFlowRegressor.get_params}
+
+Get parameters for this estimator.
+
+Parameters
+----------
+deep: boolean, optional
+    If True, will return the parameters for this estimator and
+    contained subobjects that are estimators.
+
+Returns
+-------
+params : mapping of string to any
+    Parameter names mapped to their values.
+
+
+- - -
+
+#### `skflow.TensorFlowRegressor.get_tensor(name)` {#TensorFlowRegressor.get_tensor}
+
+Returns tensor by name.
+
+##### Args:
+
+
+*  <b>`name`</b>: string, name of the tensor.
+
+##### Returns:
+
+    Tensor.
+
+
+- - -
+
+#### `skflow.TensorFlowRegressor.get_tensor_value(name)` {#TensorFlowRegressor.get_tensor_value}
+
+Returns value of the tensor given by name.
+
+##### Args:
+
+
+*  <b>`name`</b>: string, name of the tensor.
+
+##### Returns:
+
+    Numpy array - value of the tensor.
+
+
+- - -
+
+#### `skflow.TensorFlowRegressor.partial_fit(X, y)` {#TensorFlowRegressor.partial_fit}
+
+Incremental fit on a batch of samples.
+
+This method is expected to be called several times consecutively
+on different or the same chunks of the dataset. This either can
+implement iterative training or out-of-core/online training.
+
+This is especially useful when the whole dataset is too big to
+fit in memory at the same time. Or when model is taking long time
+to converge, and you want to split up training into subparts.
+
+##### Args:
+
+
+*  <b>`X`</b>: matrix or tensor of shape [n_samples, n_features...]. Can be
+    iterator that returns arrays of features. The training input
+    samples for fitting the model.
+*  <b>`y`</b>: vector or matrix [n_samples] or [n_samples, n_outputs]. Can be
+    iterator that returns array of targets. The training target values
+    (class label in classification, real numbers in regression).
+
+##### Returns:
+
+    Returns self.
+
+
+- - -
+
+#### `skflow.TensorFlowRegressor.predict(X, axis=1, batch_size=-1)` {#TensorFlowRegressor.predict}
+
+Predict class or regression for X.
+
+For a classification model, the predicted class for each sample in X is
+returned. For a regression model, the predicted value based on X is
+returned.
+
+##### Args:
+
+
+*  <b>`X`</b>: array-like matrix, [n_samples, n_features...] or iterator.
+*  <b>`axis`</b>: Which axis to argmax for classification.
+          By default axis 1 (next after batch) is used.
+          Use 2 for sequence predictions.
+*  <b>`batch_size`</b>: If test set is too big, use batch size to split
+                it into mini batches. By default full dataset is used.
+
+##### Returns:
+
+
+*  <b>`y`</b>: array of shape [n_samples]. The predicted classes or predicted
+    value.
+
+
+- - -
+
+#### `skflow.TensorFlowRegressor.predict_proba(X, batch_size=-1)` {#TensorFlowRegressor.predict_proba}
+
+Predict class probability of the input samples X.
+
+##### Args:
+
+
+*  <b>`X`</b>: array-like matrix, [n_samples, n_features...] or iterator.
+*  <b>`batch_size`</b>: If test set is too big, use batch size to split
+                it into mini batches. By default full dataset is used.
+
+##### Returns:
+
+
+*  <b>`y`</b>: array of shape [n_samples, n_classes]. The predicted
+    probabilities for each class.
+
+
+- - -
+
+#### `skflow.TensorFlowRegressor.restore(cls, path, config_addon=None)` {#TensorFlowRegressor.restore}
+
+Restores model from given path.
+
+##### Args:
+
+
+*  <b>`path`</b>: Path to the checkpoints and other model information.
+*  <b>`config_addon`</b>: ConfigAddon object that controls the configurations of the session,
+        e.g. num_cores, gpu_memory_fraction, etc. This is allowed to be reconfigured.
+
+##### Returns:
+
+    Estimator, object of the subclass of TensorFlowEstimator.
+
+
+- - -
+
+#### `skflow.TensorFlowRegressor.save(path)` {#TensorFlowRegressor.save}
+
+Saves checkpoints and graph to given path.
+
+##### Args:
+
+
+*  <b>`path`</b>: Folder to save model to.
+
+
+- - -
+
+#### `skflow.TensorFlowRegressor.score(X, y, sample_weight=None)` {#TensorFlowRegressor.score}
+
+Returns the coefficient of determination R^2 of the prediction.
+
+The coefficient R^2 is defined as (1 - u/v), where u is the residual
+sum of squares ((y_true - y_pred) ** 2).sum() and v is the total
+sum of squares ((y_true - y_true.mean()) ** 2).sum().
+Best possible score is 1.0 and it can be negative (because the
+model can be arbitrarily worse). A constant model that always
+predicts the expected value of y, disregarding the input features,
+would get a R^2 score of 0.0.
+
+Parameters
+----------
+X : array-like, shape = (n_samples, n_features)
+    Test samples.
+
+y : array-like, shape = (n_samples) or (n_samples, n_outputs)
+    True values for X.
+
+sample_weight : array-like, shape = [n_samples], optional
+    Sample weights.
+
+Returns
+-------
+score : float
+    R^2 of self.predict(X) wrt. y.
+
+
+- - -
+
+#### `skflow.TensorFlowRegressor.set_params(**params)` {#TensorFlowRegressor.set_params}
+
+Set the parameters of this estimator.
+
+The method works on simple estimators as well as on nested objects
+(such as pipelines). The former have parameters of the form
+``<component>__<parameter>`` so that it's possible to update each
+component of a nested object.
+
+Returns
+-------
+self
+
+
+- - -
+
+#### `skflow.TensorFlowRegressor.weights_` {#TensorFlowRegressor.weights_}
+
+Returns weights of the linear regression.
+
+
+
diff --git a/tensorflow/contrib/skflow/g3doc/api_docs/python/index.md b/tensorflow/contrib/skflow/g3doc/api_docs/python/index.md
new file mode 100644
index 0000000000..a656f19161
--- /dev/null
+++ b/tensorflow/contrib/skflow/g3doc/api_docs/python/index.md
@@ -0,0 +1,27 @@
+---
+---
+<!-- This file is machine generated: DO NOT EDIT! -->
+
+# TensorFlow Python reference documentation
+
+* **[Estimators](../../api_docs/python/estimators)**:
+  * [`TensorFlowClassifier`](../../api_docs/python/estimators#TensorFlowClassifier)
+  * [`TensorFlowDNNClassifier`](../../api_docs/python/estimators#TensorFlowDNNClassifier)
+  * [`TensorFlowDNNRegressor`](../../api_docs/python/estimators#TensorFlowDNNRegressor)
+  * [`TensorFlowEstimator`](../../api_docs/python/estimators#TensorFlowEstimator)
+  * [`TensorFlowLinearClassifier`](../../api_docs/python/estimators#TensorFlowLinearClassifier)
+  * [`TensorFlowLinearRegressor`](../../api_docs/python/estimators#TensorFlowLinearRegressor)
+  * [`TensorFlowRegressor`](../../api_docs/python/estimators#TensorFlowRegressor)
+  * [`TensorFlowRNNClassifier`](../../api_docs/python/estimators#TensorFlowRNNClassifier)
+  * [`TensorFlowRNNRegressor`](../../api_docs/python/estimators#TensorFlowRNNRegressor)
+
+* **[IO](../../api_docs/python/io)**:
+  * [`extract_dask_data`](../../api_docs/python/io#extract_dask_data)
+  * [`extract_dask_labels`](../../api_docs/python/io#extract_dask_labels)
+  * [`extract_pandas_data`](../../api_docs/python/io#extract_pandas_data)
+  * [`extract_pandas_labels`](../../api_docs/python/io#extract_pandas_labels)
+  * [`extract_pandas_matrix`](../../api_docs/python/io#extract_pandas_matrix)
+
+* **[Trainer](../../api_docs/python/trainer)**:
+  * [`TensorFlowTrainer`](../../api_docs/python/trainer#TensorFlowTrainer)
+
diff --git a/tensorflow/contrib/skflow/g3doc/api_docs/python/io.md b/tensorflow/contrib/skflow/g3doc/api_docs/python/io.md
new file mode 100644
index 0000000000..33fcd3f1db
--- /dev/null
+++ b/tensorflow/contrib/skflow/g3doc/api_docs/python/io.md
@@ -0,0 +1,45 @@
+---
+---
+<!-- This file is machine generated: DO NOT EDIT! -->
+
+# IO
+[TOC]
+
+Tools to allow different io formats.
+
+## Other Functions and Classes
+- - -
+
+### `skflow.extract_dask_data(data)` {#extract_dask_data}
+
+Extract data from dask.Series or dask.DataFrame for predictors
+
+
+- - -
+
+### `skflow.extract_dask_labels(labels)` {#extract_dask_labels}
+
+Extract data from dask.Series for labels
+
+
+- - -
+
+### `skflow.extract_pandas_data(data)` {#extract_pandas_data}
+
+Extract data from pandas.DataFrame for predictors
+
+
+- - -
+
+### `skflow.extract_pandas_labels(labels)` {#extract_pandas_labels}
+
+Extract data from pandas.DataFrame for labels
+
+
+- - -
+
+### `skflow.extract_pandas_matrix(data)` {#extract_pandas_matrix}
+
+Extracts numpy matrix from pandas DataFrame.
+
+
diff --git a/tensorflow/contrib/skflow/g3doc/api_docs/python/models.md b/tensorflow/contrib/skflow/g3doc/api_docs/python/models.md
new file mode 100644
index 0000000000..b5ad9d40f5
--- /dev/null
+++ b/tensorflow/contrib/skflow/g3doc/api_docs/python/models.md
@@ -0,0 +1,8 @@
+---
+---
+<!-- This file is machine generated: DO NOT EDIT! -->
+
+# Models
+[TOC]
+
+Various high level TF models.
diff --git a/tensorflow/contrib/skflow/g3doc/api_docs/python/ops.array_ops.md b/tensorflow/contrib/skflow/g3doc/api_docs/python/ops.array_ops.md
new file mode 100644
index 0000000000..40e2b31c33
--- /dev/null
+++ b/tensorflow/contrib/skflow/g3doc/api_docs/python/ops.array_ops.md
@@ -0,0 +1,10 @@
+<!-- This file is machine generated: DO NOT EDIT! -->
+
+# Tensor Transformations
+
+Note: Functions taking `Tensor` arguments can also take anything accepted by
+[`tf.convert_to_tensor`](framework.md#convert_to_tensor).
+
+[TOC]
+
+TensorFlow ops for array / tensor manipulation.
diff --git a/tensorflow/contrib/skflow/g3doc/api_docs/python/ops.md b/tensorflow/contrib/skflow/g3doc/api_docs/python/ops.md
new file mode 100644
index 0000000000..895bbe5995
--- /dev/null
+++ b/tensorflow/contrib/skflow/g3doc/api_docs/python/ops.md
@@ -0,0 +1,12 @@
+---
+---
+<!-- This file is machine generated: DO NOT EDIT! -->
+
+# Tensor Transformations
+
+Note: Functions taking `Tensor` arguments can also take anything accepted by
+[`tf.convert_to_tensor`](framework.md#convert_to_tensor).
+
+[TOC]
+
+Main Scikit Flow module.
diff --git a/tensorflow/contrib/skflow/g3doc/api_docs/python/preprocessing.md b/tensorflow/contrib/skflow/g3doc/api_docs/python/preprocessing.md
new file mode 100644
index 0000000000..777a1e5007
--- /dev/null
+++ b/tensorflow/contrib/skflow/g3doc/api_docs/python/preprocessing.md
@@ -0,0 +1,8 @@
+---
+---
+<!-- This file is machine generated: DO NOT EDIT! -->
+
+# Preprocessing
+[TOC]
+
+Preprocessing tools useful for building models.
diff --git a/tensorflow/contrib/skflow/g3doc/api_docs/python/trainer.md b/tensorflow/contrib/skflow/g3doc/api_docs/python/trainer.md
new file mode 100644
index 0000000000..9ea64744f4
--- /dev/null
+++ b/tensorflow/contrib/skflow/g3doc/api_docs/python/trainer.md
@@ -0,0 +1,84 @@
+---
+---
+<!-- This file is machine generated: DO NOT EDIT! -->
+
+# Trainer
+[TOC]
+
+Generic trainer for TensorFlow models.
+
+## Other Functions and Classes
+- - -
+
+### `class skflow.TensorFlowTrainer` {#TensorFlowTrainer}
+
+General trainer class.
+
+Attributes:
+  model: Model object.
+  gradients: Gradients tensor.
+- - -
+
+#### `skflow.TensorFlowTrainer.__init__(loss, global_step, optimizer, learning_rate, clip_gradients=5.0)` {#TensorFlowTrainer.__init__}
+
+Build a trainer part of graph.
+
+##### Args:
+
+
+*  <b>`loss`</b>: Tensor that evaluates to model's loss.
+*  <b>`global_step`</b>: Tensor with global step of the model.
+*  <b>`optimizer`</b>: Name of the optimizer class (SGD, Adam, Adagrad) or class.
+*  <b>`learning_rate`</b>: If this is constant float value, no decay function is used.
+                 Instead, a customized decay function can be passed that accepts
+                 global_step as parameter and returns a Tensor.
+                 e.g. exponential decay function:
+                 def exp_decay(global_step):
+                    return tf.train.exponential_decay(
+                        learning_rate=0.1, global_step=global_step,
+                        decay_steps=2, decay_rate=0.001)
+
+##### Raises:
+
+
+*  <b>`ValueError`</b>: if learning_rate is not a float or a callable.
+
+
+- - -
+
+#### `skflow.TensorFlowTrainer.initialize(sess)` {#TensorFlowTrainer.initialize}
+
+Initializes all variables.
+
+##### Args:
+
+
+*  <b>`sess`</b>: Session object.
+
+##### Returns:
+
+    Values of initializers.
+
+
+- - -
+
+#### `skflow.TensorFlowTrainer.train(sess, feed_dict_fn, steps, monitor, summary_writer=None, summaries=None, feed_params_fn=None)` {#TensorFlowTrainer.train}
+
+Trains a model for given number of steps, given feed_dict function.
+
+##### Args:
+
+
+*  <b>`sess`</b>: Session object.
+*  <b>`feed_dict_fn`</b>: Function that will return a feed dictionary.
+*  <b>`summary_writer`</b>: SummaryWriter object to use for writing summaries.
+*  <b>`steps`</b>: Number of steps to run.
+*  <b>`monitor`</b>: Monitor object to track training progress and induce early stopping
+*  <b>`summaries`</b>: Joined object of all summaries that should be run.
+
+##### Returns:
+
+    List of losses for each step.
+
+
+
diff --git a/tensorflow/contrib/skflow/g3doc/get_started/index.md b/tensorflow/contrib/skflow/g3doc/get_started/index.md
new file mode 100644
index 0000000000..5eb27b6d0e
--- /dev/null
+++ b/tensorflow/contrib/skflow/g3doc/get_started/index.md
@@ -0,0 +1,125 @@
+# Introduction
+
+Below are a few simple examples of the API. For more examples, please see [examples](https://github.com/tensorflow/skflow/tree/master/examples).
+
+## General tips
+
+-  It's useful to re-scale the dataset to zero mean and unit standard deviation before passing it to an estimator. Stochastic Gradient Descent doesn't always do the right thing when variables are on very different scales.
+
+-  Categorical variables should be managed before passing input to the estimator.
+
+## Linear Classifier
+
+Simple linear classification:
+
+.. code:: python
+
+    import skflow
+    from sklearn import datasets, metrics
+
+    iris = datasets.load_iris()
+    classifier = skflow.TensorFlowLinearClassifier(n_classes=3)
+    classifier.fit(iris.data, iris.target)
+    score = metrics.accuracy_score(iris.target, classifier.predict(iris.data))
+    print("Accuracy: %f" % score)
+
+## Linear Regressor
+
+Simple linear regression:
+
+.. code:: python
+
+    import skflow
+    from sklearn import datasets, metrics, preprocessing
+
+    boston = datasets.load_boston()
+    X = preprocessing.StandardScaler().fit_transform(boston.data)
+    regressor = skflow.TensorFlowLinearRegressor()
+    regressor.fit(X, boston.target)
+    score = metrics.mean_squared_error(regressor.predict(X), boston.target)
+    print ("MSE: %f" % score)
+
+## Deep Neural Network
+
+Example of 3 layer network with 10, 20 and 10 hidden units respectively:
+
+.. code:: python
+
+    import skflow
+    from sklearn import datasets, metrics
+
+    iris = datasets.load_iris()
+    classifier = skflow.TensorFlowDNNClassifier(hidden_units=[10, 20, 10], n_classes=3)
+    classifier.fit(iris.data, iris.target)
+    score = metrics.accuracy_score(iris.target, classifier.predict(iris.data))
+    print("Accuracy: %f" % score)
+
+## Custom model
+
+Example of how to pass a custom model to the TensorFlowEstimator:
+
+.. code:: python
+
+    import skflow
+    from sklearn import datasets, metrics
+
+    iris = datasets.load_iris()
+
+    def my_model(X, y):
+        """This is DNN with 10, 20, 10 hidden layers, and dropout of 0.5 probability."""
+        layers = skflow.ops.dnn(X, [10, 20, 10], keep_prob=0.5)
+        return skflow.models.logistic_regression(layers, y)
+
+    classifier = skflow.TensorFlowEstimator(model_fn=my_model, n_classes=3)
+    classifier.fit(iris.data, iris.target)
+    score = metrics.accuracy_score(iris.target, classifier.predict(iris.data))
+    print("Accuracy: %f" % score)
+
+## Saving / Restoring models
+
+Each estimator has a ``save`` method which takes folder path where all model information will be saved. For restoring you can just call ``skflow.TensorFlowEstimator.restore(path)`` and it will return object of your class.
+
+Some example code:
+
+.. code:: python
+
+    import skflow
+
+    classifier = skflow.TensorFlowLinearRegressor()
+    classifier.fit(...)
+    classifier.save('/tmp/tf_examples/my_model_1/')
+
+    new_classifier = skflow.TensorFlowEstimator.restore('/tmp/tf_examples/my_model_1/')
+    new_classifier.predict(...)
+
+## Summaries
+
+To get nice visualizations and summaries you can use ``logdir`` parameter on ``fit``. It will start writing summaries for ``loss`` and histograms for variables in your model. You can also add custom summaries in your custom model function by calling ``tf.summary`` and passing Tensors to report.
+
+.. code:: python
+
+    classifier = skflow.TensorFlowLinearRegressor()
+    classifier.fit(X, y, logdir='/tmp/tf_examples/my_model_1/')
+
+Then run next command in command line:
+
+.. code:: bash
+
+    tensorboard --logdir=/tmp/tf_examples/my_model_1
+
+and follow reported url.
+
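+Graph visualization: ![Text classification RNN Graph](https://raw.githubusercontent.com/tensorflow/skflow/master/g3doc/images/text_classification_rnn_graph.png)
+
+Loss visualization: ![Text classification RNN Loss](https://raw.githubusercontent.com/tensorflow/skflow/master/g3doc/images/text_classification_rnn_loss.png)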
+
+# More examples
+
+See examples folder for:
+
+-  Easy way to handle categorical variables - words are just an example of categorical variable.
+-  Text Classification - see examples for RNN, CNN on word and characters.
+-  Language modeling and text sequence to sequence.
+-  Images (CNNs) - see example for digit recognition.
+-  More & deeper - different examples showing DNNs and CNNs
+
diff --git a/tensorflow/contrib/skflow/g3doc/how_to/index.md b/tensorflow/contrib/skflow/g3doc/how_to/index.md
new file mode 100644
index 0000000000..9eb486e2d6
--- /dev/null
+++ b/tensorflow/contrib/skflow/g3doc/how_to/index.md
@@ -0,0 +1,11 @@
+# How to
+
+## Re-generate API documentation
+
+To regenerate the API documentation, run these commands from the main git folder:
+
+    cd scripts/docs
+    ./gen_docs.sh
+
+Then review and commit changes.
+
diff --git a/tensorflow/contrib/skflow/g3doc/index.md b/tensorflow/contrib/skflow/g3doc/index.md
new file mode 100644
index 0000000000..10f3826bd4
--- /dev/null
+++ b/tensorflow/contrib/skflow/g3doc/index.md
@@ -0,0 +1,54 @@
+# Scikit Flow
+
+This is a simplified interface for TensorFlow, to get people started on predictive analytics and data mining.
+
+Library covers variety of needs from linear models to *Deep Learning* applications like text and image understanding.
+
+## Why *TensorFlow*? 
+
+- TensorFlow provides a good backbone for building different shapes of machine learning applications. 
+- It will continue to evolve both in the distributed direction and as general pipelining machinery.
+
+## Why *Scikit Flow*? 
+
+- To smooth the transition from the Scikit Learn world of one-liner machine learning into the more open world of building different shapes of ML models. You can start by using fit/predict and slide into TensorFlow APIs as you are getting comfortable. 
+- To provide a set of reference models that would be easy to integrate with existing code.
+
+# Installation
+
+## Dependencies
+
+- Python: 2.7, 3.4+ 
+- Scikit learn: 0.16, 0.17, 0.18+ 
+- Tensorflow: 0.7+
+
+First, you need to make sure you have [TensorFlow](https://github.com/tensorflow/tensorflow#installation) and [Scikit Learn](http://scikit-learn.org/stable/install.html) installed.
+
+Run the following to install the stable version from PyPI:
+
+    pip install skflow
+
+Or run the following to install from the development version from Github:
+
+    pip install git+git://github.com/tensorflow/skflow.git
+
+## Tutorial
+
+-  [Introduction to Scikit Flow and why you want to start learning
+   TensorFlow](https://medium.com/@ilblackdragon/tensorflow-tutorial-part-1-c559c63c0cb1)
+-  [DNNs, custom model and Digit recognition
+   examples](https://medium.com/@ilblackdragon/tensorflow-tutorial-part-2-9ffe47049c92)
+-  [Categorical variables: One hot vs Distributed
+   representation](https://medium.com/@ilblackdragon/tensorflow-tutorial-part-3-c5fc0662bc08)
+-  More coming soon.
+
+## Community
+
+- Twitter [#skflow](https://twitter.com/search?q=skflow&src=typd).
+- StackOverflow with [skflow tag](http://stackoverflow.com/questions/tagged/skflow) for questions and struggles.
+- Github [issues](https://github.com/tensorflow/skflow/issues) for technical discussions and feature requests.
+- [Gitter channel](https://gitter.im/tensorflow/skflow) for non-trivial discussions.
+
+## Table of Contents
+<!--#include virtual="sitemap.md" -->
+
diff --git a/tensorflow/contrib/skflow/python/__init__.py b/tensorflow/contrib/skflow/python/__init__.py
new file mode 100644
index 0000000000..2295fbdddc
--- /dev/null
+++ b/tensorflow/contrib/skflow/python/__init__.py
@@ -0,0 +1,20 @@
+# Copyright 2016 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from __future__ import division, print_function, absolute_import
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+# Fully qualified on purpose: absolute_import disables the implicit relative `skflow`.
+from tensorflow.contrib.skflow.python.skflow import *
diff --git a/tensorflow/contrib/skflow/python/skflow/README.rst b/tensorflow/contrib/skflow/python/skflow/README.rst
new file mode 100644
index 0000000000..04dd2bb460
--- /dev/null
+++ b/tensorflow/contrib/skflow/python/skflow/README.rst
@@ -0,0 +1,208 @@
+|Travis-CI Build Status| |Codecov Status| |License| |PyPI version| |Join the chat at
+https://gitter.im/tensorflow/skflow|
+
+Scikit Flow
+===========
+
+This is a simplified interface for TensorFlow, to get people started on predictive analytics and data mining.
+
+Library covers variety of needs from linear models to *Deep Learning* applications like text and image understanding.
+
+Why *TensorFlow*? 
+-----------------
+- TensorFlow provides a good backbone for building different shapes of machine learning applications. 
+- It will continue to evolve both in the distributed direction and as general pipelining machinery.
+
+Why *Scikit Flow*?
+------------------
+- To smooth the transition from the Scikit Learn world of one-liner machine learning into the more open world of building different shapes of ML models. You can start by using fit/predict and slide into TensorFlow APIs as you are getting comfortable. 
+- To provide a set of reference models that would be easy to integrate with existing code.
+
+Installation
+============
+
+Dependencies
+------------
+- Python: 2.7, 3.4+ 
+- Scikit learn: 0.16, 0.17, 0.18+ 
+- Tensorflow: 0.7+
+
+First, you need to make sure you have `TensorFlow <https://github.com/tensorflow/tensorflow#installation>`__ and `Scikit Learn <http://scikit-learn.org/stable/install.html>`__ installed. 
+
+Run the following to install the stable version from PyPI:
+
+.. code:: bash
+
+    pip install skflow
+
+Or run the following to install from the development version from Github:
+
+.. code:: bash
+
+    pip install git+git://github.com/tensorflow/skflow.git
+
+Tutorial
+--------
+
+-  `Introduction to Scikit Flow and Why You Want to Start Learning
+   TensorFlow <https://medium.com/@ilblackdragon/tensorflow-tutorial-part-1-c559c63c0cb1>`__
+-  `DNNs, Custom model and Digit Recognition
+   examples <https://medium.com/@ilblackdragon/tensorflow-tutorial-part-2-9ffe47049c92>`__
+-  `Categorical Variables: One Hot vs Distributed
+   representation <https://medium.com/@ilblackdragon/tensorflow-tutorial-part-3-c5fc0662bc08>`__
+-  `Scikit Flow Key Features Illustrated <http://terrytangyuan.github.io/2016/03/14/scikit-flow-intro/>`__
+-  More coming soon.
+
+Community
+---------
+- Twitter `#skflow <https://twitter.com/search?q=skflow&src=typd>`__.
+- StackOverflow with `skflow tag <http://stackoverflow.com/questions/tagged/skflow>`__ for questions and struggles.
+- Github `issues <https://github.com/tensorflow/skflow/issues>`__ for technical discussions and feature requests. 
+- `Gitter channel <https://gitter.im/tensorflow/skflow>`__ for non-trivial discussions.
+
+Usage
+-----
+
+Below are a few simple examples of the API. For more examples, please see `examples <https://github.com/tensorflow/skflow/tree/master/examples>`__.
+
+General tips
+~~~~~~~~~~~~
+
+-  It's useful to re-scale the dataset to zero mean and unit standard deviation before passing it to an estimator. Stochastic Gradient Descent doesn't always do the right thing when variables are on very different scales.
+
+-  Categorical variables should be managed before passing input to the estimator. 
+
+Linear Classifier
+~~~~~~~~~~~~~~~~~
+
+Simple linear classification:
+
+.. code:: python
+
+    import skflow
+    from sklearn import datasets, metrics
+
+    iris = datasets.load_iris()
+    classifier = skflow.TensorFlowLinearClassifier(n_classes=3)
+    classifier.fit(iris.data, iris.target)
+    score = metrics.accuracy_score(iris.target, classifier.predict(iris.data))
+    print("Accuracy: %f" % score)
+
+Linear Regressor
+~~~~~~~~~~~~~~~~
+
+Simple linear regression:
+
+.. code:: python
+
+    import skflow
+    from sklearn import datasets, metrics, preprocessing
+
+    boston = datasets.load_boston()
+    X = preprocessing.StandardScaler().fit_transform(boston.data)
+    regressor = skflow.TensorFlowLinearRegressor()
+    regressor.fit(X, boston.target)
+    score = metrics.mean_squared_error(regressor.predict(X), boston.target)
+    print ("MSE: %f" % score)
+
+Deep Neural Network
+~~~~~~~~~~~~~~~~~~~
+
+Example of 3 layer network with 10, 20 and 10 hidden units respectively:
+
+.. code:: python
+
+    import skflow
+    from sklearn import datasets, metrics
+
+    iris = datasets.load_iris()
+    classifier = skflow.TensorFlowDNNClassifier(hidden_units=[10, 20, 10], n_classes=3)
+    classifier.fit(iris.data, iris.target)
+    score = metrics.accuracy_score(iris.target, classifier.predict(iris.data))
+    print("Accuracy: %f" % score)
+
+Custom model
+~~~~~~~~~~~~
+
+Example of how to pass a custom model to the TensorFlowEstimator:
+
+.. code:: python
+
+    import skflow
+    from sklearn import datasets, metrics
+
+    iris = datasets.load_iris()
+
+    def my_model(X, y):
+        """This is DNN with 10, 20, 10 hidden layers, and dropout of 0.5 probability."""
+        layers = skflow.ops.dnn(X, [10, 20, 10], keep_prob=0.5)
+        return skflow.models.logistic_regression(layers, y)
+
+    classifier = skflow.TensorFlowEstimator(model_fn=my_model, n_classes=3)
+    classifier.fit(iris.data, iris.target)
+    score = metrics.accuracy_score(iris.target, classifier.predict(iris.data))
+    print("Accuracy: %f" % score)
+
+Saving / Restoring models
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Each estimator has a ``save`` method which takes folder path where all model information will be saved. For restoring you can just call ``skflow.TensorFlowEstimator.restore(path)`` and it will return object of your class.
+
+Some example code:
+
+.. code:: python
+
+    import skflow
+
+    classifier = skflow.TensorFlowLinearRegressor()
+    classifier.fit(...)
+    classifier.save('/tmp/tf_examples/my_model_1/')
+
+    new_classifier = skflow.TensorFlowEstimator.restore('/tmp/tf_examples/my_model_1/')
+    new_classifier.predict(...)
+
+Summaries
+~~~~~~~~~
+
+To get nice visualizations and summaries you can use ``logdir`` parameter on ``fit``. It will start writing summaries for ``loss`` and histograms for variables in your model. You can also add custom summaries in your custom model function by calling ``tf.summary`` and passing Tensors to report.
+
+.. code:: python
+
+    classifier = skflow.TensorFlowLinearRegressor()
+    classifier.fit(X, y, logdir='/tmp/tf_examples/my_model_1/')
+
+Then run next command in command line:
+
+.. code:: bash
+
+    tensorboard --logdir=/tmp/tf_examples/my_model_1
+
+and follow reported url.
+
+Graph visualization: |Text classification RNN Graph|
+
+Loss visualization: |Text classification RNN Loss|
+
+More examples
+-------------
+
+See examples folder for:
+
+-  Easy way to handle categorical variables - words are just an example of categorical variable.
+-  Text Classification - see examples for RNN, CNN on word and characters.
+-  Language modeling and text sequence to sequence. 
+-  Images (CNNs) - see example for digit recognition.
+-  More & deeper - different examples showing DNNs and CNNs
+
+.. |Travis-CI Build Status| image:: https://travis-ci.org/tensorflow/skflow.svg?branch=master
+   :target: https://travis-ci.org/tensorflow/skflow
+.. |Codecov Status| image:: https://codecov.io/github/tensorflow/skflow/coverage.svg?precision=2
+   :target: https://codecov.io/github/tensorflow/skflow
+.. |License| image:: https://img.shields.io/badge/license-Apache%202.0-blue.svg
+   :target: http://www.apache.org/licenses/LICENSE-2.0.html
+.. |Join the chat at https://gitter.im/tensorflow/skflow| image:: https://badges.gitter.im/Join%20Chat.svg
+   :target: https://gitter.im/tensorflow/skflow?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge
+.. |Text classification RNN Graph| image:: https://raw.githubusercontent.com/tensorflow/skflow/master/g3doc/images/text_classification_rnn_graph.png
+.. |Text classification RNN Loss| image:: https://raw.githubusercontent.com/tensorflow/skflow/master/g3doc/images/text_classification_rnn_loss.png
+.. |PyPI version| image:: https://badge.fury.io/py/skflow.svg
+   :target: http://badge.fury.io/py/skflow
diff --git a/tensorflow/contrib/skflow/python/skflow/__init__.py b/tensorflow/contrib/skflow/python/skflow/__init__.py
new file mode 100644
index 0000000000..b02ca4be6b
--- /dev/null
+++ b/tensorflow/contrib/skflow/python/skflow/__init__.py
@@ -0,0 +1,38 @@
+"""Main Scikit Flow module."""
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+## Check existence of sklearn and its version
+
+from __future__ import division, print_function, absolute_import
+
+try:
+    import sklearn
+except ImportError:
+    raise ImportError("Please install sklearn (pip install sklearn) to use "
+                      "skflow.")
+# NOTE(review): plain string comparison; it misorders e.g. '0.9' against '0.16'.
+if sklearn.__version__ < '0.16.0':
+    raise ImportError("Your scikit-learn version needs to be at least 0.16. "
+                      "Your current version is %s. " % sklearn.__version__)
+
+import numpy as np
+import tensorflow as tf
+
+from tensorflow.contrib.skflow.python.skflow.io import *
+from tensorflow.contrib.skflow.python.skflow.estimators import *
+from tensorflow.contrib.skflow.python.skflow import ops
+from tensorflow.contrib.skflow.python.skflow import preprocessing
+from tensorflow.contrib.skflow.python.skflow import models
+from tensorflow.contrib.skflow.python.skflow.trainer import TensorFlowTrainer
diff --git a/tensorflow/contrib/skflow/python/skflow/addons/__init__.py b/tensorflow/contrib/skflow/python/skflow/addons/__init__.py
new file mode 100644
index 0000000000..5fbef6ee65
--- /dev/null
+++ b/tensorflow/contrib/skflow/python/skflow/addons/__init__.py
@@ -0,0 +1,18 @@
+"""Scikit Flow Addons."""
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+from tensorflow.contrib.skflow.python.skflow.addons.config_addon import ConfigAddon
diff --git a/tensorflow/contrib/skflow/python/skflow/addons/config_addon.py b/tensorflow/contrib/skflow/python/skflow/addons/config_addon.py
new file mode 100644
index 0000000000..ae5fef553b
--- /dev/null
+++ b/tensorflow/contrib/skflow/python/skflow/addons/config_addon.py
@@ -0,0 +1,39 @@
+"""Configuration Addon."""
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+import tensorflow as tf
+
+class ConfigAddon(object):
+    """Builds the `tf.ConfigProto` for a session and stores it in `self.config`.
+
+    Parameters:
+        num_cores: Inter-op and intra-op parallelism thread count. (default: 4)
+        verbose: Controls the verbosity, possible values:
+                 0: the algorithm and debug information is muted.
+                 1: trainer prints the progress.
+                 2: log device placement is printed.
+        gpu_memory_fraction: Fraction of GPU memory used by the process on
+            each GPU uniformly on the same machine. (default: 1)
+    """
+
+    def __init__(self, num_cores=4, verbose=1, gpu_memory_fraction=1):
+        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_memory_fraction)
+        self.config = tf.ConfigProto(log_device_placement=(verbose > 1),  # only verbose > 1 is used here
+                                     inter_op_parallelism_threads=num_cores,
+                                     intra_op_parallelism_threads=num_cores,
+                                     gpu_options=gpu_options)
+    
diff --git a/tensorflow/contrib/skflow/python/skflow/estimators/__init__.py b/tensorflow/contrib/skflow/python/skflow/estimators/__init__.py
new file mode 100644
index 0000000000..bda9327406
--- /dev/null
+++ b/tensorflow/contrib/skflow/python/skflow/estimators/__init__.py
@@ -0,0 +1,26 @@
+"""Scikit Flow Estimators."""
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+from tensorflow.contrib.skflow.python.skflow.estimators.base import TensorFlowEstimator
+from tensorflow.contrib.skflow.python.skflow.estimators.linear import TensorFlowLinearClassifier
+from tensorflow.contrib.skflow.python.skflow.estimators.linear import TensorFlowClassifier
+from tensorflow.contrib.skflow.python.skflow.estimators.linear import TensorFlowLinearRegressor
+from tensorflow.contrib.skflow.python.skflow.estimators.linear import TensorFlowRegressor
+from tensorflow.contrib.skflow.python.skflow.estimators.dnn import TensorFlowDNNClassifier
+from tensorflow.contrib.skflow.python.skflow.estimators.dnn import TensorFlowDNNRegressor
+from tensorflow.contrib.skflow.python.skflow.estimators.rnn import TensorFlowRNNClassifier
+from tensorflow.contrib.skflow.python.skflow.estimators.rnn import TensorFlowRNNRegressor
diff --git a/tensorflow/contrib/skflow/python/skflow/estimators/base.py b/tensorflow/contrib/skflow/python/skflow/estimators/base.py
new file mode 100644
index 0000000000..cb25c6f14e
--- /dev/null
+++ b/tensorflow/contrib/skflow/python/skflow/estimators/base.py
@@ -0,0 +1,500 @@
+"""Base estimator class."""
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+import datetime
+import json
+import os
+import shutil
+from six import string_types
+
+import numpy as np
+import tensorflow as tf
+
+from google.protobuf import text_format
+
+from sklearn.base import BaseEstimator
+try:
+    from sklearn.exceptions import NotFittedError
+except ImportError:
+    from sklearn.utils.validation import NotFittedError  # pylint: disable=ungrouped-imports
+
+from ..trainer import TensorFlowTrainer, RestoredTrainer
+from ..io.data_feeder import setup_train_data_feeder
+from ..io.data_feeder import setup_predict_data_feeder
+from ..ops.dropout_ops import DROPOUTS
+from .. import monitors
+
+from ..addons.config_addon import ConfigAddon
+
+
+def _write_with_backup(filename, content):  # writes `content`, keeping any existing file
+    if os.path.exists(filename):
+        shutil.move(filename, filename + '.old')  # previous file becomes <filename>.old
+    with open(filename, 'w') as f:  # text mode; creates or truncates `filename`
+        f.write(content)
+
+
+class TensorFlowEstimator(BaseEstimator):
+    """Base class for all TensorFlow estimators.
+
+    Parameters:
+        model_fn: Model function, that takes input X, y tensors and outputs
+                  prediction and loss tensors.
+        n_classes: Number of classes in the target.
+        tf_master: TensorFlow master. Empty string is default for local.
+        batch_size: Mini batch size.
+        steps: Number of steps to run over data.
+        optimizer: Optimizer name (or class), for example "SGD", "Adam",
+                   "Adagrad".
+        learning_rate: If this is constant float value, no decay function is used.
+            Instead, a customized decay function can be passed that accepts
+            global_step as parameter and returns a Tensor.
+            e.g. exponential decay function:
+            def exp_decay(global_step):
+                return tf.train.exponential_decay(
+                    learning_rate=0.1, global_step=global_step,
+                    decay_steps=2, decay_rate=0.001)
+        class_weight: None or list of n_classes floats. Weight associated with
+                     classes for loss computation. If not given, all classes are supposed to have
+                     weight one.
+        tf_random_seed: Random seed for TensorFlow initializers.
+            Setting this value, allows consistency between reruns.
+        continue_training: when continue_training is True, once initialized
+            model will be continually trained on every call of fit.
+        config_addon: ConfigAddon object that controls the configurations of the session,
+            e.g. num_cores, gpu_memory_fraction, etc.
+        verbose: Controls the verbosity, possible values:
+                 0: the algorithm and debug information is muted.
+                 1: trainer prints the progress.
+                 2: log device placement is printed.
+        max_to_keep: The maximum number of recent checkpoint files to keep.
+            As new files are created, older files are deleted.
+            If None or 0, all checkpoint files are kept.
+            Defaults to 5 (that is, the 5 most recent checkpoint files are kept.)
+        keep_checkpoint_every_n_hours: Number of hours between each checkpoint
+            to be saved. The default value of 10,000 hours effectively disables the feature.
+    """
+
+    def __init__(self, model_fn, n_classes, tf_master="", batch_size=32,
+                 steps=200, optimizer="SGD",
+                 learning_rate=0.1, class_weight=None,
+                 tf_random_seed=42, continue_training=False,
+                 config_addon=None, verbose=1,
+                 max_to_keep=5, keep_checkpoint_every_n_hours=10000):
+        """Stores the parameters; graph and session are built by _setup_training()."""
+        self.n_classes = n_classes
+        self.tf_master = tf_master
+        self.batch_size = batch_size
+        self.steps = steps
+        self.verbose = verbose
+        self.optimizer = optimizer
+        self.learning_rate = learning_rate
+        self.tf_random_seed = tf_random_seed
+        self.model_fn = model_fn
+        self.continue_training = continue_training
+        self._initialized = False  # set to True by fit() after the first setup
+        self.max_to_keep = max_to_keep
+        self.keep_checkpoint_every_n_hours = keep_checkpoint_every_n_hours
+        self.class_weight = class_weight
+        self.config_addon = config_addon
+
+    def _setup_training(self):
+        """Builds the graph, placeholders, model, trainer, saver and session."""
+        self._graph = tf.Graph()
+        self._graph.add_to_collection("IS_TRAINING", True)
+        with self._graph.as_default():
+            tf.set_random_seed(self.tf_random_seed)
+            self._global_step = tf.Variable(
+                0, name="global_step", trainable=False)
+
+            # Setting up input and output placeholders.
+            input_shape = [None] + self._data_feeder.input_shape[1:]
+            output_shape = [None] + self._data_feeder.output_shape[1:]
+            self._inp = tf.placeholder(
+                tf.as_dtype(self._data_feeder.input_dtype), input_shape,
+                name="input")
+            self._out = tf.placeholder(
+                tf.as_dtype(self._data_feeder.output_dtype), output_shape,
+                name="output")
+
+            # If class weights are provided (non-empty list or array), add them to the
+            # graph as a constant; loss functions can look this tensor up by name.
+            if self.class_weight is not None and len(self.class_weight) > 0:
+                self._class_weight_node = tf.constant(
+                    self.class_weight, name='class_weight')
+
+            # Add histograms for X and y if they are floats.
+            if self._data_feeder.input_dtype in (np.float32, np.float64):
+                tf.histogram_summary("X", self._inp)
+            if self._data_feeder.output_dtype in (np.float32, np.float64):
+                tf.histogram_summary("y", self._out)
+
+            # Create model's graph.
+            self._model_predictions, self._model_loss = self.model_fn(
+                self._inp, self._out)
+
+            # Create summary to monitor loss
+            tf.scalar_summary("loss", self._model_loss)
+
+            # Set up a single operator to merge all the summaries
+            self._summaries = tf.merge_all_summaries()
+
+            # Create trainer and augment graph with gradients and optimizer.
+            # Additionally creates initialization ops.
+            self._trainer = TensorFlowTrainer(
+                loss=self._model_loss, global_step=self._global_step,
+                optimizer=self.optimizer, learning_rate=self.learning_rate)
+
+            # Create model's saver capturing all the nodes created up until now.
+            self._saver = tf.train.Saver(
+                max_to_keep=self.max_to_keep,
+                keep_checkpoint_every_n_hours=self.keep_checkpoint_every_n_hours)
+
+            # Enable monitor to create validation data dict with appropriate tf placeholders
+            self._monitor.create_val_feed_dict(self._inp, self._out)
+
+            # Create session to run model with.
+            if self.config_addon is None:
+                self.config_addon = ConfigAddon(verbose=self.verbose)
+            self._session = tf.Session(self.tf_master, config=self.config_addon.config)
+
+    def _setup_summary_writer(self, logdir):
+        """Creates a SummaryWriter that logs to a timestamped subdirectory of `logdir`."""
+        self._summary_writer = tf.train.SummaryWriter(
+            os.path.join(logdir, datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')),
+            graph_def=self._session.graph_def)
+
+    def fit(self, X, y, monitor=None, logdir=None):
+        """Builds a neural network model given provided `model_fn` and training
+        data X and y.
+
+        Note: the first call constructs the graph and initializes variables;
+        consecutive calls continue training the same model when `continue_training`
+        is set. This logic follows partial_fit() interface in scikit-learn.
+
+        To restart learning, create new estimator.
+
+        Args:
+            X: matrix or tensor of shape [n_samples, n_features...]. Can be
+            iterator that returns arrays of features. The training input
+            samples for fitting the model.
+            y: vector or matrix [n_samples] or [n_samples, n_outputs]. Can be
+            iterator that returns array of targets. The training target values
+            (class labels in classification, real numbers in regression).
+            monitor: Monitor object to print training progress and invoke early stopping
+            logdir: the directory to save the log file that can be used for
+            optional visualization.
+
+        Returns:
+            Returns self.
+        """
+        # Sets up the data feeder from the training data, the number of
+        # classes and the batch size.
+        self._data_feeder = setup_train_data_feeder(
+            X, y, self.n_classes, self.batch_size)
+
+        # Use the caller's monitor, or the default one when none is given.
+        self._monitor = monitor
+        if self._monitor is None:
+            self._monitor = monitors.default_monitor()
+
+        if not (self.continue_training and self._initialized):
+            # Sets up model and trainer.
+            self._setup_training()
+            # Initialize model parameters.
+            self._trainer.initialize(self._session)
+            self._initialized = True
+
+        # Sets up summary writer for later optional visualization.
+        # _summary_writer is not a model parameter, so it is not created in
+        # __init__ and may be missing here; getattr() treats "missing" and
+        # "None" (e.g. reset by a previous run) alike. A writer is created
+        # only when a log dir is provided and none exists yet; without a
+        # log dir the writer is reset to None.
+        if logdir:
+            writer = getattr(self, "_summary_writer", None)
+            if writer is None:
+                self._setup_summary_writer(logdir)
+        else:
+            self._summary_writer = None
+
+        # Train model for given number of steps.
+        feed_dict_fn = self._data_feeder.get_feed_dict_fn(self._inp, self._out)
+        self._trainer.train(self._session,
+                            feed_dict_fn,
+                            self.steps,
+                            self._monitor,
+                            self._summary_writer,
+                            self._summaries,
+                            feed_params_fn=self._data_feeder.get_feed_params)
+        return self
+
+    def partial_fit(self, X, y):
+        """Incremental fit on a batch of samples.
+
+        This method is expected to be called several times consecutively
+        on different or the same chunks of the dataset, e.g. for iterative or
+        out-of-core/online training when the whole dataset is too big to fit
+        in memory, or to split up the training of a slowly converging model.
+
+        Note: this simply delegates to `fit(X, y)`, which keeps training the
+        existing model only when `continue_training` is True.
+
+        Args:
+            X: matrix or tensor of shape [n_samples, n_features...]. Can be
+            iterator that returns arrays of features. The training input
+            samples for fitting the model.
+            y: vector or matrix [n_samples] or [n_samples, n_outputs]. Can be
+            iterator that returns array of targets. The training target values
+            (class label in classification, real numbers in regression).
+
+        Returns:
+            Returns self.
+        """
+        return self.fit(X, y)
+
+    def _predict(self, X, axis=-1, batch_size=-1):
+        """Runs the model over X batch by batch and concatenates the outputs.
+
+        When n_classes > 1 and axis != -1 each batch is argmax-ed along
+        `axis`; otherwise the raw model predictions are returned.
+        """
+        if not self._initialized:
+            raise NotFittedError()
+        self._graph.add_to_collection("IS_TRAINING", False)
+        predict_data_feeder = setup_predict_data_feeder(X, batch_size=batch_size)
+        take_argmax = self.n_classes > 1 and axis != -1
+        # Feed 1.0 for every tensor registered in the DROPOUTS collection.
+        feed_dict = {prob: 1.0 for prob in self._graph.get_collection(DROPOUTS)}
+        preds = []
+        for data in predict_data_feeder:
+            feed_dict[self._inp] = data
+            batch_out = self._session.run(self._model_predictions, feed_dict)
+            preds.append(batch_out.argmax(axis=axis) if take_argmax else batch_out)
+
+        return np.concatenate(preds, axis=0)
+
+    def predict(self, X, axis=1, batch_size=-1):
+        """Predict class or regression for X.
+
+        For a classification model (n_classes > 1), the predicted class for
+        each sample in X is returned. For a regression model, the predicted
+        value based on X is returned.
+
+        Args:
+            X: array-like matrix, [n_samples, n_features...] or iterator.
+            axis: Which axis to argmax for classification.
+                  By default axis 1 (next after batch) is used.
+                  Use 2 for sequence predictions.
+            batch_size: If test set is too big, use batch size to split
+                        it into mini batches. By default full dataset is used.
+
+        Returns:
+            y: array of shape [n_samples]. The predicted classes or predicted
+            value.
+        """
+        return self._predict(X, axis=axis, batch_size=batch_size)
+
+    def predict_proba(self, X, batch_size=-1):
+        """Predict class probability of the input samples X.
+
+        Args:
+            X: array-like matrix, [n_samples, n_features...] or iterator.
+            batch_size: If test set is too big, use batch size to split
+                        it into mini batches. By default full dataset is used.
+
+        Returns:
+            y: array of shape [n_samples, n_classes]. The predicted
+            probabilities for each class, i.e. the raw model predictions
+            with no argmax applied.
+        """
+        return self._predict(X, batch_size=batch_size)
+
+    def get_tensor(self, name):
+        """Returns the tensor with the given name from this estimator's graph.
+
+        Args:
+            name: string, name of the tensor.
+
+        Returns:
+            Tensor.
+        """
+        return self._graph.get_tensor_by_name(name)
+
+    def get_tensor_value(self, name):
+        """Returns value of the tensor given by name, evaluated in this estimator's session.
+
+        Args:
+            name: string, name of the tensor.
+
+        Returns:
+            Numpy array - value of the tensor.
+        """
+        return self._session.run(self.get_tensor(name))
+
+    def save(self, path):
+        """Saves checkpoints and graph to given path.
+
+        Args:
+            path: Folder to save model to.
+
+        Raises:
+            NotFittedError: if the estimator was neither fitted nor restored.
+            ValueError: if `path` exists but is not a directory.
+        """
+        if not self._initialized:
+            raise NotFittedError()
+
+        # Currently Saver requires absolute path to work correctly.
+        path = os.path.abspath(path)
+
+        if not os.path.exists(path):
+            os.makedirs(path)
+        if not os.path.isdir(path):
+            raise ValueError("Path %s should be a directory to save "
+                             "checkpoints and graph." % path)
+        # Save model definition. Callables and None values are left out;
+        # objects json cannot encode are stored as their __dict__.
+        params = {key: value for key, value in self.get_params().items()
+                  if value is not None and not callable(value)}
+        params['class_name'] = type(self).__name__
+        model_def = json.dumps(
+            params,
+            default=lambda o: o.__dict__ if hasattr(o, '__dict__') else None)
+        _write_with_backup(os.path.join(path, 'model.def'), model_def)
+
+        # Save the names of the endpoint tensors, one per line.
+        endpoints = '%s\n%s\n%s\n%s' % (
+            self._inp.name, self._out.name,
+            self._model_predictions.name, self._model_loss.name)
+        _write_with_backup(os.path.join(path, 'endpoints'), endpoints)
+
+        # Save graph definition.
+        _write_with_backup(os.path.join(path, 'graph.pbtxt'), str(self._graph.as_graph_def()))
+
+        # Save saver definition.
+        _write_with_backup(os.path.join(path, 'saver.pbtxt'), str(self._saver.as_saver_def()))
+
+        # Save checkpoints.
+        self._saver.save(self._session, os.path.join(path, 'model'),
+                         global_step=self._global_step)
+
+    def _restore(self, path):
+        """Restores this estimator from given path.
+
+        Note: will rebuild the graph and initialize all parameters,
+        and will ignore provided model.
+
+        Args:
+            path: Path to checkpoints and other information.
+
+        Raises:
+            ValueError: if the endpoints, graph, saver or checkpoint files are missing.
+        """
+        # Currently Saver requires absolute path to work correctly.
+        path = os.path.abspath(path)
+
+        self._graph = tf.Graph()
+        with self._graph.as_default():
+            endpoints_filename = os.path.join(path, 'endpoints')
+            if not os.path.exists(endpoints_filename):
+                raise ValueError("Restore folder doesn't contain endpoints.")
+            with open(endpoints_filename) as foutputs:
+                endpoints = foutputs.read().splitlines()  # tolerates a trailing newline
+            graph_filename = os.path.join(path, 'graph.pbtxt')
+            if not os.path.exists(graph_filename):
+                raise ValueError("Restore folder doesn't contain graph definition.")
+            with open(graph_filename) as fgraph:
+                graph_def = tf.GraphDef()
+                text_format.Merge(fgraph.read(), graph_def)
+                (self._inp, self._out,
+                 self._model_predictions, self._model_loss) = tf.import_graph_def(
+                     graph_def, name='', return_elements=endpoints)
+            saver_filename = os.path.join(path, 'saver.pbtxt')
+            if not os.path.exists(saver_filename):
+                raise ValueError("Restore folder doesn't contain saver definition.")
+            with open(saver_filename) as fsaver:
+                saver_def = tf.train.SaverDef()
+                text_format.Merge(fsaver.read(), saver_def)
+                self._saver = tf.train.Saver(saver_def=saver_def)
+
+            # Restore trainer
+            self._global_step = self._graph.get_tensor_by_name('global_step:0')
+            trainer_op = self._graph.get_operation_by_name('train')
+            self._trainer = RestoredTrainer(self._model_loss, self._global_step, trainer_op)
+
+            # Restore summaries.
+            self._summaries = self._graph.get_operation_by_name('MergeSummary/MergeSummary')
+
+            # Restore session.
+            if not isinstance(self.config_addon, ConfigAddon):
+                self.config_addon = ConfigAddon(verbose=self.verbose)
+            self._session = tf.Session(self.tf_master, config=self.config_addon.config)
+            checkpoint_path = tf.train.latest_checkpoint(path)
+            if checkpoint_path is None:
+                raise ValueError("Missing checkpoint files in the %s. Please "
+                                 "make sure you have a checkpoint file that describes "
+                                 "the latest checkpoints and that those checkpoints are there. "
+                                 "If you have moved the folder, you need to manually "
+                                 "update the paths in the checkpoint file." % path)
+            self._saver.restore(self._session, checkpoint_path)
+        # Set to be initialized.
+        self._initialized = True
+
+    # pylint: disable=unused-argument
+    @classmethod
+    def restore(cls, path, config_addon=None):
+        """Restores model from given path.
+
+        Args:
+            path: Path to the checkpoints and other model information.
+            config_addon: ConfigAddon object that controls the configurations of the session,
+                e.g. num_cores, gpu_memory_fraction, etc. This is allowed to be reconfigured.
+
+        Returns:
+            Estimator, object of the subclass of TensorFlowEstimator.
+        """
+        model_def_filename = os.path.join(path, 'model.def')
+        if not os.path.exists(model_def_filename):
+            raise ValueError("Restore folder doesn't contain model definition.")
+        with open(model_def_filename) as fmodel:
+            model_def = json.loads(fmodel.read())
+        # TensorFlow binding requires parameters to be strings not unicode.
+        # Only issue in Python2.
+        for key, value in model_def.items():
+            if (isinstance(value, string_types) and
+                    not isinstance(value, str)):
+                model_def[key] = str(value)
+        # Parameters the caller may reconfigure. Overrides are applied after
+        # loading, and regardless of whether the saved definition contains
+        # the key, so an explicitly passed value is never silently ignored.
+        overrides = {'config_addon': config_addon}
+        model_def.update(
+            (key, value) for key, value in overrides.items() if value is not None)
+        class_name = model_def.pop('class_name')
+        if class_name == 'TensorFlowEstimator':
+            custom_estimator = TensorFlowEstimator(model_fn=None, **model_def)
+            custom_estimator._restore(path)
+            return custom_estimator
+
+        # To avoid cyclical dependencies, import inside the function instead of
+        # the beginning of the file.
+        from tensorflow.contrib.skflow.python.skflow import estimators
+        # Estimator must be one of the defined estimators in the __init__ file.
+        estimator = getattr(estimators, class_name)(**model_def)
+        estimator._restore(path)
+        return estimator
diff --git a/tensorflow/contrib/skflow/python/skflow/estimators/dnn.py b/tensorflow/contrib/skflow/python/skflow/estimators/dnn.py
new file mode 100644
index 0000000000..cf0412c304
--- /dev/null
+++ b/tensorflow/contrib/skflow/python/skflow/estimators/dnn.py
@@ -0,0 +1,173 @@
+"""Deep Neural Network estimators."""
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+from sklearn.base import ClassifierMixin, RegressorMixin
+
+from .base import TensorFlowEstimator
+from .. import models
+
+
+class TensorFlowDNNClassifier(TensorFlowEstimator, ClassifierMixin):
+    """TensorFlow DNN Classifier model.
+
+    Parameters:
+        hidden_units: List of hidden units per layer.
+        n_classes: Number of classes in the target.
+        tf_master: TensorFlow master. Empty string is default for local.
+        batch_size: Mini batch size.
+        steps: Number of steps to run over data.
+        optimizer: Optimizer name (or class), for example "SGD", "Adam", "Adagrad".
+        learning_rate: If this is constant float value, no decay function is used.
+            Instead, a customized decay function can be passed that accepts
+            global_step as parameter and returns a Tensor, e.g. exponential decay:
+            def exp_decay(global_step):
+                return tf.train.exponential_decay(
+                    learning_rate=0.1, global_step=global_step,
+                    decay_steps=2, decay_rate=0.001)
+        class_weight: None or list of n_classes floats. Weight associated with classes
+            for loss computation. If not given, all classes are supposed to have weight one.
+        tf_random_seed: Random seed for TensorFlow initializers.
+            Setting this value, allows consistency between reruns.
+        continue_training: when continue_training is True, once initialized
+            model will be continually trained on every call of fit.
+        config_addon: ConfigAddon object that controls the configurations of the session,
+            e.g. num_cores, gpu_memory_fraction, etc.
+        verbose: Controls the verbosity, possible values:
+            0: the algorithm and debug information is muted.
+            1: trainer prints the progress.
+            2: log device placement is printed.
+        max_to_keep: The maximum number of recent checkpoint files to keep.
+            As new files are created, older files are deleted. If None or 0,
+            all checkpoint files are kept. Defaults to 5.
+        keep_checkpoint_every_n_hours: Number of hours between each checkpoint
+            to be saved. The default value of 10,000 hours effectively disables the feature.
+    """
+
+    def __init__(self, hidden_units, n_classes, tf_master="", batch_size=32,
+                 steps=200, optimizer="SGD", learning_rate=0.1,
+                 class_weight=None,
+                 tf_random_seed=42, continue_training=False, config_addon=None,
+                 verbose=1, max_to_keep=5, keep_checkpoint_every_n_hours=10000):
+
+        self.hidden_units = hidden_units
+        super(TensorFlowDNNClassifier, self).__init__(
+            model_fn=self._model_fn,
+            n_classes=n_classes, tf_master=tf_master,
+            batch_size=batch_size, steps=steps, optimizer=optimizer,
+            learning_rate=learning_rate, class_weight=class_weight,
+            tf_random_seed=tf_random_seed,
+            continue_training=continue_training,
+            config_addon=config_addon, verbose=verbose,
+            max_to_keep=max_to_keep,
+            keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours)
+
+    def _model_fn(self, X, y):
+        dnn_model = models.get_dnn_model(self.hidden_units, models.logistic_regression)
+        return dnn_model(X, y)
+
+    @property
+    def weights_(self):
+        """Returns the hidden layer weights followed by the logistic regression weights."""
+        # One 'Matrix' tensor per entry of hidden_units, in layer order.
+        tensor_names = ['dnn/layer%d/Linear/Matrix:0' % layer
+                        for layer in range(len(self.hidden_units))]
+        tensor_names.append('logistic_regression/weights:0')
+        return [self.get_tensor_value(name) for name in tensor_names]
+
+    @property
+    def bias_(self):
+        """Returns the hidden layer biases followed by the logistic regression bias."""
+        # One 'Bias' tensor per entry of hidden_units, in layer order.
+        tensor_names = ['dnn/layer%d/Linear/Bias:0' % layer
+                        for layer in range(len(self.hidden_units))]
+        tensor_names.append('logistic_regression/bias:0')
+        return [self.get_tensor_value(name) for name in tensor_names]
+
+
+class TensorFlowDNNRegressor(TensorFlowEstimator, RegressorMixin):
+    """TensorFlow DNN Regressor model.
+
+    Parameters:
+        hidden_units: List of hidden units per layer.
+        n_classes: Passed through to TensorFlowEstimator; defaults to 0.
+        tf_master: TensorFlow master. Empty string is default for local.
+        batch_size: Mini batch size.
+        steps: Number of steps to run over data.
+        optimizer: Optimizer name (or class), for example "SGD", "Adam", "Adagrad".
+        learning_rate: If this is constant float value, no decay function is used.
+            Instead, a customized decay function can be passed that accepts
+            global_step as parameter and returns a Tensor.
+            e.g. exponential decay function:
+            def exp_decay(global_step):
+                return tf.train.exponential_decay(
+                    learning_rate=0.1, global_step=global_step,
+                    decay_steps=2, decay_rate=0.001)
+        tf_random_seed: Random seed for TensorFlow initializers.
+            Setting this value, allows consistency between reruns.
+        continue_training: when continue_training is True, once initialized
+            model will be continually trained on every call of fit.
+        config_addon: ConfigAddon object that controls the configurations of the session,
+            e.g. num_cores, gpu_memory_fraction, etc.
+        verbose: Controls the verbosity, possible values:
+                 0: the algorithm and debug information is muted.
+                 1: trainer prints the progress.
+                 2: log device placement is printed.
+        max_to_keep: The maximum number of recent checkpoint files to keep.
+            As new files are created, older files are deleted.
+            If None or 0, all checkpoint files are kept.
+            Defaults to 5 (that is, the 5 most recent checkpoint files are kept.)
+        keep_checkpoint_every_n_hours: Number of hours between each checkpoint
+            to be saved. The default value of 10,000 hours effectively disables the feature.
+    """
+
+    def __init__(self, hidden_units, n_classes=0, tf_master="", batch_size=32,
+                 steps=200, optimizer="SGD", learning_rate=0.1,
+                 tf_random_seed=42, continue_training=False, config_addon=None,
+                 verbose=1, max_to_keep=5, keep_checkpoint_every_n_hours=10000):
+
+        self.hidden_units = hidden_units
+        super(TensorFlowDNNRegressor, self).__init__(
+            model_fn=self._model_fn,
+            n_classes=n_classes, tf_master=tf_master,
+            batch_size=batch_size, steps=steps, optimizer=optimizer,
+            learning_rate=learning_rate, tf_random_seed=tf_random_seed,
+            continue_training=continue_training,
+            config_addon=config_addon, verbose=verbose,
+            max_to_keep=max_to_keep,
+            keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours)
+
+    def _model_fn(self, X, y):
+        dnn_model = models.get_dnn_model(self.hidden_units, models.linear_regression)
+        return dnn_model(X, y)
+
+    @property
+    def weights_(self):
+        """Returns the hidden layer weights followed by the linear regression weights."""
+        # One 'Matrix' tensor per entry of hidden_units, in layer order.
+        tensor_names = ['dnn/layer%d/Linear/Matrix:0' % layer
+                        for layer in range(len(self.hidden_units))]
+        tensor_names.append('linear_regression/weights:0')
+        return [self.get_tensor_value(name) for name in tensor_names]
+
+    @property
+    def bias_(self):
+        """Returns the hidden layer biases followed by the linear regression bias."""
+        # One 'Bias' tensor per entry of hidden_units, in layer order.
+        tensor_names = ['dnn/layer%d/Linear/Bias:0' % layer
+                        for layer in range(len(self.hidden_units))]
+        tensor_names.append('linear_regression/bias:0')
+        return [self.get_tensor_value(name) for name in tensor_names]
diff --git a/tensorflow/contrib/skflow/python/skflow/estimators/linear.py b/tensorflow/contrib/skflow/python/skflow/estimators/linear.py
new file mode 100644
index 0000000000..dae822a77c
--- /dev/null
+++ b/tensorflow/contrib/skflow/python/skflow/estimators/linear.py
@@ -0,0 +1,82 @@
+"""Linear Estimators."""
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+from sklearn.base import ClassifierMixin, RegressorMixin
+
+from .base import TensorFlowEstimator
+from .. import models
+
+
+class TensorFlowLinearRegressor(TensorFlowEstimator, RegressorMixin):
+    """TensorFlow Linear Regression model, built on `models.linear_regression`."""
+
+    def __init__(self, n_classes=0, tf_master="", batch_size=32, steps=200, optimizer="SGD",
+                 learning_rate=0.1, tf_random_seed=42, continue_training=False,
+                 config_addon=None, verbose=1,
+                 max_to_keep=5, keep_checkpoint_every_n_hours=10000):
+
+        super(TensorFlowLinearRegressor, self).__init__(
+            model_fn=models.linear_regression, n_classes=n_classes,
+            tf_master=tf_master,
+            batch_size=batch_size, steps=steps, optimizer=optimizer,
+            learning_rate=learning_rate, tf_random_seed=tf_random_seed,
+            continue_training=continue_training, config_addon=config_addon,
+            verbose=verbose, max_to_keep=max_to_keep,
+            keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours)
+
+    @property
+    def weights_(self):
+        """Returns weights of the linear regression ('linear_regression/weights:0')."""
+        return self.get_tensor_value('linear_regression/weights:0')
+
+    @property
+    def bias_(self):
+        """Returns bias of the linear regression ('linear_regression/bias:0')."""
+        return self.get_tensor_value('linear_regression/bias:0')
+
+
+class TensorFlowLinearClassifier(TensorFlowEstimator, ClassifierMixin):
+    """TensorFlow Linear Classifier model, built on `models.logistic_regression`."""
+
+    def __init__(self, n_classes, tf_master="", batch_size=32, steps=200, optimizer="SGD",
+                 learning_rate=0.1, class_weight=None,
+                 tf_random_seed=42, continue_training=False, config_addon=None,
+                 verbose=1, max_to_keep=5, keep_checkpoint_every_n_hours=10000):
+
+        super(TensorFlowLinearClassifier, self).__init__(
+            model_fn=models.logistic_regression, n_classes=n_classes,
+            tf_master=tf_master,
+            batch_size=batch_size, steps=steps, optimizer=optimizer,
+            learning_rate=learning_rate, class_weight=class_weight,
+            tf_random_seed=tf_random_seed,
+            continue_training=continue_training, config_addon=config_addon,
+            verbose=verbose, max_to_keep=max_to_keep,
+            keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours)
+
+    @property
+    def weights_(self):
+        """Returns weights of the linear classifier."""
+        return self.get_tensor_value('logistic_regression/weights:0')
+
+    @property
+    def bias_(self):
+        """Returns bias of the linear classifier."""
+        return self.get_tensor_value('logistic_regression/bias:0')
+
+
+TensorFlowRegressor = TensorFlowLinearRegressor  # Alias for the linear regressor.
+TensorFlowClassifier = TensorFlowLinearClassifier  # Alias for the linear classifier.
diff --git a/tensorflow/contrib/skflow/python/skflow/estimators/rnn.py b/tensorflow/contrib/skflow/python/skflow/estimators/rnn.py
new file mode 100644
index 0000000000..6cf81b5b93
--- /dev/null
+++ b/tensorflow/contrib/skflow/python/skflow/estimators/rnn.py
@@ -0,0 +1,207 @@
+"""Recurrent Neural Network estimators."""
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+from sklearn.base import ClassifierMixin, RegressorMixin
+
+from .base import TensorFlowEstimator
+from .. import models
+
+
+def null_input_op_fn(X):
+    """Identity transform: returns the inputs unchanged. Used as the default input_op_fn."""
+    return X
+
+
+class TensorFlowRNNClassifier(TensorFlowEstimator, ClassifierMixin):
+    """TensorFlow RNN Classifier model.
+
+    Parameters:
+        rnn_size: The size for rnn cell, e.g. size of your word embeddings.
+        cell_type: The type of rnn cell, including rnn, gru, and lstm.
+        num_layers: The number of layers of the rnn model.
+        input_op_fn: Function that will transform the input tensor, such as
+                     creating word embeddings, byte list, etc. This takes
+                     an argument X for input and returns transformed X.
+        bidirectional: boolean, Whether this is a bidirectional rnn.
+        sequence_length: If sequence_length is provided, dynamic calculation is performed.
+                 This saves computational time when unrolling past max sequence length.
+        initial_state: An initial state for the RNN. This must be a tensor of appropriate type
+                       and shape [batch_size x cell.state_size].
+        n_classes: Number of classes in the target.
+        tf_master: TensorFlow master. Empty string is default for local.
+        batch_size: Mini batch size.
+        steps: Number of steps to run over data.
+        optimizer: Optimizer name (or class), for example "SGD", "Adam",
+                   "Adagrad".
+        learning_rate: If this is constant float value, no decay function is used.
+            Instead, a customized decay function can be passed that accepts
+            global_step as parameter and returns a Tensor.
+            e.g. exponential decay function:
+            def exp_decay(global_step):
+                return tf.train.exponential_decay(
+                    learning_rate=0.1, global_step=global_step,
+                    decay_steps=2, decay_rate=0.001)
+        class_weight: None or list of n_classes floats. Weight associated with
+                     classes for loss computation. If not given, all classes are supposed to have
+                     weight one.
+        tf_random_seed: Random seed for TensorFlow initializers.
+            Setting this value, allows consistency between reruns.
+        continue_training: when continue_training is True, once initialized
+            model will be continually trained on every call of fit.
+        config_addon, verbose: Passed through unchanged to TensorFlowEstimator.
+        max_to_keep: The maximum number of recent checkpoint files to keep.
+            As new files are created, older files are deleted.
+            If None or 0, all checkpoint files are kept.
+            Defaults to 5 (that is, the 5 most recent checkpoint files are kept.)
+        keep_checkpoint_every_n_hours: Number of hours between each checkpoint
+            to be saved. The default value of 10,000 hours effectively disables the feature.
+     """
+
+    def __init__(self, rnn_size, n_classes, cell_type='gru', num_layers=1,
+                 input_op_fn=null_input_op_fn,
+                 initial_state=None, bidirectional=False,
+                 sequence_length=None, tf_master="", batch_size=32,
+                 steps=50, optimizer="SGD", learning_rate=0.1,
+                 class_weight=None,
+                 tf_random_seed=42, continue_training=False,
+                 config_addon=None, verbose=1,
+                 max_to_keep=5, keep_checkpoint_every_n_hours=10000):
+
+        self.rnn_size = rnn_size
+        self.cell_type = cell_type
+        self.input_op_fn = input_op_fn
+        self.bidirectional = bidirectional
+        self.num_layers = num_layers
+        self.sequence_length = sequence_length
+        self.initial_state = initial_state
+        super(TensorFlowRNNClassifier, self).__init__(
+            model_fn=self._model_fn,
+            n_classes=n_classes, tf_master=tf_master,
+            batch_size=batch_size, steps=steps, optimizer=optimizer,
+            learning_rate=learning_rate, class_weight=class_weight,
+            tf_random_seed=tf_random_seed,
+            continue_training=continue_training, config_addon=config_addon,
+            verbose=verbose,
+            max_to_keep=max_to_keep,
+            keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours)
+
+    def _model_fn(self, X, y):
+        return models.get_rnn_model(self.rnn_size, self.cell_type,
+                                    self.num_layers,
+                                    self.input_op_fn, self.bidirectional,
+                                    models.logistic_regression,
+                                    self.sequence_length,
+                                    self.initial_state)(X, y)
+
+    @property
+    def bias_(self):
+        """Returns the value of the 'logistic_regression/bias:0' tensor."""
+        return self.get_tensor_value('logistic_regression/bias:0')
+
+    @property
+    def weights_(self):
+        """Returns the value of the 'logistic_regression/weights:0' tensor."""
+        return self.get_tensor_value('logistic_regression/weights:0')
+
+
+class TensorFlowRNNRegressor(TensorFlowEstimator, RegressorMixin):
+    """TensorFlow RNN Regressor model.
+
+    Parameters:
+        rnn_size: The size for rnn cell, e.g. size of your word embeddings.
+        cell_type: The type of rnn cell, including rnn, gru, and lstm.
+        num_layers: The number of layers of the rnn model.
+        input_op_fn: Function that will transform the input tensor, such as
+                     creating word embeddings, byte list, etc. This takes
+                     an argument X for input and returns transformed X.
+        bidirectional: boolean, Whether this is a bidirectional rnn.
+        sequence_length: If sequence_length is provided, dynamic calculation is performed.
+                 This saves computational time when unrolling past max sequence length.
+        initial_state: An initial state for the RNN. This must be a tensor of appropriate type
+                       and shape [batch_size x cell.state_size].
+        tf_master: TensorFlow master. Empty string is default for local.
+        batch_size: Mini batch size.
+        steps: Number of steps to run over data.
+        optimizer: Optimizer name (or class), for example "SGD", "Adam",
+                   "Adagrad".
+        learning_rate: If this is constant float value, no decay function is used.
+            Instead, a customized decay function can be passed that accepts
+            global_step as parameter and returns a Tensor.
+            e.g. exponential decay function:
+            def exp_decay(global_step):
+                return tf.train.exponential_decay(
+                    learning_rate=0.1, global_step=global_step,
+                    decay_steps=2, decay_rate=0.001)
+        tf_random_seed: Random seed for TensorFlow initializers.
+            Setting this value, allows consistency between reruns.
+        continue_training: when continue_training is True, once initialized
+            model will be continually trained on every call of fit.
+        n_classes, config_addon: Passed through unchanged to TensorFlowEstimator.
+        verbose: Controls the verbosity, possible values:
+                 0: the algorithm and debug information is muted.
+                 1: trainer prints the progress.
+                 2: log device placement is printed.
+        max_to_keep: The maximum number of recent checkpoint files to keep.
+            As new files are created, older files are deleted.
+            If None or 0, all checkpoint files are kept.
+            Defaults to 5 (that is, the 5 most recent checkpoint files are kept.)
+        keep_checkpoint_every_n_hours: Number of hours between each checkpoint
+            to be saved. The default value of 10,000 hours effectively disables the feature.
+   """
+
+    def __init__(self, rnn_size, cell_type='gru', num_layers=1,
+                 input_op_fn=null_input_op_fn, initial_state=None,
+                 bidirectional=False, sequence_length=None,
+                 n_classes=0, tf_master="", batch_size=32,
+                 steps=50, optimizer="SGD", learning_rate=0.1,
+                 tf_random_seed=42, continue_training=False,
+                 config_addon=None, verbose=1,
+                 max_to_keep=5, keep_checkpoint_every_n_hours=10000):
+
+        self.rnn_size = rnn_size
+        self.cell_type = cell_type
+        self.input_op_fn = input_op_fn
+        self.bidirectional = bidirectional
+        self.num_layers = num_layers
+        self.sequence_length = sequence_length
+        self.initial_state = initial_state
+        super(TensorFlowRNNRegressor, self).__init__(
+            model_fn=self._model_fn,
+            n_classes=n_classes, tf_master=tf_master,
+            batch_size=batch_size, steps=steps, optimizer=optimizer,
+            learning_rate=learning_rate, tf_random_seed=tf_random_seed,
+            continue_training=continue_training, config_addon=config_addon,
+            verbose=verbose, max_to_keep=max_to_keep,
+            keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours)
+
+    def _model_fn(self, X, y):
+        return models.get_rnn_model(self.rnn_size, self.cell_type,
+                                    self.num_layers,
+                                    self.input_op_fn, self.bidirectional,
+                                    models.linear_regression,
+                                    self.sequence_length,
+                                    self.initial_state)(X, y)
+
+    @property
+    def bias_(self):
+        """Returns the value of the 'linear_regression/bias:0' tensor."""
+        return self.get_tensor_value('linear_regression/bias:0')
+
+    @property
+    def weights_(self):
+        """Returns the value of the 'linear_regression/weights:0' tensor."""
+        return self.get_tensor_value('linear_regression/weights:0')
diff --git a/tensorflow/contrib/skflow/python/skflow/io/__init__.py b/tensorflow/contrib/skflow/python/skflow/io/__init__.py
new file mode 100644
index 0000000000..7f93552075
--- /dev/null
+++ b/tensorflow/contrib/skflow/python/skflow/io/__init__.py
@@ -0,0 +1,19 @@
+"""Tools to allow different io formats."""
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+from tensorflow.contrib.skflow.python.skflow.io.pandas_io import *
+from tensorflow.contrib.skflow.python.skflow.io.dask_io import *
diff --git a/tensorflow/contrib/skflow/python/skflow/io/dask_io.py b/tensorflow/contrib/skflow/python/skflow/io/dask_io.py
new file mode 100644
index 0000000000..10c040fc09
--- /dev/null
+++ b/tensorflow/contrib/skflow/python/skflow/io/dask_io.py
@@ -0,0 +1,77 @@
+"""Methods to allow dask.DataFrame."""
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+import numpy as np
+
+
+try:
+    import dask.dataframe as dd
+    allowed_classes = (dd.Series, dd.DataFrame)
+    HAS_DASK = True
+except ImportError:
+    HAS_DASK = False
+
+
+def _add_to_index(df, start):
+    """Return a copy of ``df`` whose index has ``start`` added to it;
+    the object passed in is not modified.
+    """
+    df = df.copy()
+    df.index = df.index + start
+    return df
+
+
+def _get_divisions(df):
+    """Number of rows in each sub-dataframe"""
+    lengths = df.map_partitions(len).compute()
+    divisions = np.cumsum(lengths).tolist()
+    divisions.insert(0, 0)
+    return divisions
+
+
+def _construct_dask_df_with_divisions(df):
+    """Wrap ``df`` in a new task graph whose i-th partition has divisions[i] added to its index."""
+    divisions = _get_divisions(df)
+    name = 'csv-index' + df._name
+    dsk = {(name, i): (_add_to_index, (df._name, i), divisions[i]) for i in range(df.npartitions)}
+    from toolz import merge
+    if isinstance(df, dd.DataFrame):
+        return dd.DataFrame(merge(dsk, df.dask), name, df.columns, divisions)
+    elif isinstance(df, dd.Series):
+        return dd.Series(merge(dsk, df.dask), name, df.name, divisions)  # other types: returns None
+
+
+def extract_dask_data(data):
+    """Extract data from dask.Series or dask.DataFrame for predictors"""
+    if isinstance(data, allowed_classes):
+        return _construct_dask_df_with_divisions(data)
+    else:
+        return data
+
+
+def extract_dask_labels(labels):
+    """Extract data from dask.Series for labels"""
+    if isinstance(labels, dd.DataFrame):
+        ncol = labels.columns
+    elif isinstance(labels, dd.Series):
+        ncol = labels.name
+    if isinstance(labels, allowed_classes):
+        if len(ncol) > 1:
+            raise ValueError('Only one column for labels is allowed.')
+        return _construct_dask_df_with_divisions(labels)
+    else:
+        return labels
diff --git a/tensorflow/contrib/skflow/python/skflow/io/data_feeder.py b/tensorflow/contrib/skflow/python/skflow/io/data_feeder.py
new file mode 100644
index 0000000000..cf52afd4c7
--- /dev/null
+++ b/tensorflow/contrib/skflow/python/skflow/io/data_feeder.py
@@ -0,0 +1,413 @@
+"""Implementations of different data feeders to provide data for TF trainer."""
+
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+import itertools
+import math
+
+import six
+from six.moves import xrange   # pylint: disable=redefined-builtin
+
+import numpy as np
+from sklearn.utils import check_array
+
+from .pandas_io import HAS_PANDAS, extract_pandas_data, extract_pandas_matrix, extract_pandas_labels
+from .dask_io import HAS_DASK, extract_dask_data, extract_dask_labels
+
+
+def _get_in_out_shape(x_shape, y_shape, n_classes, batch_size):
+    """Returns shape for input and output of the data feeder."""
+    x_shape = list(x_shape[1:]) if len(x_shape) > 1 else [1]
+    input_shape = [batch_size] + x_shape
+    y_shape = list(y_shape[1:]) if len(y_shape) > 1 else []
+    # Skip first dimention if it is 1.
+    if y_shape and y_shape[0] == 1:
+        y_shape = y_shape[1:]
+    if n_classes > 1:
+        output_shape = [batch_size] + y_shape + [n_classes]
+    else:
+        output_shape = [batch_size] + y_shape
+    return input_shape, output_shape
+
+
+def _data_type_filter(X, y):
+    """Runs X and y through the dask and pandas extract_* helpers when those libraries exist."""
+    if HAS_DASK:
+        X = extract_dask_data(X)
+        y = extract_dask_labels(y)
+    if HAS_PANDAS:
+        X = extract_pandas_data(X)
+        y = extract_pandas_labels(y)
+    return X, y
+
+
+def _is_iterable(X):
+    return hasattr(X, 'next') or hasattr(X, '__next__')
+
+
+def setup_train_data_feeder(X, y, n_classes, batch_size):
+    """Create data feeder, to sample inputs from dataset.
+    If X and y are iterators, use StreamingDataFeeder.
+
+    Args:
+        X: numpy, pandas or Dask matrix or iterable.
+        y: numpy, pandas or Dask array or iterable.
+        n_classes: number of classes.
+        batch_size: size to split data into parts.
+
+    Returns:
+        DataFeeder object that returns training data.
+    """
+    X, y = _data_type_filter(X, y)
+    if HAS_DASK:
+        import dask.dataframe as dd
+        allowed_classes = (dd.Series, dd.DataFrame)
+        if isinstance(X, allowed_classes) and isinstance(y, allowed_classes):
+            data_feeder_cls = DaskDataFeeder
+        else:
+            data_feeder_cls = DataFeeder
+    else:
+        data_feeder_cls = DataFeeder
+
+    if _is_iterable(X):
+        if not _is_iterable(y):
+            raise ValueError("Both X and y should be iterators for "
+                             "streaming learning to work.")
+        data_feeder_cls = StreamingDataFeeder
+    return data_feeder_cls(X, y, n_classes, batch_size)
+
+
+def _batch_data(X, batch_size):
+    chunk = []
+    for data in X:
+        chunk.append(data)
+        if batch_size > 0 and len(chunk) >= batch_size:
+            yield np.matrix(chunk)
+            chunk = []
+    yield np.matrix(chunk)
+
+
+def setup_predict_data_feeder(X, batch_size=-1):
+    """Returns an iterable for feeding into predict step.
+
+    Args:
+        X: numpy, pandas, Dask array or iterable.
+        batch_size: Size of batches to split data into.
+            If negative, returns one batch of full size.
+
+    Returns:
+        List or iterator of parts of data to predict on.
+    """
+    if HAS_DASK:
+        X = extract_dask_data(X)
+    if HAS_PANDAS:
+        X = extract_pandas_data(X)
+    if _is_iterable(X):
+        return _batch_data(X, batch_size)
+    if len(X.shape) == 1:
+        X = np.reshape(X, (-1, 1))
+    if batch_size > 0:
+        n_batches = int(math.ceil(float(len(X)) / batch_size))
+        return [X[i * batch_size:(i + 1) * batch_size] for i in xrange(n_batches)]
+    return [X]
+
+
+def setup_processor_data_feeder(X):
+    """Sets up processor iterable.
+
+    Args:
+        X: numpy, pandas or iterable.
+
+    Returns:
+        Iterable of data to process.
+    """
+    if HAS_PANDAS:
+        X = extract_pandas_matrix(X)
+    return X
+
+
+class DataFeeder(object):
+    """Data feeder is an example class to sample data for TF trainer.
+
+    Parameters:
+        X: feature Nd numpy matrix of shape [n_samples, n_features, ...].
+        y: target vector, either floats for regression or class id for
+            classification. If matrix, will consider as a sequence
+            of targets.
+        n_classes: number of classes, 0 and 1 are considered regression.
+        batch_size: mini batch size to accumulate.
+        random_state: numpy RandomState object to reproduce sampling.
+
+    Attributes:
+        X: input features.
+        y: input target.
+        n_classes: number of classes.
+        batch_size: mini batch size to accumulate.
+        input_shape: shape of the input.
+        output_shape: shape of the output.
+        input_dtype: dtype of input.
+        output_dtype: dtype of output.
+    """
+
+    def __init__(self, X, y, n_classes, batch_size, random_state=None):
+        x_dtype = np.int64 if X.dtype == np.int64 else np.float32
+        y_dtype = np.int64 if n_classes > 1 else np.float32
+        self.X = check_array(X, ensure_2d=False,
+                             allow_nd=True, dtype=x_dtype)
+        self.y = check_array(y, ensure_2d=False, dtype=y_dtype)
+        self.n_classes = n_classes
+        self.batch_size = batch_size
+        self.input_shape, self.output_shape = _get_in_out_shape(
+            self.X.shape, self.y.shape, n_classes, batch_size)
+        # Input dtype matches dtype of X.
+        self.input_dtype = self.X.dtype
+        # Output dtype always float32 (because for classification we use
+        # one-hot vectors.
+        self.output_dtype = np.float32
+        self.random_state = np.random.RandomState(42) if random_state is None else random_state
+        self.indices = self.random_state.permutation(self.X.shape[0])
+        self.offset = 0
+        self.epoch = 0
+
+    def get_feed_params(self):
+        """Function returns a dict with data feed params while training.
+        Returns:
+            A dict with data feed params while training.
+        """
+        return {
+            'epoch': self.epoch,
+            'offset': self.offset,
+            'batch_size': self.batch_size
+        }
+
+    def get_feed_dict_fn(self, input_placeholder, output_placeholder):
+        """Returns a function, that will sample data and provide it to given
+        placeholders.
+
+        Args:
+            input_placeholder: tf.Placeholder for input features mini batch.
+            output_placeholder: tf.Placeholder for output targets.
+        Returns:
+            A function that when called samples a random subset of batch size
+            from X and y.
+        """
+        def _feed_dict_fn():
+            # take random indices
+            batch_indices = self.indices[self.offset: self.offset+self.batch_size]
+
+            # assign input features from random indices
+            inp = np.array(self.X[batch_indices]).reshape((batch_indices.shape[0], 1)) \
+                if len(self.X.shape) == 1 else self.X[batch_indices]
+
+            # assign labels from random indices
+            self.output_shape[0] = batch_indices.shape[0]
+            out = np.zeros(self.output_shape, dtype=self.output_dtype)
+            for i in xrange(out.shape[0]):
+                sample = batch_indices[i]
+                if self.n_classes > 1:
+                    if len(self.output_shape) == 2:
+                        out.itemset((i, self.y[sample]), 1.0)
+                    else:
+                        for idx, value in enumerate(self.y[sample]):
+                            out.itemset(tuple([i, idx, value]), 1.0)
+                else:
+                    out[i] = self.y[sample]
+
+            # move offset and reset it if necessary
+            self.offset += self.batch_size
+            if self.offset >= self.X.shape[0]:
+                self.indices = self.random_state.permutation(self.X.shape[0])
+                self.offset = 0
+                self.epoch += 1
+
+            return {input_placeholder.name: inp, output_placeholder.name: out}
+        return _feed_dict_fn
+
+
+class StreamingDataFeeder(object):
+    """Data feeder for TF trainer that reads data from iterator.
+
+    Streaming data feeder allows to read data as it comes it from disk or
+    somewhere else. It's custom to have this iterators rotate infinetly over
+    the dataset, to allow control of how much to learn on the trainer side.
+
+    Parameters:
+        X: iterator that returns for each element, returns features.
+        y: iterator that returns for each element, returns 1 or many classes /
+           regression values.
+        n_classes: indicator of how many classes the target has.
+        batch_size: Mini batch size to accumulate.
+
+    Attributes:
+        X: input features.
+        y: input target.
+        n_classes: number of classes.
+        batch_size: mini batch size to accumulate.
+        input_shape: shape of the input.
+        output_shape: shape of the output.
+        input_dtype: dtype of input.
+        output_dtype: dtype of output.
+    """
+
+    def __init__(self, X, y, n_classes, batch_size):
+        X_first_el = six.next(X)
+        y_first_el = six.next(y)
+        self.X = itertools.chain([X_first_el], X)
+        self.y = itertools.chain([y_first_el], y)
+        self.n_classes = n_classes
+        self.batch_size = batch_size
+        self.input_shape, self.output_shape = _get_in_out_shape(
+            [1] + list(X_first_el.shape),
+            [1] + list(y_first_el.shape), n_classes, batch_size)
+        self.input_dtype = X_first_el.dtype
+        # Convert float64 to float32, as all the parameters in the model are
+        # floats32 and there is a lot of benefits in using it in NNs.
+        if self.input_dtype == np.float64:
+            self.input_dtype = np.float32
+        # Output types are floats, due to both softmaxes and regression req.
+        self.output_dtype = np.float32
+
+    def get_feed_params(self):
+        """Function returns a dict with data feed params while training.
+        Returns:
+            A dict with data feed params while training.
+        """
+        return {'batch_size': self.batch_size}
+
+    def get_feed_dict_fn(self, input_placeholder, output_placeholder):
+        """Returns a function, that will sample data and provide it to given
+
+        placeholders.
+
+        Args:
+            input_placeholder: tf.Placeholder for input features mini batch.
+            output_placeholder: tf.Placeholder for output targets.
+        Returns:
+            A function that when called samples a random subset of batch size
+            from X and y.
+        """
+        def _feed_dict_fn():
+            inp = np.zeros(self.input_shape, dtype=self.input_dtype)
+            out = np.zeros(self.output_shape, dtype=self.output_dtype)
+            for i in xrange(self.batch_size):
+                inp[i, :] = six.next(self.X)
+                y = six.next(self.y)
+                if self.n_classes > 1:
+                    if len(self.output_shape) == 2:
+                        out.itemset((i, y), 1.0)
+                    else:
+                        for idx, value in enumerate(y):
+                            out.itemset(tuple([i, idx, value]), 1.0)
+                else:
+                    out[i] = y
+            return {input_placeholder.name: inp, output_placeholder.name: out}
+        return _feed_dict_fn
+
+
+class DaskDataFeeder(object):
+    """Data feeder for TF trainer that reads data from dask.Series and dask.DataFrame.
+
+    Numpy arrays can be serialized to disk and it's possible to do random seeks into them.
+    DaskDataFeeder will remove requirement to have full dataset in the memory and still do
+    random seeks for sampling of batches.
+
+    Parameters:
+        X: dask collection exposing ``columns`` and ``count()`` that holds the features.
+        y: dask collection exposing ``columns`` that holds the targets; it is
+           concatenated with X along axis 1.
+        n_classes: indicator of how many classes the target has.
+        batch_size: Mini batch size to accumulate.
+        random_state: random state for RNG. Note that it will mutate so use a int value
+            for this if you want consistent sized batches.
+
+    Attributes:
+        X: input features.
+        y: input target.
+        n_classes: number of classes.
+        batch_size: mini batch size to accumulate.
+        input_shape: shape of the input.
+        output_shape: shape of the output.
+        input_dtype: dtype of input.
+        output_dtype: dtype of output.
+    """
+    def __init__(self, X, y, n_classes, batch_size, random_state=None):
+        import dask.dataframe as dd
+        # TODO: check X and y dtypes in dask_io like pandas
+        self.X = X
+        self.y = y
+        # save column names
+        self.X_columns = list(X.columns)
+        if isinstance(y.columns[0], str):
+            self.y_columns = list(y.columns)
+        else:
+            # deal with cases where two DFs have overlapped default numeric colnames
+            self.y_columns = len(self.X_columns)+1
+            self.y = self.y.rename(columns={y.columns[0]: self.y_columns})
+        # combine into a data frame
+        self.df = dd.multi.concat([self.X, self.y], axis=1)
+        self.n_classes = n_classes
+
+        X_count = X.count().compute()[0]
+        X_shape = (X_count, len(self.X.columns))
+        y_shape = (X_count, len(self.y.columns))
+        self.sample_fraction = batch_size/float(X_count)  # share of the rows requested per batch
+        self.input_shape, self.output_shape = _get_in_out_shape(
+            X_shape, y_shape, n_classes, batch_size)
+        # self.X.dtypes[0], self.y.dtypes[self.y_columns]
+        self.input_dtype, self.output_dtype = np.float32, np.float32
+        if random_state is None:
+            self.random_state = 66
+        else:
+            self.random_state = random_state
+        self.batch_size = batch_size
+
+    def get_feed_params(self):
+        """Function returns a dict with data feed params while training.
+        Returns:
+            A dict with data feed params while training.
+        """
+        return {'batch_size': self.batch_size}
+
+    def get_feed_dict_fn(self, input_placeholder, output_placeholder):
+        """Returns a function, that will sample data and provide it to given
+        placeholders.
+
+        Args:
+            input_placeholder: tf.Placeholder for input features mini batch.
+            output_placeholder: tf.Placeholder for output targets.
+        Returns:
+            A function that when called takes the ``sample_fraction`` part of a
+            random_split of the combined frame and one-hot encodes its targets.
+        """
+        def _feed_dict_fn():
+            # TODO: option for with/without replacement (dev version of dask)
+            sample = self.df.random_split([self.sample_fraction, 1-self.sample_fraction],
+                                          random_state=self.random_state)
+            inp = extract_pandas_matrix(sample[0][self.X_columns].compute()).tolist()
+            out = extract_pandas_matrix(sample[0][self.y_columns].compute())
+            # convert to correct dtype
+            inp = np.array(inp, dtype=self.input_dtype)
+            # one-hot encode out for each class for cross entropy loss
+            if HAS_PANDAS:
+                import pandas as pd
+                if not isinstance(out, pd.Series):
+                    out = out.flatten()
+            out_max = self.y.max().compute().values[0]  # NOTE(review): recomputed on every batch
+            encoded_out = np.zeros((out.size, out_max+1), dtype=self.output_dtype)
+            encoded_out[np.arange(out.size), out] = 1
+            return {input_placeholder.name: inp, output_placeholder.name: encoded_out}
+        return _feed_dict_fn
diff --git a/tensorflow/contrib/skflow/python/skflow/io/pandas_io.py b/tensorflow/contrib/skflow/python/skflow/io/pandas_io.py
new file mode 100644
index 0000000000..a702a13efb
--- /dev/null
+++ b/tensorflow/contrib/skflow/python/skflow/io/pandas_io.py
@@ -0,0 +1,59 @@
+"""Methods to allow pandas.DataFrame."""
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+try:
+    import pandas as pd
+    HAS_PANDAS = True
+except ImportError:
+    HAS_PANDAS = False
+
+# Names of the pandas dtypes accepted by the extract_pandas_* helpers (this module only tests the keys).
+PANDAS_DTYPES = dict(int8='int', int16='int', int32='int', int64='int', uint8='int', uint16='int',
+                     uint32='int', uint64='int', float16='float', float32='float', float64='float', bool='i')
+
+
+def extract_pandas_data(data):
+    """Returns a float matrix for a pandas.DataFrame; any other input is passed through."""
+    # `pd` is unbound when the pandas import failed, so consult HAS_PANDAS first.
+    if not HAS_PANDAS or not isinstance(data, pd.DataFrame):
+        return data
+    if all(dtype.name in PANDAS_DTYPES for dtype in data.dtypes):
+        return data.values.astype('float')
+    else:
+        raise ValueError('Data types for data must be int, float, or bool.')
+
+
+def extract_pandas_matrix(data):
+    """Extracts numpy matrix from pandas DataFrame; any other input is returned as is."""
+    if not isinstance(data, pd.DataFrame):
+        return data
+    # DataFrame.as_matrix() was removed in pandas 1.0; .values is the equivalent.
+    return data.values
+
+
+def extract_pandas_labels(labels):
+    """Returns float labels for a one-column pandas.DataFrame; other input is passed through."""
+    # NOTE: pandas.Series is not a DataFrame subclass, so Series labels are returned unchanged.
+    if HAS_PANDAS and isinstance(labels, pd.DataFrame):
+        if len(labels.columns) > 1:
+            raise ValueError('Only one column for labels is allowed.')
+        if all(dtype.name in PANDAS_DTYPES for dtype in labels.dtypes):
+            return labels.values.astype('float')
+        else:
+            raise ValueError('Data types for labels must be int, float, or bool.')
+    else:
+        return labels
diff --git a/tensorflow/contrib/skflow/python/skflow/models.py b/tensorflow/contrib/skflow/python/skflow/models.py
new file mode 100644
index 0000000000..525a0dbc82
--- /dev/null
+++ b/tensorflow/contrib/skflow/python/skflow/models.py
@@ -0,0 +1,253 @@
+"""Various high level TF models."""
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+import tensorflow as tf
+
+from .ops import mean_squared_error_regressor, softmax_classifier, dnn
+
+
+def linear_regression(X, y):
+    """Builds a linear regression TensorFlow subgraph.
+
+    Args:
+        X: tensor or placeholder holding the input features.
+        y: tensor or placeholder holding the target.
+
+    Returns:
+        Predictions and loss tensors.
+    """
+    with tf.variable_scope('linear_regression'):
+        # Histogram summaries of the raw inputs.
+        tf.histogram_summary('linear_regression.X', X)
+        tf.histogram_summary('linear_regression.y', y)
+        target_shape = y.get_shape()
+        # A rank-1 target means a single output column.
+        n_outputs = 1 if len(target_shape) == 1 else target_shape[1]
+        n_features = X.get_shape()[1]
+        weights = tf.get_variable('weights', [n_features, n_outputs])
+        bias = tf.get_variable('bias', [n_outputs])
+        tf.histogram_summary('linear_regression.weights', weights)
+        tf.histogram_summary('linear_regression.bias', bias)
+        return mean_squared_error_regressor(X, y, weights, bias)
+
+
+def logistic_regression(X, y, class_weight=None):
+    """Creates logistic regression TensorFlow subgraph.
+
+    Args:
+        X: tensor or placeholder for input features,
+           shape should be [batch_size, n_features].
+        y: tensor or placeholder for target,
+           shape should be [batch_size, n_classes].
+        class_weight: tensor, [n_classes], where for each class
+                      it has weight of the class. If None, the graph is
+                      searched for a tensor named `class_weight:0`; if that
+                      is missing too, None is passed on to softmax_classifier.
+
+    Returns:
+        Predictions and loss tensors.
+    """
+    with tf.variable_scope('logistic_regression'):
+        tf.histogram_summary('logistic_regression.X', X)
+        tf.histogram_summary('logistic_regression.y', y)
+        weights = tf.get_variable('weights', [X.get_shape()[1],
+                                              y.get_shape()[-1]])
+        bias = tf.get_variable('bias', [y.get_shape()[-1]])
+        tf.histogram_summary('logistic_regression.weights', weights)
+        tf.histogram_summary('logistic_regression.bias', bias)
+        # Fall back to the graph's `class_weight:0` tensor only when no weight was given.
+        # Compare against None: truth-testing a tensor or multi-element array is an error.
+        if class_weight is None:
+            try:
+                class_weight = tf.get_default_graph().get_tensor_by_name('class_weight:0')
+            except KeyError:
+                pass
+        return softmax_classifier(X, y, weights, bias,
+                                  class_weight=class_weight)
+
+
+def get_dnn_model(hidden_units, target_predictor_fn):
+    """Builds a factory for a DNN TensorFlow subgraph with the given
+    params.
+
+    Args:
+        hidden_units: List of values of hidden units for layers.
+        target_predictor_fn: Function that will predict target from input
+                             features. This can be logistic regression,
+                             linear regression or any other model,
+                             that takes X, y and returns predictions and loss tensors.
+
+    Returns:
+        A function of (X, y) that creates the subgraph.
+    """
+    def dnn_estimator(X, y):
+        """Stacks the hidden layers on X and feeds them to target_predictor_fn."""
+        # The hidden layers are created before the predictor on top of them.
+        return target_predictor_fn(dnn(X, hidden_units), y)
+    return dnn_estimator
+
+## This will be in Tensorflow 0.7.
+## TODO(ilblackdragon): Clean this up when it's released
+
+
+def _reverse_seq(input_seq, lengths):
+    """Time-reverses a list of Tensors, optionally only up to given lengths.
+    Args:
+        input_seq: Sequence of seq_len tensors of dimension (batch_size, depth)
+        lengths:   A tensor of dimension batch_size, containing lengths for each
+                   sequence in the batch. If "None" is specified, simply reverses
+                   the list.
+    Returns:
+        time-reversed sequence
+    """
+    # Without lengths the whole list is simply flipped.
+    if lengths is None:
+        return list(reversed(input_seq))
+
+    # Pin every step to rank 2 before the steps are joined.
+    for step in input_seq:
+        step.set_shape(step.get_shape().with_rank(2))
+
+    # Join into (time, batch_size, depth),
+    # reverse along dimension 0,
+    # then split again into a list.
+    stacked = tf.pack(input_seq)
+    flipped = tf.reverse_sequence(stacked, lengths, 0, 1)
+    return tf.unpack(flipped)
+
+
+def bidirectional_rnn(cell_fw, cell_bw, inputs,
+                      initial_state_fw=None, initial_state_bw=None,
+                      dtype=None, sequence_length=None, scope=None):
+    """Creates a bidirectional recurrent neural network.
+    Similar to the unidirectional case (rnn) but takes input and builds
+    independent forward and backward RNNs with the final forward and backward
+    outputs depth-concatenated, such that the output will have the format
+    [time][batch][cell_fw.output_size + cell_bw.output_size]. The input_size of
+    forward and backward cell must match. The initial state for both directions
+    is zero by default (but can be set optionally) and no intermediate states are
+    ever returned -- the network is fully unrolled for the given (passed in)
+    length(s) of the sequence(s) or completely unrolled if length(s) is not given.
+    Args:
+        cell_fw: An instance of RNNCell, to be used for forward direction.
+        cell_bw: An instance of RNNCell, to be used for backward direction.
+        inputs: A length T list of inputs, each a tensor of shape
+          [batch_size, cell.input_size].
+        initial_state_fw: (optional) An initial state for the forward RNN.
+          This must be a tensor of appropriate type and shape
+          [batch_size x cell.state_size].
+        initial_state_bw: (optional) Same as for initial_state_fw.
+        dtype: (optional) The data type for the initial state.  Required if either
+          of the initial states are not provided.
+        sequence_length: (optional) An int64 vector (tensor) of size [batch_size],
+          containing the actual lengths for each of the sequences.
+        scope: (optional) String prefix of the "_FW"/"_BW" variable scopes; defaults to "BiRNN"
+    Returns:
+        A pair (outputs, state) where:
+          outputs is a length T list of outputs (one for each input), which
+            are depth-concatenated forward and backward outputs
+          state is the concatenated final state of the forward and backward RNN
+    Raises:
+        TypeError: If "cell_fw" or "cell_bw" is not an RNNCell, or inputs is not a list.
+        ValueError: If inputs is an empty list.
+    """
+
+    if not isinstance(cell_fw, tf.nn.rnn_cell.RNNCell):
+        raise TypeError("cell_fw must be an instance of RNNCell")
+    if not isinstance(cell_bw, tf.nn.rnn_cell.RNNCell):
+        raise TypeError("cell_bw must be an instance of RNNCell")
+    if not isinstance(inputs, list):
+        raise TypeError("inputs must be a list")
+    if not inputs:
+        raise ValueError("inputs must not be empty")
+
+    name = scope or "BiRNN"
+    # Forward direction
+    with tf.variable_scope(name + "_FW"):
+        output_fw, state_fw = tf.nn.rnn(cell_fw, inputs, initial_state_fw, dtype,
+                                        sequence_length)
+
+    # Backward direction: run on the reversed inputs, then reverse the outputs back
+    with tf.variable_scope(name + "_BW"):
+        tmp, state_bw = tf.nn.rnn(cell_bw, _reverse_seq(inputs, sequence_length),
+                                  initial_state_bw, dtype, sequence_length)
+    output_bw = _reverse_seq(tmp, sequence_length)
+    # Concat each of the forward/backward outputs
+    outputs = [tf.concat(1, [fw, bw])
+               for fw, bw in zip(output_fw, output_bw)]
+
+    return outputs, tf.concat(1, [state_fw, state_bw])
+
+# End of Tensorflow 0.7
+
+
+def get_rnn_model(rnn_size, cell_type, num_layers, input_op_fn,
+                  bidirectional, target_predictor_fn,
+                  sequence_length, initial_state):
+    """Builds a factory for a RNN TensorFlow subgraph with the given
+    params.
+
+    Args:
+        rnn_size: The size for rnn cell, e.g. size of your word embeddings.
+        cell_type: The type of rnn cell; one of 'rnn', 'gru' or 'lstm'.
+        num_layers: The number of layers of the rnn model.
+        input_op_fn: Function that will transform the input tensor, such as
+                     creating word embeddings, byte list, etc. This takes
+                     an argument X for input and returns transformed X.
+        bidirectional: boolean, Whether this is a bidirectional rnn.
+        target_predictor_fn: Function that will predict target from input
+                             features. This can be logistic regression,
+                             linear regression or any other model,
+                             that takes X, y and returns predictions and loss tensors.
+        sequence_length: If sequence_length is provided, dynamic calculation is performed.
+                         This saves computational time when unrolling past max sequence length.
+                         Required for bidirectional RNNs.
+        initial_state: An initial state for the RNN. This must be a tensor of appropriate type
+                       and shape [batch_size x cell.state_size].
+
+    Returns:
+        A function of (X, y) that creates the subgraph.
+    """
+    def rnn_estimator(X, y):
+        """Encodes the transformed input with the RNN and applies target_predictor_fn."""
+        inputs = input_op_fn(X)
+        if cell_type == 'rnn':
+            make_cell = tf.nn.rnn_cell.BasicRNNCell
+        elif cell_type == 'gru':
+            make_cell = tf.nn.rnn_cell.GRUCell
+        elif cell_type == 'lstm':
+            make_cell = tf.nn.rnn_cell.BasicLSTMCell
+        else:
+            raise ValueError("cell_type {} is not supported. ".format(cell_type))
+        if not bidirectional:
+            # Unidirectional case: a single stacked cell.
+            stacked = tf.nn.rnn_cell.MultiRNNCell([make_cell(rnn_size)] * num_layers)
+            _, encoding = tf.nn.rnn(stacked, inputs, dtype=tf.float32,
+                                    sequence_length=sequence_length,
+                                    initial_state=initial_state)
+            return target_predictor_fn(encoding, y)
+        # Bidirectional case: one stacked cell per direction, forward first.
+        forward = tf.nn.rnn_cell.MultiRNNCell([make_cell(rnn_size)] * num_layers)
+        backward = tf.nn.rnn_cell.MultiRNNCell([make_cell(rnn_size)] * num_layers)
+        # pylint: disable=unexpected-keyword-arg, no-value-for-parameter
+        _, encoding = bidirectional_rnn(forward, backward, inputs,
+                                        dtype=tf.float32,
+                                        sequence_length=sequence_length,
+                                        initial_state_fw=initial_state,
+                                        initial_state_bw=initial_state)
+        return target_predictor_fn(encoding, y)
+    return rnn_estimator
diff --git a/tensorflow/contrib/skflow/python/skflow/monitors.py b/tensorflow/contrib/skflow/python/skflow/monitors.py
new file mode 100644
index 0000000000..2b8adb9bdc
--- /dev/null
+++ b/tensorflow/contrib/skflow/python/skflow/monitors.py
@@ -0,0 +1,172 @@
+"""Monitors to track model training, report on progress and request early stopping"""
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+import sys
+import numpy as np
+
+from tensorflow.contrib.skflow.python.skflow.io.data_feeder import setup_train_data_feeder
+
+
+# pylint: disable=too-many-instance-attributes
+# pylint: disable=too-few-public-methods
+# pylint: disable=too-many-arguments
+# pylint: disable=attribute-defined-outside-init
+
+def default_monitor():
+    """Returns a BaseMonitor built with its default arguments, to summarize training progress"""
+    return BaseMonitor()
+
+
+class BaseMonitor(object):
+    """Base class for all learning monitors. Stores and reports training loss throughout learning
+
+    Parameters:
+        print_steps: Number of steps in between printing cost.
+        early_stopping_rounds:  Activates early stopping if this is not None.
+                                Loss needs to decrease at least every <early_stopping_rounds>
+                                round(s) to continue training. (default: None)
+        verbose: Level of verbosity of output.
+
+    """
+    def __init__(self, print_steps=100, early_stopping_rounds=None, verbose=1):
+        self.print_steps = print_steps
+        self.early_stopping_rounds = early_stopping_rounds
+
+        self.converged = False
+        self.min_loss = np.inf
+        self.min_loss_i = 0
+        self.last_loss_seen = np.inf
+        self.steps = 0
+        self.print_train_loss_buffer = []
+        self.all_train_loss_buffer = []
+        self.verbose = verbose
+        self.epoch = None
+
+    def update(self, global_step, step_number, training_loss,
+               sess, feed_params_fn, loss_expression_tensor):
+        """Adds training_loss to monitor. Triggers printed output if appropriate
+
+            global_step: global step value, printed in the summary when no epoch is known
+            step_number: current step in training
+            training_loss: float value of training loss
+            sess: session for computation (used to calculate validation loss)
+            feed_params_fn: function generating dict with information like epoch. Sometimes None.
+            loss_expression_tensor: Tensor applied to validation data to calculate val loss
+        """
+        self.steps = step_number
+        self.global_step = global_step
+        self.print_train_loss_buffer.append(training_loss)
+        self.all_train_loss_buffer.append(training_loss)
+        self.sess = sess
+        self.loss_expression_tensor = loss_expression_tensor
+        self._set_last_loss_seen()
+        if self.last_loss_seen < self.min_loss:
+            self.min_loss = self.last_loss_seen
+            self.min_loss_i = self.steps
+        self._set_epoch(feed_params_fn)
+        self.report()
+
+    def _set_last_loss_seen(self):
+        """Sets last_loss_seen attribute to most recent training error"""
+        self.last_loss_seen = self.all_train_loss_buffer[-1]
+
+    def report(self):
+        """Checks whether to report, and prints loss information if appropriate"""
+        if self.verbose and (self.steps % self.print_steps == 0):
+            self._set_training_summary()
+            print(self._summary_str)
+
+    def monitor_inducing_stop(self):
+        """Returns True if the monitor requests the model stop (e.g. for early stopping)"""
+        if self.early_stopping_rounds is None:
+            return False
+        stop_now = (self.steps - self.min_loss_i >= self.early_stopping_rounds)
+        if stop_now:
+            sys.stderr.write("Stopping. Best step:\n step {} with loss {}\n"
+                             .format(self.min_loss_i, self.min_loss))
+        return stop_now
+
+    def create_val_feed_dict(self, inp, out):
+        """Validation requires access to TensorFlow placeholders. Not used in this Monitor"""
+        pass
+
+    def _set_epoch(self, feed_params_fn):
+        """Sets self.epoch from a function that generates a dictionary including this info"""
+        if feed_params_fn:
+            feed_params = feed_params_fn()
+            self.epoch = feed_params['epoch'] if 'epoch' in feed_params else None
+
+    def _set_training_summary(self):
+        """Stores the training progress summary in self._summary_str and clears the print buffer"""
+        avg_train_loss = np.mean(self.print_train_loss_buffer)
+        self.print_train_loss_buffer = []
+        # None is the "epoch unknown" sentinel; epoch 0 is a real epoch and must be reported.
+        if self.epoch is not None:
+            self._summary_str = ("Step #{step}, epoch #{epoch}, avg. train loss: {loss:.5f}"
+                                 .format(step=self.steps, loss=avg_train_loss,
+                                         epoch=self.epoch))
+        else:
+            self._summary_str = ("Step #{step}, avg. train loss: {loss:.5f}"
+                                 .format(step=self.global_step,
+                                         loss=avg_train_loss))
+        self._modify_summary_string()
+
+    def _modify_summary_string(self):
+        """Makes monitor specific changes to printed summary. Nothing interesting in BaseMonitor"""
+        pass
+
+
+class ValidationMonitor(BaseMonitor):
+    """Monitor that tracks loss on validation data and bases early stopping on it
+
+        val_X: Validation features
+        val_y: Validation labels
+        n_classes: Number of labels in output. 0 for regression
+        print_steps: Number of steps in between printing cost.
+        early_stopping_rounds:  Activates early stopping if this is not None.
+                                Loss needs to decrease at least every <early_stopping_rounds>
+                                round(s) to continue training. (default: None)
+
+    """
+    def __init__(self, val_X, val_y, n_classes=0, print_steps=100,
+                 early_stopping_rounds=None):
+        super(ValidationMonitor, self).__init__(print_steps=print_steps,
+                                                early_stopping_rounds=early_stopping_rounds)
+        self.val_feeder = setup_train_data_feeder(val_X, val_y, n_classes, -1)
+        self.print_val_loss_buffer = []
+        self.all_val_loss_buffer = []
+
+    def create_val_feed_dict(self, inp, out):
+        """Builds the validation feed dict for the given placeholders and stores it"""
+        sample_fn = self.val_feeder.get_feed_dict_fn(inp, out)
+        self.val_dict = sample_fn()
+
+    def _set_last_loss_seen(self):
+        """Sets self.last_loss_seen to the newest validation loss and buffers that value"""
+        # A one-element fetch list yields a one-element result list.
+        fetched = self.sess.run([self.loss_expression_tensor], feed_dict=self.val_dict)
+        val_loss = fetched[0]
+        self.last_loss_seen = val_loss
+        self.all_val_loss_buffer.append(val_loss)
+        self.print_val_loss_buffer.append(val_loss)
+
+    def _modify_summary_string(self):
+        """Appends the average validation loss to the summary and clears the print buffer"""
+        avg_val_loss = np.mean(self.print_val_loss_buffer)
+        self.print_val_loss_buffer = []
+        suffix = "avg. val loss: {val_loss:.5f}".format(val_loss=avg_val_loss)
+        self._summary_str = ", ".join((self._summary_str, suffix))
diff --git a/tensorflow/contrib/skflow/python/skflow/ops/__init__.py b/tensorflow/contrib/skflow/python/skflow/ops/__init__.py
new file mode 100644
index 0000000000..5a68995462
--- /dev/null
+++ b/tensorflow/contrib/skflow/python/skflow/ops/__init__.py
@@ -0,0 +1,24 @@
+"""Various TensorFlow Ops."""
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+from __future__ import division, print_function, absolute_import
+
+from tensorflow.contrib.skflow.python.skflow.ops.array_ops import *
+from tensorflow.contrib.skflow.python.skflow.ops.conv_ops import *
+from tensorflow.contrib.skflow.python.skflow.ops.dnn_ops import *
+from tensorflow.contrib.skflow.python.skflow.ops.dropout_ops import *
+from tensorflow.contrib.skflow.python.skflow.ops.embeddings_ops import *
+from tensorflow.contrib.skflow.python.skflow.ops.losses_ops import *
+from tensorflow.contrib.skflow.python.skflow.ops.seq2seq_ops import *
+from tensorflow.contrib.skflow.python.skflow.ops.batch_norm_ops import *
diff --git a/tensorflow/contrib/skflow/python/skflow/ops/array_ops.py b/tensorflow/contrib/skflow/python/skflow/ops/array_ops.py
new file mode 100644
index 0000000000..a342387bd9
--- /dev/null
+++ b/tensorflow/contrib/skflow/python/skflow/ops/array_ops.py
@@ -0,0 +1,73 @@
+"""TensorFlow ops for array / tensor manipulation."""
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+import tensorflow as tf
+
+
+def split_squeeze(dim, num_split, tensor_in):
+    """Splits the input along one dimension and squeezes that dimension out of each piece.
+
+    Args:
+        dim: Dimension to split and squeeze on.
+        num_split: integer, the number of ways to split.
+        tensor_in: Input tensor of shape [N1, N2, .. Ndim, .. Nx].
+    Returns:
+        List of tensors [N1, N2, .. Ndim-1, Ndim+1, .. Nx].
+    """
+    pieces = tf.split(dim, num_split, tensor_in)
+    return [tf.squeeze(piece, squeeze_dims=[dim]) for piece in pieces]
+
+
+def expand_concat(dim, inputs):
+    """Inserts a new axis at `dim` into every input and concatenates along it.
+
+    Args:
+        dim: Dimension to expand and concatenate on.
+        inputs: List of tensors of the same shape [N1, ... Nx].
+    Returns:
+        A tensor of shape [N1, .. Ndim, ... Nx]
+    """
+    expanded = [tf.expand_dims(tensor, dim) for tensor in inputs]
+    return tf.concat(dim, expanded)
+
+
+def one_hot_matrix(tensor_in, num_classes, on_value=1.0, off_value=0.0):
+    """Encodes indices from given tensor as one-hot tensor.
+
+    TODO(ilblackdragon): Ideally this should be a native TensorFlow (Eigen) operation.
+
+    Args:
+        tensor_in: Input tensor of shape [N1, N2].
+        num_classes: Number of classes to expand index into.
+        on_value: Tensor or float, value to fill-in given index.
+        off_value: Tensor or float, value to fill-in everything else.
+    Returns:
+        Tensor of shape [N1, N2, num_classes] with on_value at each id of the
+        original tensor and off_value everywhere else.
+    """
+    # Flatten to one (row, class id) index pair per element, scatter, then restore the input shape.
+    tensor_in = tf.convert_to_tensor(tensor_in)
+    sparse_values = tf.to_int64(tf.reshape(tensor_in, [-1, 1]))
+    size = tf.shape(sparse_values)[0]
+    dims = tf.shape(tensor_in)
+    indices = tf.to_int64(tf.reshape(tf.range(0, size), [-1, 1]))
+    indices_values = tf.concat(1, [indices, sparse_values])
+    outshape = tf.to_int64(expand_concat(0, [size, num_classes]))
+    one_hot_vector = tf.sparse_to_dense(indices_values, outshape, on_value, off_value)
+    ret = tf.reshape(one_hot_vector, tf.concat(0, [dims, [num_classes]]))
+    ret.set_shape(tensor_in.get_shape().concatenate(num_classes))
+    return ret
diff --git a/tensorflow/contrib/skflow/python/skflow/ops/batch_norm_ops.py b/tensorflow/contrib/skflow/python/skflow/ops/batch_norm_ops.py
new file mode 100644
index 0000000000..e4cf5d8e0b
--- /dev/null
+++ b/tensorflow/contrib/skflow/python/skflow/ops/batch_norm_ops.py
@@ -0,0 +1,57 @@
+"""TensorFlow ops for Batch Normalization."""
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+import tensorflow as tf
+
+
+def batch_normalize(tensor_in, epsilon=1e-5, convnet=True, decay=0.9,
+                    scale_after_normalization=True):
+    """Batch Normalization
+
+    Args:
+        tensor_in: input Tensor, 4D shape [batch, in_height, in_width, in_depth].
+        epsilon: A float number to avoid being divided by 0.
+        convnet: If True, moments are taken over axes [0, 1, 2]; otherwise only [0].
+        decay: decay rate for exponential moving average.
+        scale_after_normalization: Whether to scale after normalization.
+    Returns:
+        Result of tf.nn.batch_norm_with_global_normalization applied to tensor_in.
+    """
+    shape = tensor_in.get_shape().as_list()
+    with tf.variable_scope("batch_norm"):
+        gamma = tf.get_variable("gamma", [shape[-1]],
+                                initializer=tf.random_normal_initializer(1., 0.02))
+        beta = tf.get_variable("beta", [shape[-1]],
+                               initializer=tf.constant_initializer(0.))
+        ema = tf.train.ExponentialMovingAverage(decay=decay)
+        if convnet:
+            assign_mean, assign_var = tf.nn.moments(tensor_in, [0, 1, 2])
+        else:
+            assign_mean, assign_var = tf.nn.moments(tensor_in, [0])
+        ema_assign_op = ema.apply([assign_mean, assign_var])
+        ema_mean, ema_var = ema.average(assign_mean), ema.average(assign_var)
+        def update_mean_var():
+            """Returns the batch moments after the moving averages have been updated"""
+            with tf.control_dependencies([ema_assign_op]):
+                return tf.identity(assign_mean), tf.identity(assign_var)
+        # The training flag is read from the graph collection "IS_TRAINING".
+        is_training = tf.squeeze(tf.get_collection("IS_TRAINING"))
+        mean, variance = tf.cond(
+            is_training, update_mean_var, lambda: (ema_mean, ema_var))
+        return tf.nn.batch_norm_with_global_normalization(
+            tensor_in, mean, variance, beta, gamma, epsilon,
+            scale_after_normalization=scale_after_normalization)
diff --git a/tensorflow/contrib/skflow/python/skflow/ops/conv_ops.py b/tensorflow/contrib/skflow/python/skflow/ops/conv_ops.py
new file mode 100644
index 0000000000..aa741526ba
--- /dev/null
+++ b/tensorflow/contrib/skflow/python/skflow/ops/conv_ops.py
@@ -0,0 +1,63 @@
+"""TensorFlow ops for Convolution NNs."""
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+import tensorflow as tf
+from .batch_norm_ops import batch_normalize
+
+
+def conv2d(tensor_in, n_filters, filter_shape, strides=None, padding='SAME',
+           bias=True, activation=None, batch_norm=False):
+    """Builds a 2D convolutional subgraph with a bank of filters.
+
+    Uses tf.nn.conv2d under the hood.
+    Creates a filter bank:
+      [filter_shape[0], filter_shape[1], tensor_in[3], n_filters]
+    and applies it to the input tensor.
+
+    Args:
+        tensor_in: input Tensor, 4D shape:
+                   [batch, in_height, in_width, in_depth].
+        n_filters: number of filters in the bank.
+        filter_shape: Shape of filters, a list of ints, 1-D of length 2.
+        strides: A list of ints, 1-D of length 4. The stride of the sliding
+                 window for each dimension of input. Defaults to [1, 1, 1, 1].
+        padding: A string: 'SAME' or 'VALID'. The type of padding algorithm to
+                 use.
+        bias: Boolean, if to add bias.
+        activation: Activation Op, optional. If provided applied on the output.
+        batch_norm: Whether to apply batch normalization.
+
+    Returns:
+        A Tensor with resulting convolution.
+    """
+    with tf.variable_scope('convolution'):
+        strides = [1, 1, 1, 1] if strides is None else strides
+        in_depth = tensor_in.get_shape()[3]
+        kernel_shape = list(filter_shape) + [in_depth, n_filters]
+        kernel = tf.get_variable('filters', kernel_shape, tf.float32)
+        result = tf.nn.conv2d(tensor_in, kernel, strides, padding)
+        if bias:
+            # One bias value per filter, added to the convolution output.
+            offset = tf.get_variable('bias', [1, 1, 1, n_filters],
+                                     tf.float32)
+            result = result + offset
+        if batch_norm:
+            result = batch_normalize(result)
+        if activation:
+            result = activation(result)
+        return result
+
diff --git a/tensorflow/contrib/skflow/python/skflow/ops/dnn_ops.py b/tensorflow/contrib/skflow/python/skflow/ops/dnn_ops.py
new file mode 100644
index 0000000000..aebadf2ecd
--- /dev/null
+++ b/tensorflow/contrib/skflow/python/skflow/ops/dnn_ops.py
@@ -0,0 +1,44 @@
+"""TensorFlow ops for deep neural networks."""
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+import tensorflow as tf
+
+from .dropout_ops import dropout
+
+
+def dnn(tensor_in, hidden_units, activation=tf.nn.relu, keep_prob=None):
+    """Creates fully connected deep neural network subgraph.
+
+    Args:
+        tensor_in: tensor or placeholder for input features.
+        hidden_units: list of counts of hidden units in each layer.
+        activation: activation function between layers. Can be None.
+        keep_prob: float or Tensor keep probability of a dropout layer added
+                   after each layer; None disables it. Tensors are not bool-tested.
+
+    Returns:
+        A tensor which would be a deep neural network.
+    """
+    with tf.variable_scope('dnn'):
+        for i, n_units in enumerate(hidden_units):
+            with tf.variable_scope('layer%d' % i):
+                tensor_in = tf.nn.rnn_cell.linear(tensor_in, n_units, True)
+                if activation:
+                    tensor_in = activation(tensor_in)
+                if isinstance(keep_prob, (tf.Tensor, tf.Variable)) or keep_prob:
+                    tensor_in = dropout(tensor_in, keep_prob)
+        return tensor_in
diff --git a/tensorflow/contrib/skflow/python/skflow/ops/dropout_ops.py b/tensorflow/contrib/skflow/python/skflow/ops/dropout_ops.py
new file mode 100644
index 0000000000..35f295010e
--- /dev/null
+++ b/tensorflow/contrib/skflow/python/skflow/ops/dropout_ops.py
@@ -0,0 +1,45 @@
+"""Dropout operations and handling."""
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+import tensorflow as tf
+
+
+# Key to collect dropout probabilities.
+DROPOUTS = "dropouts"
+
+
+def dropout(tensor_in, prob, name=None):
+    """Adds dropout node and stores probability tensor into graph collection.
+
+    Args:
+        tensor_in: Input tensor.
+        prob: Float or Tensor, handed to tf.nn.dropout as the keep probability.
+        name: Optional name for the op scope; defaults to "dropout".
+
+    Returns:
+        Tensor of the same shape as `tensor_in`.
+
+    Raises: ValueError if tf.nn.dropout rejects `prob` (e.g. not in `(0, 1]`).
+    """
+    with tf.op_scope([tensor_in], name, "dropout") as name:
+        if isinstance(prob, float):
+            prob = tf.get_variable("prob", [],
+                                   initializer=tf.constant_initializer(prob),
+                                   trainable=False)
+        tf.add_to_collection(DROPOUTS, prob)
+        return tf.nn.dropout(tensor_in, prob)
+
diff --git a/tensorflow/contrib/skflow/python/skflow/ops/embeddings_ops.py b/tensorflow/contrib/skflow/python/skflow/ops/embeddings_ops.py
new file mode 100644
index 0000000000..fa4c632606
--- /dev/null
+++ b/tensorflow/contrib/skflow/python/skflow/ops/embeddings_ops.py
@@ -0,0 +1,77 @@
+"""TensorFlow Ops to work with embeddings.
+
+Note: categorical variables are handled via embeddings in many cases.
+For example, in case of words.
+"""
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+import tensorflow as tf
+
+
+def embedding_lookup(params, ids, name="embedding_lookup"):
+    """Provides a N dimensional version of tf.nn.embedding_lookup.
+
+    Ids are flattened to a 1d tensor before being passed to embedding_lookup
+    then, they are unflattened to match the original ids shape plus extra
+    trailing dimensions of the size of the embeddings.
+
+    Args:
+        params: Tensor-like of size D0 x D1 x ... x Dn-2 x Dn-1.
+        ids: N-dimensional tensor of B0 x B1 x .. x Bn-2 x Bn-1.
+             Must contain indexes into params.
+        name: Optional name for the op.
+
+    Returns:
+        A tensor of size B0 x B1 x .. x Bn-2 x Bn-1 x D1 x ... x Dn-2 x Dn-1
+        containing the values from the params tensor(s) for indices in ids.
+
+    Raises:
+        ValueError: if some parameters are invalid.
+    """
+    with tf.op_scope([params, ids], name, "embedding_lookup"):
+        params = tf.convert_to_tensor(params)
+        ids = tf.convert_to_tensor(ids)
+        shape = tf.shape(ids)
+        ids_flat = tf.reshape(ids, tf.reduce_prod(shape, keep_dims=True))
+        embeds_flat = tf.nn.embedding_lookup(params, ids_flat, name)
+        embed_shape = tf.concat(0, [shape, [-1]])
+        embeds = tf.reshape(embeds_flat, embed_shape)
+        embeds.set_shape(ids.get_shape().concatenate(params.get_shape()[1:]))
+        return embeds
+
+
+def categorical_variable(tensor_in, n_classes, embedding_size, name):
+    """Creates an embedding for categorical variable with given number of
+    classes.
+
+    Args:
+        tensor_in: Input tensor with class identifier (can be batch or
+            N-dimensional).
+        n_classes: Number of classes.
+        embedding_size: Size of embedding vector to represent each class.
+        name: Variable scope name; the table is named `<name>_embeddings`.
+    Returns:
+        Tensor of input shape, with additional dimension for embedding.
+
+    Example:
+        Calling categorical_variable([1, 2], 5, 10, "my_cat") will return a
+        2 x 10 tensor, where each row is the representation of one class.
+    """
+    with tf.variable_scope(name):
+        embeddings = tf.get_variable(
+            name + "_embeddings", [n_classes, embedding_size])
+        return embedding_lookup(embeddings, tensor_in)
diff --git a/tensorflow/contrib/skflow/python/skflow/ops/losses_ops.py b/tensorflow/contrib/skflow/python/skflow/ops/losses_ops.py
new file mode 100644
index 0000000000..64c2d7b49c
--- /dev/null
+++ b/tensorflow/contrib/skflow/python/skflow/ops/losses_ops.py
@@ -0,0 +1,56 @@
+"""TensorFlow Ops for loss computation."""
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+import tensorflow as tf
+
+
+def mean_squared_error_regressor(tensor_in, labels, weights, biases, name=None):
+    """Returns predictions and mean squared error loss of a linear model."""
+    with tf.op_scope([tensor_in, labels], name, "mean_squared_error_regressor"):
+        predictions = tf.nn.xw_plus_b(tensor_in, weights, biases)
+        if len(labels.get_shape()) == 1:  # turn 1-D labels into a column
+            labels = tf.reshape(labels, [-1, 1])
+        diff = labels - predictions
+        loss = tf.reduce_mean(tf.mul(diff, diff))
+        return predictions, loss
+
+
+def softmax_classifier(tensor_in, labels, weights, biases, class_weight=None, name=None):
+    """Returns prediction and loss for softmax classifier.
+
+    Args:
+        tensor_in: Input tensor, [batch_size, feature_size], features.
+        labels: Tensor, [batch_size, n_classes], labels of the output classes.
+        weights: Tensor, [feature_size, n_classes], linear transformation matrix.
+        biases: Tensor, [n_classes], biases.
+        class_weight: Tensor, optional, [n_classes], weight for each class,
+                      multiplied into the logits. Defaults to weight one.
+        name: Optional name for the op scope and the predictions tensor.
+
+    Returns:
+        Prediction and loss tensors.
+    """
+    with tf.op_scope([tensor_in, labels], name, "softmax_classifier"):
+        logits = tf.nn.xw_plus_b(tensor_in, weights, biases)
+        if class_weight is not None:
+            logits = tf.mul(logits, class_weight)
+        xent = tf.nn.softmax_cross_entropy_with_logits(logits,
+                                                       labels,
+                                                       name="xent_raw")
+        loss = tf.reduce_mean(xent, name="xent")
+        predictions = tf.nn.softmax(logits, name=name)
+        return predictions, loss
diff --git a/tensorflow/contrib/skflow/python/skflow/ops/seq2seq_ops.py b/tensorflow/contrib/skflow/python/skflow/ops/seq2seq_ops.py
new file mode 100644
index 0000000000..e337a3a3be
--- /dev/null
+++ b/tensorflow/contrib/skflow/python/skflow/ops/seq2seq_ops.py
@@ -0,0 +1,133 @@
+"""TensorFlow Ops for Sequence to Sequence models."""
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+import tensorflow as tf
+
+from . import array_ops
+
+
+def sequence_classifier(decoding, labels, sampling_decoding=None, name=None):
+    """Returns predictions and loss for sequence of predictions.
+
+    Args:
+        decoding: List of Tensors with predictions.
+        labels: List of Tensors with labels.
+        sampling_decoding: Optional, List of Tensors with predictions to be
+                           used in sampling. E.g. they shouldn't have a
+                           dependency on outputs. If omitted, decoding is used.
+
+    Returns:
+        Predictions and loss tensors.
+    """
+    with tf.op_scope([decoding, labels], name, "sequence_classifier"):
+        predictions, xent_list = [], []
+        for i, pred in enumerate(decoding):
+            xent_list.append(
+                tf.nn.softmax_cross_entropy_with_logits(
+                    pred, labels[i], name="sequence_loss/xent_raw{0}".format(i)))
+            if sampling_decoding:
+                predictions.append(tf.nn.softmax(sampling_decoding[i]))
+            else:
+                predictions.append(tf.nn.softmax(pred))
+        xent = tf.add_n(xent_list, name="sequence_loss/xent")
+        loss = tf.reduce_sum(xent, name="sequence_loss")
+        return array_ops.expand_concat(1, predictions), loss
+
+
+def seq2seq_inputs(X, y, input_length, output_length, sentinel=None, name=None):
+    """Processes inputs for Sequence to Sequence models.
+
+    Args:
+        X: Input Tensor [batch_size, input_length, embed_dim].
+        y: Output Tensor [batch_size, output_length, embed_dim].
+        input_length: length of input X.
+        output_length: length of output y.
+        sentinel: optional first input to decoder and final output expected.
+                  If None, zeros are used; as y is not available at sampling
+                  time, their batch size is inferred from X.
+
+    Returns:
+        Encoder input from X, and decoder inputs and outputs from y.
+    """
+    with tf.op_scope([X, y], name, "seq2seq_inputs"):
+        in_X = array_ops.split_squeeze(1, input_length, X)
+        y = array_ops.split_squeeze(1, output_length, y)
+        # A Tensor cannot be evaluated as a bool, so test against None explicitly.
+        if sentinel is None:
+            # Set to zeros of shape of y[0], using X for batch size.
+            sentinel_shape = tf.pack([tf.shape(X)[0], y[0].get_shape()[1]])
+            sentinel = tf.zeros(sentinel_shape)
+            sentinel.set_shape(y[0].get_shape())
+        in_y = [sentinel] + y
+        out_y = y + [sentinel]
+        return in_X, in_y, out_y
+
+
+def rnn_decoder(decoder_inputs, initial_state, cell, scope=None):
+    """RNN Decoder that creates training and sampling sub-graphs.
+
+    Args:
+        decoder_inputs: Inputs for decoder, list of tensors.
+                        This is used only in training sub-graph.
+        initial_state: Initial state for the decoder.
+        cell: RNN cell to use for decoder.
+        scope: Scope to use, if None "dnn_decoder" is used.
+
+    Returns:
+        List of tensors for outputs and states for training and sampling sub-graphs.
+    """
+    with tf.variable_scope(scope or "dnn_decoder"):
+        states, sampling_states = [initial_state], [initial_state]
+        outputs, sampling_outputs = [], []
+        with tf.op_scope([decoder_inputs, initial_state], "training"):
+            for i, inp in enumerate(decoder_inputs):
+                if i > 0:
+                    tf.get_variable_scope().reuse_variables()
+                output, new_state = cell(inp, states[-1])
+                outputs.append(output)
+                states.append(new_state)
+        with tf.op_scope([initial_state], "sampling"):
+            for i, _ in enumerate(decoder_inputs):
+                if i == 0:
+                    sampling_outputs.append(outputs[i])
+                    sampling_states.append(states[i])
+                else:
+                    sampling_output, sampling_state = cell(
+                        sampling_outputs[-1], sampling_states[-1])
+                    sampling_outputs.append(sampling_output)
+                    sampling_states.append(sampling_state)
+    return outputs, states, sampling_outputs, sampling_states
+
+
+def rnn_seq2seq(encoder_inputs, decoder_inputs, encoder_cell, decoder_cell=None,
+                dtype=tf.float32, scope=None):
+    """RNN Sequence to Sequence model.
+
+    Args:
+        encoder_inputs: List of tensors, inputs for encoder.
+        decoder_inputs: List of tensors, inputs for decoder.
+        encoder_cell: RNN cell to use for encoder.
+        decoder_cell: RNN cell to use for decoder, if None encoder_cell is used.
+        dtype: Type to initialize encoder state with.
+        scope: Scope to use, if None "rnn_seq2seq" is used.
+
+    Returns:
+        List of tensors for outputs and states for training and sampling sub-graphs.
+    """
+    with tf.variable_scope(scope or "rnn_seq2seq"):
+        _, last_enc_state = tf.nn.rnn(encoder_cell, encoder_inputs, dtype=dtype)
+        return rnn_decoder(decoder_inputs, last_enc_state, decoder_cell or encoder_cell)
diff --git a/tensorflow/contrib/skflow/python/skflow/ops/tests/__init__.py b/tensorflow/contrib/skflow/python/skflow/ops/tests/__init__.py
new file mode 100644
index 0000000000..d5daa96572
--- /dev/null
+++ b/tensorflow/contrib/skflow/python/skflow/ops/tests/__init__.py
@@ -0,0 +1,14 @@
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+from __future__ import division, print_function, absolute_import
diff --git a/tensorflow/contrib/skflow/python/skflow/ops/tests/test_dropout_ops.py b/tensorflow/contrib/skflow/python/skflow/ops/tests/test_dropout_ops.py
new file mode 100644
index 0000000000..5516951701
--- /dev/null
+++ b/tensorflow/contrib/skflow/python/skflow/ops/tests/test_dropout_ops.py
@@ -0,0 +1,44 @@
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+import numpy as np
+import tensorflow as tf
+
+from tensorflow.contrib.skflow.python.skflow import ops
+
+
+class DropoutTest(tf.test.TestCase):
+
+    def test_dropout_float(self):
+        with self.test_session() as session:
+            x = tf.placeholder(tf.float32, [5, 5])
+            ops.dropout(x, 0.5)  # only the recorded probability is checked
+            probs = tf.get_collection(ops.DROPOUTS)
+            session.run(tf.initialize_all_variables())
+            self.assertEqual(len(probs), 1)
+            self.assertEqual(session.run(probs[0]), 0.5)
+
+    def test_dropout_tensor(self):
+        with self.test_session():
+            x = tf.placeholder(tf.float32, [5, 5])
+            y = tf.get_variable("prob", [], initializer=tf.constant_initializer(0.5))
+            ops.dropout(x, y)  # a tensor probability must be stored as-is
+            probs = tf.get_collection(ops.DROPOUTS)
+            self.assertEqual(probs, [y])
+
+
+if __name__ == '__main__':
+    tf.test.main()
diff --git a/tensorflow/contrib/skflow/python/skflow/ops/tests/test_ops.py b/tensorflow/contrib/skflow/python/skflow/ops/tests/test_ops.py
new file mode 100644
index 0000000000..17878905e5
--- /dev/null
+++ b/tensorflow/contrib/skflow/python/skflow/ops/tests/test_ops.py
@@ -0,0 +1,94 @@
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+import numpy as np
+import tensorflow as tf
+
+from tensorflow.contrib.skflow.python.skflow import ops
+
+
+class OpsTest(tf.test.TestCase):
+
+    def test_softmax_classifier(self):
+        with self.test_session() as session:
+            features = tf.placeholder(tf.float32, [None, 3])
+            labels = tf.placeholder(tf.float32, [None, 2])
+            weights = tf.constant([[0.1, 0.1], [0.1, 0.1], [0.1, 0.1]])
+            biases = tf.constant([0.2, 0.3])
+            class_weight = tf.constant([0.1, 0.9])
+            prediction, loss = ops.softmax_classifier(features, labels, weights, biases, class_weight)
+            self.assertEqual(prediction.get_shape()[1], 2)
+            self.assertEqual(loss.get_shape(), [])
+            value = session.run(loss, {features: [[0.2, 0.3, 0.2]], labels: [[0, 1]]})
+            self.assertAllClose(value, 0.55180627)
+
+    def test_embedding_lookup(self):
+        d_embed = 5
+        n_embed = 10
+        ids_shape = (2, 3, 4)
+        embeds = np.random.randn(n_embed, d_embed)
+        ids = np.random.randint(0, n_embed, ids_shape)
+        with self.test_session():
+            embed_np = embeds[ids]
+            embed_tf = ops.embedding_lookup(embeds, ids).eval()
+        self.assertEqual(embed_np.shape, embed_tf.shape)
+        self.assertAllClose(embed_np, embed_tf)
+
+    def test_categorical_variable(self):
+        tf.set_random_seed(42)
+        with self.test_session() as sess:
+            cat_var_idx = tf.placeholder(tf.int64, [2, 2])
+            embeddings = ops.categorical_variable(cat_var_idx, n_classes=5,
+                                                  embedding_size=10,
+                                                  name="my_cat_var")
+            sess.run(tf.initialize_all_variables())
+            emb1 = sess.run(embeddings, feed_dict={cat_var_idx.name: [[0, 1],
+                                                                      [2, 3]]})
+            emb2 = sess.run(embeddings, feed_dict={cat_var_idx.name: [[0, 2],
+                                                                      [1, 3]]})
+        self.assertEqual(emb1.shape, emb2.shape)
+        self.assertAllEqual(np.transpose(emb2, axes=[1, 0, 2]), emb1)
+
+    def test_conv2d(self):
+        tf.set_random_seed(42)
+        batch_size = 32
+        input_shape = (10, 10)
+        n_filters = 7
+        filter_shape = (5, 5)
+        vals = np.random.randn(batch_size, input_shape[0], input_shape[1], 1)
+        with self.test_session() as sess:
+            tf.add_to_collection("IS_TRAINING", True)
+            tensor_in = tf.placeholder(tf.float32, [batch_size, input_shape[0],
+                                                    input_shape[1], 1])
+            res = ops.conv2d(
+                tensor_in, n_filters, filter_shape, batch_norm=True)
+            sess.run(tf.initialize_all_variables())
+            conv = sess.run(res, feed_dict={tensor_in.name: vals})
+        self.assertEqual(conv.shape, (batch_size, input_shape[0],
+                                      input_shape[1], n_filters))
+
+    def test_one_hot_matrix(self):
+        with self.test_session() as sess:
+            tensor_in = tf.placeholder(tf.int64, [10, 2])
+            one_hot_tensor = ops.one_hot_matrix(tensor_in, 3)
+            res = sess.run(ops.one_hot_matrix([[0, 1], [2, 1]], 3))
+        self.assertAllEqual(one_hot_tensor.get_shape(), [10, 2, 3])
+        self.assertAllEqual(res, [[[1.0, 0, 0], [0, 1.0, 0]],
+                                  [[0, 0, 1.0], [0, 1.0, 0]]])
+
+
+if __name__ == '__main__':
+    tf.test.main()
diff --git a/tensorflow/contrib/skflow/python/skflow/ops/tests/test_seq2seq_ops.py b/tensorflow/contrib/skflow/python/skflow/ops/tests/test_seq2seq_ops.py
new file mode 100644
index 0000000000..34797cebca
--- /dev/null
+++ b/tensorflow/contrib/skflow/python/skflow/ops/tests/test_seq2seq_ops.py
@@ -0,0 +1,83 @@
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+import numpy as np
+import tensorflow as tf
+
+from tensorflow.contrib.skflow.python.skflow import ops
+
+
+class Seq2SeqOpsTest(tf.test.TestCase):
+
+    def test_sequence_classifier(self):
+        with self.test_session() as session:
+            decoding = [tf.placeholder(tf.float32, [2, 2]) for _ in range(3)]
+            labels = [tf.placeholder(tf.float32, [2, 2]) for _ in range(3)]
+            sampling_decoding = [tf.placeholder(tf.float32, [2, 2]) for _ in range(3)]
+            predictions, loss = ops.sequence_classifier(decoding, labels, sampling_decoding)
+            pred, cost = session.run([predictions, loss], feed_dict={
+                decoding[0].name: [[0.1, 0.9], [0.7, 0.3]],
+                decoding[1].name: [[0.9, 0.1], [0.8, 0.2]],
+                decoding[2].name: [[0.5, 0.5], [0.4, 0.6]],
+                labels[0].name: [[1, 0], [0, 1]],
+                labels[1].name: [[1, 0], [0, 1]],
+                labels[2].name: [[1, 0], [0, 1]],
+                sampling_decoding[0].name: [[0.1, 0.9], [0.7, 0.3]],
+                sampling_decoding[1].name: [[0.9, 0.1], [0.8, 0.2]],
+                sampling_decoding[2].name: [[0.5, 0.5], [0.4, 0.6]],
+            })
+        self.assertAllEqual(pred.argmax(axis=2), [[1, 0, 0], [0, 0, 1]])
+        self.assertAllClose(cost, 4.7839908599)
+
+    def test_seq2seq_inputs(self):
+        inp = np.array([[[1, 0], [0, 1], [1, 0]], [[0, 1], [1, 0], [0, 1]]])
+        out = np.array([[[0, 1, 0], [1, 0, 0]], [[1, 0, 0], [0, 1, 0]]])
+        with self.test_session() as session:
+            X = tf.placeholder(tf.float32, [2, 3, 2])
+            y = tf.placeholder(tf.float32, [2, 2, 3])
+            in_X, in_y, out_y = ops.seq2seq_inputs(X, y, 3, 2)
+            enc_inp = session.run(in_X, feed_dict={X.name: inp})
+            dec_inp = session.run(in_y, feed_dict={X.name: inp, y.name: out})
+            dec_out = session.run(out_y, feed_dict={X.name: inp, y.name: out})
+        # Expect batch x len x height turned into a list of len of batch x height.
+        self.assertAllEqual(enc_inp, np.swapaxes(inp, 0, 1))
+        self.assertAllEqual(dec_inp, [[[0, 0, 0], [0, 0, 0]],
+                                      [[0, 1, 0], [1, 0, 0]],
+                                      [[1, 0, 0], [0, 1, 0]]])
+        self.assertAllEqual(dec_out, [[[0, 1, 0], [1, 0, 0]],
+                                      [[1, 0, 0], [0, 1, 0]],
+                                      [[0, 0, 0], [0, 0, 0]]])
+
+    def test_rnn_decoder(self):
+        with self.test_session() as session:
+            decoder_inputs = [
+                tf.placeholder(tf.float32, [2, 2]) for _ in range(3)]
+            encoding = tf.placeholder(tf.float32, [2, 2])
+            cell = tf.nn.rnn_cell.GRUCell(2)
+            outputs, states, sampling_outputs, sampling_states = (
+                ops.rnn_decoder(decoder_inputs, encoding, cell))
+            self.assertEqual(len(outputs), 3)
+            self.assertEqual(outputs[0].get_shape(), [2, 2])
+            self.assertEqual(len(states), 4)
+            self.assertEqual(states[0].get_shape(), [2, 2])
+            self.assertEqual(len(sampling_outputs), 3)
+            self.assertEqual(sampling_outputs[0].get_shape(), [2, 2])
+            self.assertEqual(len(sampling_states), 4)
+            self.assertEqual(sampling_states[0].get_shape(), [2, 2])
+
+
+if __name__ == '__main__':
+    tf.test.main()
diff --git a/tensorflow/contrib/skflow/python/skflow/preprocessing/__init__.py b/tensorflow/contrib/skflow/python/skflow/preprocessing/__init__.py
new file mode 100644
index 0000000000..ea4d97ea6f
--- /dev/null
+++ b/tensorflow/contrib/skflow/python/skflow/preprocessing/__init__.py
@@ -0,0 +1,19 @@
+"""Preprocessing tools useful for building models."""
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+from tensorflow.contrib.skflow.python.skflow.preprocessing.text import *
+from tensorflow.contrib.skflow.python.skflow.preprocessing.categorical import *
diff --git a/tensorflow/contrib/skflow/python/skflow/preprocessing/categorical.py b/tensorflow/contrib/skflow/python/skflow/preprocessing/categorical.py
new file mode 100644
index 0000000000..b32683e18b
--- /dev/null
+++ b/tensorflow/contrib/skflow/python/skflow/preprocessing/categorical.py
@@ -0,0 +1,123 @@
+"""Implements preprocessing transformers for categorical variables."""
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+import math
+import numpy as np
+
+from . import categorical_vocabulary
+from ..io.data_feeder import setup_processor_data_feeder
+
+
+class CategoricalProcessor(object):
+    """Maps rows of categorical values to rows of category ids.
+
+    As a common convention, Nan values are handled as unknown tokens.
+    Python float NaNs and numpy floating NaNs are both accepted.
+
+    Parameters:
+        min_frequency: Minimum frequency of categories in the vocabulary.
+        share: Share vocabulary between variables.
+        vocabularies: list of CategoricalVocabulary objects for each variable in
+                    the input dataset.
+
+    Attributes:
+        vocabularies_: list of CategoricalVocabulary objects.
+    """
+
+    def __init__(self, min_frequency=0, share=False, vocabularies=None):
+        self.min_frequency = min_frequency
+        self.share = share
+        self.vocabularies_ = vocabularies
+
+    def freeze(self, freeze=True):
+        """Freeze or unfreeze all vocabularies.
+
+        Args:
+            freeze: Boolean, indicate if vocabularies should be frozen.
+        """
+        for vocab in self.vocabularies_:
+            vocab.freeze(freeze)
+
+    def fit(self, X, unused_y=None):
+        """Learn a vocabulary dictionary of all categories in X.
+
+        Args:
+            X: numpy matrix or iterable of lists/numpy arrays.
+            unused_y: to match fit format signature of estimators.
+
+        Returns:
+            self
+        """
+        X = setup_processor_data_feeder(X)
+        for row in X:
+            # Create vocabularies if not given.
+            if self.vocabularies_ is None:
+                # If not share, one per column, else one shared across.
+                if not self.share:
+                    self.vocabularies_ = [
+                        categorical_vocabulary.CategoricalVocabulary() for _ in row]
+                else:
+                    vocab = categorical_vocabulary.CategoricalVocabulary()
+                    self.vocabularies_ = [vocab for _ in row]
+            for idx, value in enumerate(row):
+                # NaNs are unknowns; isnan is required, `== np.nan` never matches.
+                if isinstance(value, (float, np.floating)) and math.isnan(value):
+                    continue
+                self.vocabularies_[idx].add(value)
+        if self.min_frequency > 0:
+            for vocab in self.vocabularies_:
+                vocab.trim(self.min_frequency)
+        self.freeze()
+        return self
+
+    def fit_transform(self, X, unused_y=None):
+        """Learn the vocabulary dictionary and return indices of categories.
+
+        Args:
+            X: numpy matrix or iterable of lists/numpy arrays.
+            unused_y: to match fit_transform signature of estimators.
+
+        Returns:
+            X: iterable, [n_samples]. Category-id matrix.
+        """
+        self.fit(X)
+        return self.transform(X)
+
+    def transform(self, X):
+        """Transform documents to category-id matrix.
+
+        Converts categories to ids given the fitted vocabulary from `fit` or
+        one provided in the constructor.
+
+        Args:
+            X: numpy matrix or iterable of lists/numpy arrays.
+
+        Returns:
+            X: iterable, [n_samples]. Category-id matrix.
+        """
+        self.freeze()
+        X = setup_processor_data_feeder(X)
+        for row in X:
+            output_row = []
+            for idx, value in enumerate(row):
+                # Return <UNK> (id 0) for NaN, including numpy floating NaNs.
+                if isinstance(value, (float, np.floating)) and math.isnan(value):
+                    output_row.append(0)
+                    continue
+                output_row.append(self.vocabularies_[idx].get(value))
+            yield np.array(output_row, dtype=np.int64)
+
diff --git a/tensorflow/contrib/skflow/python/skflow/preprocessing/categorical_vocabulary.py b/tensorflow/contrib/skflow/python/skflow/preprocessing/categorical_vocabulary.py
new file mode 100644
index 0000000000..cf0503bda9
--- /dev/null
+++ b/tensorflow/contrib/skflow/python/skflow/preprocessing/categorical_vocabulary.py
@@ -0,0 +1,133 @@
+"""Categorical vocabulary classes to map categories to indexes.
+
+Can be used for categorical variables, sparse variables and words.
+"""
+
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+import collections
+import six
+
+
+class CategoricalVocabulary(object):
+    """Categorical variables vocabulary class.
+
+    Maps categories (e.g. words) to integer ids and counts their frequency.
+
+    Args:
+        unknown_token: category that always owns id 0.
+        support_reverse: if True, also keep the id -> category list.
+    """
+
+    def __init__(self, unknown_token='<UNK>', support_reverse=True):
+        self._unknown_token = unknown_token
+        self._mapping = {unknown_token: 0}
+        self._support_reverse = support_reverse
+        if support_reverse:
+            self._reverse_mapping = [unknown_token]
+        self._freq = collections.defaultdict(int)
+        self._freeze = False
+
+    def __len__(self):
+        """Returns total count of mappings. Including unknown token."""
+        return len(self._mapping)
+
+    def freeze(self, freeze=True):
+        """Freezes the vocabulary, after which new words return unknown token id.
+
+        Args:
+            freeze: True to freeze, False to unfreeze.
+        """
+        self._freeze = freeze
+
+    def get(self, category):
+        """Returns the id of `category`, creating one if it is new.
+
+        When frozen, unseen categories map to the unknown id 0 instead.
+
+        Args:
+            category: string or integer to lookup in vocabulary.
+
+        Returns:
+            integer, id in the vocabulary.
+        """
+        if category not in self._mapping:
+            if self._freeze:
+                return 0
+            self._mapping[category] = len(self._mapping)
+            if self._support_reverse:
+                self._reverse_mapping.append(category)
+        return self._mapping[category]
+
+    def add(self, category, count=1):
+        """Adds count to the category's frequency; no-op if it maps to id 0.
+
+        Args:
+            category: string or integer, category to add frequency to.
+            count: optional integer, how many to add.
+        """
+        if self.get(category) > 0:
+            self._freq[category] += count
+
+    def trim(self, min_frequency, max_frequency=-1):
+        """Drops rare (and optionally very frequent) categories and remaps ids.
+
+        Kept categories get ids 1..n in decreasing frequency order (ties are
+        ordered by category value); the frequency table keeps only them.
+
+        Args:
+            min_frequency: categories with count <= min_frequency are dropped.
+            max_frequency: optional; if > 0, categories with count >=
+                max_frequency are dropped too (e.g. stop words).
+        """
+        # Stable two-pass sort: by category first, then by descending count.
+        ranked = sorted(self._freq.items(),
+                        key=lambda x: (isinstance(x[0], str), x[0]))
+        ranked.sort(key=lambda x: x[1], reverse=True)
+        self._mapping = {self._unknown_token: 0}
+        if self._support_reverse:
+            self._reverse_mapping = [self._unknown_token]
+        kept = []
+        for category, count in ranked:
+            if max_frequency > 0 and count >= max_frequency:
+                continue
+            if count <= min_frequency:
+                break
+            kept.append((category, count))
+            self._mapping[category] = len(kept)
+            if self._support_reverse:
+                self._reverse_mapping.append(category)
+        # Not a prefix of `ranked`: too-frequent entries skipped above lead it.
+        self._freq = collections.defaultdict(int, kept)
+
+    def reverse(self, class_id):
+        """Given class id reverse to original class name.
+
+        Args:
+            class_id: Id of the class.
+
+        Returns:
+            Class name.
+
+        Raises:
+            ValueError: if the vocabulary was built with support_reverse=False.
+        """
+        if not self._support_reverse:
+            raise ValueError("This vocabulary wasn't initalized with "
+                             "support_reverse to support reverse() function.")
+        return self._reverse_mapping[class_id]
+
diff --git a/tensorflow/contrib/skflow/python/skflow/preprocessing/tests/__init__.py b/tensorflow/contrib/skflow/python/skflow/preprocessing/tests/__init__.py
new file mode 100644
index 0000000000..d5daa96572
--- /dev/null
+++ b/tensorflow/contrib/skflow/python/skflow/preprocessing/tests/__init__.py
@@ -0,0 +1,14 @@
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+from __future__ import division, print_function, absolute_import
diff --git a/tensorflow/contrib/skflow/python/skflow/preprocessing/tests/test_categorical.py b/tensorflow/contrib/skflow/python/skflow/preprocessing/tests/test_categorical.py
new file mode 100644
index 0000000000..72fbee26ae
--- /dev/null
+++ b/tensorflow/contrib/skflow/python/skflow/preprocessing/tests/test_categorical.py
@@ -0,0 +1,55 @@
+# encoding: utf-8
+
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+import numpy as np
+import tensorflow as tf
+
+from tensorflow.contrib.skflow.python.skflow.preprocessing import categorical
+from tensorflow.contrib.skflow.python.skflow.io import *
+
+
+class CategoricalTest(tf.test.TestCase):
+    """Checks the category ids produced by CategoricalProcessor."""
+
+    def testSingleCategoricalProcessor(self):
+        # NaN rows and the value seen only once (3) are expected as id 0.
+        rows = [["0"], [1], [float('nan')],
+                ["C"], ["C"], [1], ["0"], [np.nan], [3]]
+        expected = [[2], [1], [0], [3], [3], [1], [2], [0], [0]]
+        processor = categorical.CategoricalProcessor(min_frequency=1)
+        self.assertAllEqual(list(processor.fit_transform(rows)), expected)
+
+    def testSingleCategoricalProcessorPandasSingleDF(self):
+        # Nothing is checked when pandas is unavailable.
+        if not HAS_PANDAS:
+            return
+        frame = pd.DataFrame({"Gender": ["Male", "Female", "Male"]})
+        ids = list(categorical.CategoricalProcessor().fit_transform(frame))
+        self.assertAllEqual(list(ids), [[1], [2], [1]])
+
+    def testMultiCategoricalProcessor(self):
+        # Two columns per row; the expected ids are per column.
+        rows = [["0", "Male"], [1, "Female"], ["3", "Male"]]
+        expected = [[1, 1], [2, 2], [3, 1]]
+        processor = categorical.CategoricalProcessor(
+            min_frequency=0, share=False)
+        self.assertAllEqual(list(processor.fit_transform(rows)), expected)
+
+
+if __name__ == "__main__":
+    tf.test.main()
diff --git a/tensorflow/contrib/skflow/python/skflow/preprocessing/tests/test_categorical_vocabulary.py b/tensorflow/contrib/skflow/python/skflow/preprocessing/tests/test_categorical_vocabulary.py
new file mode 100644
index 0000000000..96c9204994
--- /dev/null
+++ b/tensorflow/contrib/skflow/python/skflow/preprocessing/tests/test_categorical_vocabulary.py
@@ -0,0 +1,61 @@
+# encoding: utf-8
+
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+import tensorflow as tf
+
+from tensorflow.contrib.skflow.python.skflow.preprocessing import categorical_vocabulary
+
+
+class CategoricalVocabularyTest(tf.test.TestCase):
+    """Exercises id assignment, lookup and trimming of the vocabulary."""
+
+    def testIntVocabulary(self):
+        vocab = categorical_vocabulary.CategoricalVocabulary()
+        # Ids are handed out in first-seen order; repeats reuse their id.
+        for category, expected_id in [(1, 1), (3, 2), (2, 3), (3, 2)]:
+            self.assertEqual(vocab.get(category), expected_id)
+        # This vocab doesn't handle nan specially.
+        self.assertEqual(vocab.get(float('nan')), 4)
+        self.assertEqual(len(vocab), 5)
+
+    def testWordVocabulary(self):
+        vocab = categorical_vocabulary.CategoricalVocabulary()
+        # Looking a word up again returns the id it was first given.
+        for word, expected_id in [('a', 1), ('b', 2), ('a', 1), ('b', 2)]:
+            self.assertEqual(vocab.get(word), expected_id)
+
+    def testCountsTrim(self):
+        vocab = categorical_vocabulary.CategoricalVocabulary()
+        # 'c' and 'a' are registered with get() before being counted.
+        for category, count in [('c', 5), ('a', 10)]:
+            vocab.get(category)
+            vocab.add(category, count)
+        # 'b' and 'd' are first seen by add() itself.
+        vocab.add('b', 5)
+        vocab.add('d', 12)
+        # Only 'a' (count 10) is expected to survive trim(7, 11).
+        vocab.trim(7, 11)
+        vocab.freeze()
+        for category, expected_id in [('b', 0), ('c', 0)]:
+            self.assertEqual(vocab.get(category), expected_id)
+        self.assertEqual(len(vocab), 2)
+        self.assertEqual(vocab.get('a'), 1)
+
+
+if __name__ == "__main__":
+    tf.test.main()
diff --git a/tensorflow/contrib/skflow/python/skflow/preprocessing/tests/test_text.py b/tensorflow/contrib/skflow/python/skflow/preprocessing/tests/test_text.py
new file mode 100644
index 0000000000..3dff6e1486
--- /dev/null
+++ b/tensorflow/contrib/skflow/python/skflow/preprocessing/tests/test_text.py
@@ -0,0 +1,86 @@
+# encoding: utf-8
+
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+from __future__ import unicode_literals
+
+import tensorflow as tf
+
+from tensorflow.contrib.skflow.python.skflow.preprocessing import text
+from tensorflow.contrib.skflow.python.skflow.preprocessing import CategoricalVocabulary
+
+
+class TextTest(tf.test.TestCase):
+    """Tests for the tokenizer, ByteProcessor and VocabularyProcessor."""
+
+    def testTokenizer(self):
+        words = text.tokenizer(["a b c", "a\nb\nc", "a, b - c",
+                                "фыв выф", "你好 怎么样"])
+        self.assertEqual(list(words),
+                         [["a", "b", "c"],
+                          ["a", "b", "c"],
+                          ["a", "b", "-", "c"],
+                          ["фыв", "выф"],
+                          ["你好", "怎么样"]])
+
+    def testByteProcessor(self):
+        processor = text.ByteProcessor(max_document_length=8)
+        inp = ["abc", "фыва", "фыва", b"abc",
+               "12345678901234567890"]
+        res = list(processor.fit_transform(inp))
+        self.assertAllEqual(res,
+                            [[97, 98, 99, 0, 0, 0, 0, 0],
+                             [209, 132, 209, 139, 208, 178, 208, 176],
+                             [209, 132, 209, 139, 208, 178, 208, 176],
+                             [97, 98, 99, 0, 0, 0, 0, 0],
+                             [49, 50, 51, 52, 53, 54, 55, 56]])
+        res = list(processor.reverse(res))
+        self.assertAllEqual(res,
+                            ["abc", "фыва", "фыва", "abc", "12345678"])
+
+    def testVocabularyProcessor(self):
+        vocab_processor = text.VocabularyProcessor(
+            max_document_length=4, min_frequency=1)
+        tokens = vocab_processor.fit_transform(
+            ["a b c", "a\nb\nc", "a, b - c"])
+        self.assertAllEqual(list(tokens),
+                            [[1, 2, 3, 0], [1, 2, 3, 0], [1, 2, 0, 3]])
+
+    def testVocabularyProcessorSaveRestore(self):
+        # Explicit separator: without it the file lands beside the temp dir.
+        filename = tf.test.get_temp_dir() + '/test.vocab'
+        vocab_processor = text.VocabularyProcessor(
+            max_document_length=4, min_frequency=1)
+        tokens = vocab_processor.fit_transform(
+            ["a b c", "a\nb\nc", "a, b - c"])
+        vocab_processor.save(filename)
+        new_vocab = text.VocabularyProcessor.restore(filename)
+        tokens = new_vocab.transform(["a b c"])
+        self.assertAllEqual(list(tokens), [[1, 2, 3, 0]])
+
+    def testExistingVocabularyProcessor(self):
+        vocab = CategoricalVocabulary()
+        vocab.get("A")
+        vocab.get("B")
+        vocab.freeze()
+        vocab_processor = text.VocabularyProcessor(
+            max_document_length=4, vocabulary=vocab, tokenizer_fn=list)
+        tokens = vocab_processor.fit_transform(["ABC", "CBABAF"])
+        self.assertAllEqual(list(tokens), [[1, 2, 0, 0], [0, 2, 1, 2]])
+
+
+if __name__ == "__main__":
+    tf.test.main()
diff --git a/tensorflow/contrib/skflow/python/skflow/preprocessing/text.py b/tensorflow/contrib/skflow/python/skflow/preprocessing/text.py
new file mode 100644
index 0000000000..25f96ef5ba
--- /dev/null
+++ b/tensorflow/contrib/skflow/python/skflow/preprocessing/text.py
@@ -0,0 +1,224 @@
+"""Implements a number of text preprocessing utilities."""
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+import re
+import six
+
+try:
+    import cPickle as pickle
+except ImportError:
+    import pickle
+
+import numpy as np
+
+from .categorical_vocabulary import CategoricalVocabulary
+
+TOKENIZER_RE = re.compile(
+    r"[A-Z]{2,}(?![a-z])|[A-Z][a-z]+(?=[A-Z])|[\'\w\-]+", flags=re.UNICODE)
+
+
+def tokenizer(iterator):
+    """Lazily splits each input string into its TOKENIZER_RE matches.
+
+    Args:
+        iterator: Input iterator with strings.
+
+    Yields:
+        list of tokens for each value in the input, in input order.
+    """
+    for document in iterator:
+        yield TOKENIZER_RE.findall(document)
+
+
+class ByteProcessor(object):
+    """Maps documents into sequence of ids for bytes."""
+
+    def __init__(self, max_document_length):
+        self.max_document_length = max_document_length
+
+    def fit(self, X):
+        """Does nothing (no fitting required); returns self for chaining."""
+        return self
+
+    def fit_transform(self, X):
+        """Calls transform."""
+        return self.transform(X)
+
+    # pylint: disable=no-self-use
+    def reverse(self, X):
+        """Reverses output of transform back to text.
+
+        Args:
+            X: iterator or matrix of integer byte ids; trailing zeros of
+               each row are treated as padding and dropped.
+
+        Returns:
+            Iterator of strings; '' for rows that are not valid utf-8.
+        """
+        for data in X:
+            document = np.trim_zeros(
+                np.asarray(data).astype(np.uint8), trim='b').tobytes()
+            try:
+                yield document.decode('utf-8')
+            except UnicodeDecodeError:
+                yield ''
+
+    def transform(self, X):
+        """Transforms input documents into sequence of ids.
+
+        Args:
+            X: iterator or list of input documents.
+               Documents can be bytes or unicode strings, which will be encoded
+               as utf-8 to map to bytes. Note, in Python2 str and bytes is the
+               same type.
+        Returns:
+            iterator of uint8 arrays, zero-padded to max_document_length.
+        """
+        if six.PY3:
+            buffer_or_memoryview = memoryview  # Python 3 has no buffer().
+        else:
+            buffer_or_memoryview = buffer  # pylint: disable=undefined-variable
+        for document in X:
+            if isinstance(document, six.text_type):
+                document = document.encode('utf-8')
+            document_mv = buffer_or_memoryview(document)
+            buff = np.frombuffer(document_mv[:self.max_document_length],
+                                 dtype=np.uint8)
+            yield np.pad(buff, (0, self.max_document_length - len(buff)),
+                         'constant')
+
+
+class VocabularyProcessor(object):
+    """Maps documents to sequences of word ids.
+
+    Parameters:
+        max_document_length: Maximum length of documents.
+            if documents are longer, they will be trimmed, if shorter - padded.
+        min_frequency: Minimum frequency of words in the vocabulary.
+        vocabulary: CategoricalVocabulary object.
+        tokenizer_fn: callable turning an iterable of documents into an
+            iterable of token lists; defaults to `tokenizer`.
+
+    Attributes:
+        vocabulary_: CategoricalVocabulary object.
+    """
+
+    def __init__(self, max_document_length, min_frequency=0,
+                 vocabulary=None, tokenizer_fn=None):
+        self.max_document_length = max_document_length
+        self.min_frequency = min_frequency
+        # Explicit None checks: a vocabulary defines __len__, so truthiness
+        # would depend on its size rather than on whether one was passed.
+        if vocabulary is None:
+            vocabulary = CategoricalVocabulary()
+        self.vocabulary_ = vocabulary
+        if tokenizer_fn is None:
+            tokenizer_fn = tokenizer
+        self._tokenizer = tokenizer_fn
+
+    def fit(self, raw_documents, unused_y=None):
+        """Learn a vocabulary dictionary of all tokens in the raw documents.
+
+        Args:
+            raw_documents: iterable
+                An iterable which yield either str or unicode.
+            unused_y: to match fit format signature of estimators.
+
+        Returns:
+            self
+        """
+        for tokens in self._tokenizer(raw_documents):
+            for token in tokens:
+                self.vocabulary_.add(token)
+        if self.min_frequency > 0:
+            self.vocabulary_.trim(self.min_frequency)
+        self.vocabulary_.freeze()
+        return self
+
+    def fit_transform(self, raw_documents, unused_y=None):
+        """Learn the vocabulary dictionary and return indices of words.
+
+        Args:
+            raw_documents: iterable
+                An iterable which yield either str or unicode.
+            unused_y: to match fit_transform signature of estimators.
+
+        Returns:
+            X: iterable, [n_samples, max_document_length]
+                Word-id matrix.
+        """
+        self.fit(raw_documents)
+        return self.transform(raw_documents)
+
+    def transform(self, raw_documents):
+        """Transform documents to word-id matrix.
+
+        Convert words to ids with vocabulary fitted with fit or the one
+        provided in the constructor.
+
+        Args:
+            raw_documents: iterable.
+                An iterable which yield either str or unicode.
+
+        Returns:
+            X: iterable, [n_samples, max_document_length]
+                Word-id matrix.
+        """
+        for tokens in self._tokenizer(raw_documents):
+            word_ids = np.zeros(self.max_document_length, np.int64)
+            for idx, token in enumerate(tokens):
+                if idx >= self.max_document_length:
+                    break
+                word_ids[idx] = self.vocabulary_.get(token)
+            yield word_ids
+
+    def reverse(self, documents):
+        """Reverses output of vocabulary mapping to words.
+
+        Args:
+            documents: iterable, list of class ids.
+
+        Returns:
+            Iterator over mapped in words documents.
+        """
+        for item in documents:
+            yield ' '.join(self.vocabulary_.reverse(idx) for idx in item)
+
+    def save(self, filename):
+        """Saves vocabulary processor into given file.
+
+        Args:
+            filename: Path to output file.
+        """
+        with open(filename, 'wb') as f:
+            f.write(pickle.dumps(self))
+
+    @classmethod
+    def restore(cls, filename):
+        """Restores vocabulary processor from given file.
+
+        Args:
+            filename: Path to file to load from. It is unpickled, which can
+                execute arbitrary code, so it must come from a trusted source.
+
+        Returns:
+            VocabularyProcessor object.
+        """
+        with open(filename, 'rb') as f:
+            return pickle.loads(f.read())
+
diff --git a/tensorflow/contrib/skflow/python/skflow/tests/__init__.py b/tensorflow/contrib/skflow/python/skflow/tests/__init__.py
new file mode 100644
index 0000000000..d5daa96572
--- /dev/null
+++ b/tensorflow/contrib/skflow/python/skflow/tests/__init__.py
@@ -0,0 +1,14 @@
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+from __future__ import division, print_function, absolute_import
diff --git a/tensorflow/contrib/skflow/python/skflow/tests/test_base.py b/tensorflow/contrib/skflow/python/skflow/tests/test_base.py
new file mode 100644
index 0000000000..88d89be0e2
--- /dev/null
+++ b/tensorflow/contrib/skflow/python/skflow/tests/test_base.py
@@ -0,0 +1,129 @@
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+import random
+
+from sklearn import datasets
+from sklearn.metrics import accuracy_score, mean_squared_error, log_loss
+
+import numpy as np
+import tensorflow as tf
+
+from tensorflow.contrib.skflow.python import skflow
+from tensorflow.contrib.skflow.python.skflow.estimators import base
+
+
+class BaseTest(tf.test.TestCase):
+    """End-to-end checks of the basic skflow estimators on small datasets."""
+
+    def testOneDim(self):
+        random.seed(42)
+        # X comes from numpy's generator, which random.seed() does not seed.
+        np.random.seed(42)
+        X = np.random.rand(1000)
+        y = 2 * X + 3
+        regressor = skflow.TensorFlowLinearRegressor()
+        regressor.fit(X, y)
+        score = mean_squared_error(regressor.predict(X), y)
+        self.assertLess(score, 0.3, "Failed with score = {0}".format(score))
+
+    def testIris(self):
+        iris = datasets.load_iris()
+        classifier = skflow.TensorFlowLinearClassifier(n_classes=3)
+        classifier.fit(iris.data, [float(x) for x in iris.target])
+        score = accuracy_score(iris.target, classifier.predict(iris.data))
+        self.assertGreater(score, 0.7, "Failed with score = {0}".format(score))
+
+    def testIrisClassWeight(self):
+        iris = datasets.load_iris()
+        classifier = skflow.TensorFlowLinearClassifier(
+            n_classes=3, class_weight=[0.1, 0.8, 0.1])
+        classifier.fit(iris.data, iris.target)
+        score = accuracy_score(iris.target, classifier.predict(iris.data))
+        self.assertLess(score, 0.7, "Failed with score = {0}".format(score))
+
+    def testIrisSummaries(self):
+        iris = datasets.load_iris()
+        classifier = skflow.TensorFlowLinearClassifier(n_classes=3)
+        classifier.fit(iris.data, iris.target, logdir='/tmp/skflow_tests/')
+        score = accuracy_score(iris.target, classifier.predict(iris.data))
+        self.assertGreater(score, 0.5, "Failed with score = {0}".format(score))
+
+    def testIrisContinueTraining(self):
+        iris = datasets.load_iris()
+        classifier = skflow.TensorFlowLinearClassifier(n_classes=3,
+            learning_rate=0.01, continue_training=True, steps=250)
+        classifier.fit(iris.data, iris.target)
+        score1 = accuracy_score(iris.target, classifier.predict(iris.data))
+        classifier.fit(iris.data, iris.target)
+        score2 = accuracy_score(iris.target, classifier.predict(iris.data))
+        self.assertGreater(score2, score1,
+                           "Failed with score = {0}".format(score2))
+
+    def testIrisStreaming(self):
+        iris = datasets.load_iris()
+
+        def iris_data():
+            while True:
+                for x in iris.data:
+                    yield x
+
+        def iris_predict_data():
+            for x in iris.data:
+                yield x
+
+        def iris_target():
+            while True:
+                for y in iris.target:
+                    yield y
+
+        classifier = skflow.TensorFlowLinearClassifier(n_classes=3, steps=100)
+        classifier.fit(iris_data(), iris_target())
+        score1 = accuracy_score(iris.target, classifier.predict(iris.data))
+        score2 = accuracy_score(iris.target, classifier.predict(iris_predict_data()))
+        self.assertGreater(score1, 0.5, "Failed with score = {0}".format(score1))
+        self.assertEqual(score2, score1, "Scores from {0} iterator doesn't "
+                                         "match score {1} from full "
+                                         "data.".format(score2, score1))
+
+    def testIris_proba(self):
+        random.seed(42)
+        iris = datasets.load_iris()
+        classifier = skflow.TensorFlowClassifier(n_classes=3, steps=250)
+        classifier.fit(iris.data, iris.target)
+        score = log_loss(iris.target, classifier.predict_proba(iris.data))
+        self.assertLess(score, 0.8, "Failed with score = {0}".format(score))
+
+    def testBoston(self):
+        random.seed(42)
+        boston = datasets.load_boston()
+        regressor = skflow.TensorFlowLinearRegressor(
+            batch_size=boston.data.shape[0], steps=500, learning_rate=0.001)
+        regressor.fit(boston.data, boston.target)
+        score = mean_squared_error(
+            boston.target, regressor.predict(boston.data))
+        self.assertLess(score, 150, "Failed with score = {0}".format(score))
+
+    def testUnfitted(self):
+        estimator = skflow.TensorFlowEstimator(model_fn=None, n_classes=1)
+        with self.assertRaises(base.NotFittedError):
+            estimator.predict([1, 2, 3])
+        with self.assertRaises(base.NotFittedError):
+            estimator.save('/tmp/path')
+
+
+if __name__ == '__main__':
+    tf.test.main()
diff --git a/tensorflow/contrib/skflow/python/skflow/tests/test_custom_decay.py b/tensorflow/contrib/skflow/python/skflow/tests/test_custom_decay.py
new file mode 100644
index 0000000000..bdad76d091
--- /dev/null
+++ b/tensorflow/contrib/skflow/python/skflow/tests/test_custom_decay.py
@@ -0,0 +1,50 @@
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+import tensorflow as tf
+
+import random
+
+from sklearn import datasets, metrics
+from sklearn.cross_validation import train_test_split
+
+from tensorflow.contrib.skflow.python import skflow
+
+class CustomDecayTest(tf.test.TestCase):
+    """Trains a DNN classifier whose learning rate is a decay callable."""
+
+    def testIrisExponentialDecay(self):
+        random.seed(42)
+        iris = datasets.load_iris()
+        X_train, X_test, y_train, y_test = train_test_split(
+            iris.data, iris.target, test_size=0.2, random_state=42)
+
+        def exp_decay(global_step):
+            # Learning-rate schedule handed to the classifier below.
+            return tf.train.exponential_decay(
+                learning_rate=0.1, global_step=global_step,
+                decay_steps=100, decay_rate=0.001)
+
+        classifier = skflow.TensorFlowDNNClassifier(
+            hidden_units=[10, 20, 10], n_classes=3, steps=800,
+            learning_rate=exp_decay)
+        classifier.fit(X_train, y_train)
+        predictions = classifier.predict(X_test)
+        score = metrics.accuracy_score(y_test, predictions)
+        self.assertGreater(score, 0.7, "Failed with score = {0}".format(score))
+
+if __name__ == "__main__":
+    tf.test.main()
diff --git a/tensorflow/contrib/skflow/python/skflow/tests/test_data_feeder.py b/tensorflow/contrib/skflow/python/skflow/tests/test_data_feeder.py
new file mode 100644
index 0000000000..8a004ac48c
--- /dev/null
+++ b/tensorflow/contrib/skflow/python/skflow/tests/test_data_feeder.py
@@ -0,0 +1,120 @@
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+from struct import Struct
+import numpy as np
+import six
+
+import tensorflow as tf
+
+from tensorflow.contrib.skflow.python.skflow.io import *
+
+
+class MockPlaceholder(object):
+    """Test double that only carries the `name` attribute set at construction."""
+    def __init__(self, name):
+        self.name = name
+
+
+class DataFeederTest(tf.test.TestCase):
+    """Checks feed dicts built by the DataFeeder, streaming and dask feeders."""
+    def test_data_feeder_regression(self):
+        X = np.matrix([[1, 2], [3, 4]])
+        y = np.array([1, 2])
+        df = data_feeder.DataFeeder(X, y, n_classes=0, batch_size=3)
+        feed_dict_fn = df.get_feed_dict_fn(
+            MockPlaceholder(name='input'),
+            MockPlaceholder(name='output'))
+        feed_dict = feed_dict_fn()
+        # batch_size=3 exceeds the 2 available rows; both rows are expected back.
+        self.assertAllClose(feed_dict['input'], [[3, 4], [1, 2]])
+        self.assertAllClose(feed_dict['output'], [2, 1])
+
+    def test_data_feeder_multioutput_regression(self):
+        X = np.matrix([[1, 2], [3, 4]])
+        y = np.array([[1, 2], [3, 4]])
+        df = data_feeder.DataFeeder(X, y, n_classes=0, batch_size=2)
+        feed_dict_fn = df.get_feed_dict_fn(
+            MockPlaceholder(name='input'),
+            MockPlaceholder(name='output'))
+        feed_dict = feed_dict_fn()
+        self.assertAllClose(feed_dict['input'], [[3, 4], [1, 2]])
+        self.assertAllClose(feed_dict['output'], [[3, 4], [1, 2]])
+
+    def test_data_feeder_multioutput_classification(self):
+        X = np.matrix([[1, 2], [3, 4]])
+        y = np.array([[0, 1, 2], [2, 3, 4]])
+        df = data_feeder.DataFeeder(X, y, n_classes=5, batch_size=2)
+        feed_dict_fn = df.get_feed_dict_fn(
+            MockPlaceholder(name='input'),
+            MockPlaceholder(name='output'))
+        feed_dict = feed_dict_fn()
+        self.assertAllClose(feed_dict['input'], [[3, 4], [1, 2]])
+        self.assertAllClose(feed_dict['output'], [[[0, 0, 1, 0, 0],
+                                                   [0, 0, 0, 1, 0],
+                                                   [0, 0, 0, 0, 1]],
+                                                  [[1, 0, 0, 0, 0],
+                                                   [0, 1, 0, 0, 0],
+                                                   [0, 0, 1, 0, 0]]])
+
+    def test_streaming_data_feeder(self):
+        def X_iter():
+            yield np.array([1, 2])
+            yield np.array([3, 4])
+
+        def y_iter():
+            yield np.array([1])
+            yield np.array([2])
+        df = data_feeder.StreamingDataFeeder(X_iter(), y_iter(), n_classes=0,
+                                             batch_size=2)
+        feed_dict_fn = df.get_feed_dict_fn(
+            MockPlaceholder(name='input'),
+            MockPlaceholder(name='output'))
+        feed_dict = feed_dict_fn()
+        self.assertAllClose(feed_dict['input'], [[1, 2], [3, 4]])
+        self.assertAllClose(feed_dict['output'], [1, 2])
+
+    def test_dask_data_feeder(self):
+        if HAS_PANDAS and HAS_DASK:
+            X = pd.DataFrame(dict(a=np.array([.1, .3, .4, .6, .2, .1, .6]),
+                                  b=np.array([.7, .8, .1, .2, .5, .3, .9])))
+            X = dd.from_pandas(X, npartitions=2)
+            y = pd.DataFrame(dict(labels=np.array([1, 0, 2, 1, 0, 1, 2])))
+            y = dd.from_pandas(y, npartitions=2)
+            # NOTE(review): n_classes=2 here, yet the labels take values 0-2 and
+            # the expected one-hot rows below have 3 columns -- confirm intent.
+            df = data_feeder.DaskDataFeeder(X, y, n_classes=2, batch_size=2)
+            feed_dict_fn = df.get_feed_dict_fn(
+                MockPlaceholder(name='input'),
+                MockPlaceholder(name='output'))
+            feed_dict = feed_dict_fn()
+            self.assertAllClose(feed_dict['input'], [[ 0.40000001, 0.1],
+                                                     [ 0.60000002, 0.2]])
+            self.assertAllClose(feed_dict['output'], [[ 0., 0., 1.],
+                                                     [ 0., 1., 0.]])
+
+
+class SetupPredictDataFeederTest(tf.test.TestCase):
+    """Checks that setup_predict_data_feeder batches an iterator of rows."""
+    def test_iterable_data(self):
+        X = iter([[1, 2], [3, 4], [5, 6]])
+        df = data_feeder.setup_predict_data_feeder(X, batch_size=2)
+        self.assertAllClose(six.next(df), [[1, 2], [3, 4]])
+        self.assertAllClose(six.next(df), [[5, 6]])
+
+
+if __name__ == '__main__':
+    tf.test.main()
diff --git a/tensorflow/contrib/skflow/python/skflow/tests/test_early_stopping.py b/tensorflow/contrib/skflow/python/skflow/tests/test_early_stopping.py
new file mode 100644
index 0000000000..e6630a3464
--- /dev/null
+++ b/tensorflow/contrib/skflow/python/skflow/tests/test_early_stopping.py
@@ -0,0 +1,57 @@
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+import tensorflow as tf
+
+import random
+
+from sklearn import datasets, metrics
+from sklearn.cross_validation import train_test_split
+
+from tensorflow.contrib.skflow.python import skflow
+
+
+class EarlyStoppingTest(tf.test.TestCase):
+    """Fits a DNN classifier on iris with and without a ValidationMonitor."""
+    def testIrisES(self):
+        random.seed(42)
+
+        iris = datasets.load_iris()
+        X_train, X_test, y_train, y_test = train_test_split(iris.data,
+                                                            iris.target,
+                                                            test_size=0.2,
+                                                            random_state=42)
+        X_train, X_val, y_train, y_val = train_test_split(
+            X_train, y_train, test_size=0.2, random_state=42)  # seeded too
+        val_monitor = skflow.monitors.ValidationMonitor(X_val, y_val, n_classes=3)
+
+        # classifier without early stopping - overfitting
+        classifier1 = skflow.TensorFlowDNNClassifier(hidden_units=[10, 20, 10],
+                                                     n_classes=3, steps=1000)
+        classifier1.fit(X_train, y_train)
+        score1 = metrics.accuracy_score(y_test, classifier1.predict(X_test))
+
+        # classifier with early stopping - improved accuracy on testing set
+        classifier2 = skflow.TensorFlowDNNClassifier(hidden_units=[10, 20, 10],
+                                                     n_classes=3, steps=1000)
+
+        classifier2.fit(X_train, y_train, val_monitor)
+        score2 = metrics.accuracy_score(y_test, classifier2.predict(X_test))
+        # NOTE: comparison disabled, so this test only checks both fits run.
+        # self.assertGreater(score2, score1, "No improvement using early stopping.")
+
+if __name__ == "__main__":
+    tf.test.main()
diff --git a/tensorflow/contrib/skflow/python/skflow/tests/test_estimators.py b/tensorflow/contrib/skflow/python/skflow/tests/test_estimators.py
new file mode 100644
index 0000000000..c888186bc1
--- /dev/null
+++ b/tensorflow/contrib/skflow/python/skflow/tests/test_estimators.py
@@ -0,0 +1,53 @@
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+import tensorflow as tf
+
+import random
+
+from sklearn import datasets, metrics
+from sklearn.cross_validation import train_test_split
+
+from tensorflow.contrib.skflow.python import skflow
+
+
+class CustomOptimizer(tf.test.TestCase):
+    """Trains a DNN classifier on iris with a custom optimizer factory."""
+    def testIrisMomentum(self):
+        random.seed(42)
+
+        iris = datasets.load_iris()
+        X_train, X_test, y_train, y_test = train_test_split(iris.data,
+                                                            iris.target,
+                                                            test_size=0.2,
+                                                            random_state=42)
+        # Callable learning rate: exponential decay starting from 0.1.
+        def exp_decay(global_step):
+            return tf.train.exponential_decay(
+                learning_rate=0.1, global_step=global_step,
+                decay_steps=100, decay_rate=0.001)
+        custom_optimizer = lambda x: tf.train.MomentumOptimizer(x, 0.9)
+        classifier = skflow.TensorFlowDNNClassifier(hidden_units=[10, 20, 10],
+                                                    n_classes=3, steps=800,
+                                                    learning_rate=exp_decay,
+                                                    optimizer=custom_optimizer)
+        classifier.fit(X_train, y_train)
+        score = metrics.accuracy_score(y_test, classifier.predict(X_test))
+
+        self.assertGreater(score, 0.7, "Failed with score = {0}".format(score))
+
+if __name__ == "__main__":
+    tf.test.main()
diff --git a/tensorflow/contrib/skflow/python/skflow/tests/test_grid_search.py b/tensorflow/contrib/skflow/python/skflow/tests/test_grid_search.py
new file mode 100644
index 0000000000..ed79627acd
--- /dev/null
+++ b/tensorflow/contrib/skflow/python/skflow/tests/test_grid_search.py
@@ -0,0 +1,44 @@
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+import random
+
+from sklearn import datasets
+from sklearn.grid_search import GridSearchCV
+from sklearn.metrics import accuracy_score, mean_squared_error
+
+import tensorflow as tf
+
+from tensorflow.contrib.skflow.python import skflow
+
+
+class GridSearchTest(tf.test.TestCase):
+    """Runs sklearn GridSearchCV over a skflow DNN classifier on iris."""
+    def testIrisDNN(self):
+        random.seed(42)
+        iris = datasets.load_iris()
+        classifier = skflow.TensorFlowDNNClassifier(
+            hidden_units=[10, 20, 10], n_classes=3, steps=50)
+        grid_search = GridSearchCV(classifier,
+            {'hidden_units': [[5, 5], [10, 10]],
+             'learning_rate': [0.1, 0.01]})
+        grid_search.fit(iris.data, iris.target)
+        score = accuracy_score(iris.target, grid_search.predict(iris.data))
+        self.assertGreater(score, 0.5, "Failed with score = {0}".format(score))
+
+
+if __name__ == "__main__":
+    tf.test.main()
diff --git a/tensorflow/contrib/skflow/python/skflow/tests/test_io.py b/tensorflow/contrib/skflow/python/skflow/tests/test_io.py
new file mode 100644
index 0000000000..58c5089764
--- /dev/null
+++ b/tensorflow/contrib/skflow/python/skflow/tests/test_io.py
@@ -0,0 +1,98 @@
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+import random
+
+from sklearn import datasets
+from sklearn.metrics import accuracy_score
+
+import tensorflow as tf
+
+from tensorflow.contrib.skflow.python import skflow
+from tensorflow.contrib.skflow.python.skflow.io import *
+
+
+class IOTest(tf.test.TestCase):
+    """Exercises pandas/dask inputs, guarded by the HAS_PANDAS/HAS_DASK flags."""
+    def test_pandas_dataframe(self):
+        if HAS_PANDAS:
+            random.seed(42)
+            iris = datasets.load_iris()
+            data = pd.DataFrame(iris.data)
+            labels = pd.DataFrame(iris.target)
+            classifier = skflow.TensorFlowLinearClassifier(n_classes=3)
+            classifier.fit(data, labels)
+            score = accuracy_score(labels, classifier.predict(data))
+            self.assertGreater(score, 0.5, "Failed with score = {0}".format(score))
+        else:
+            self.skipTest("No pandas installed. pandas-related tests are skipped.")
+
+    def test_pandas_series(self):
+        if HAS_PANDAS:
+            random.seed(42)
+            iris = datasets.load_iris()
+            data = pd.DataFrame(iris.data)
+            labels = pd.Series(iris.target)
+            classifier = skflow.TensorFlowLinearClassifier(n_classes=3)
+            classifier.fit(data, labels)
+            score = accuracy_score(labels, classifier.predict(data))
+            self.assertGreater(score, 0.5, "Failed with score = {0}".format(score))
+
+    def test_string_data_formats(self):
+        if HAS_PANDAS:
+            with self.assertRaises(ValueError):
+                skflow.io.extract_pandas_data(pd.DataFrame({"Test": ["A", "B"]}))
+            with self.assertRaises(ValueError):
+                skflow.io.extract_pandas_labels(pd.DataFrame({"Test": ["A", "B"]}))
+
+    def test_dask_io(self):
+        if HAS_DASK and HAS_PANDAS:
+            # test dask.dataframe
+            df = pd.DataFrame(dict(a=list('aabbcc'), b=list(range(6))),
+                              index=pd.date_range(start='20100101', periods=6))
+            ddf = dd.from_pandas(df, npartitions=3)
+            extracted_ddf = extract_dask_data(ddf)
+            self.assertEqual(extracted_ddf.divisions, (0, 2, 4, 6),
+                             "Failed with divisions = {0}".format(extracted_ddf.divisions))
+            self.assertEqual(extracted_ddf.columns.tolist(), ['a', 'b'],
+                             "Failed with columns = {0}".format(extracted_ddf.columns))
+            # test dask.series
+            labels = ddf['a']
+            extracted_labels = extract_dask_labels(labels)
+            self.assertEqual(extracted_labels.divisions, (0, 2, 4, 6),
+                             "Failed with divisions = {0}".format(extracted_labels.divisions))
+            # labels should only have one column
+            with self.assertRaises(ValueError):
+                extract_dask_labels(ddf)
+        else:
+            self.skipTest("No dask installed. dask-related tests are skipped.")
+
+    def test_dask_iris_classification(self):
+        if HAS_DASK and HAS_PANDAS:
+            random.seed(42)
+            iris = datasets.load_iris()
+            data = pd.DataFrame(iris.data)
+            data = dd.from_pandas(data, npartitions=2)
+            labels = pd.DataFrame(iris.target)
+            labels = dd.from_pandas(labels, npartitions=2)
+            classifier = skflow.TensorFlowLinearClassifier(n_classes=3)
+            classifier.fit(data, labels)
+            predictions = data.map_partitions(classifier.predict).compute()
+            score = accuracy_score(labels.compute(), predictions)
+            self.assertGreater(score, 0.5, "Failed with score = {0}".format(score))
+
+if __name__ == '__main__':
+    tf.test.main()
diff --git a/tensorflow/contrib/skflow/python/skflow/tests/test_multioutput.py b/tensorflow/contrib/skflow/python/skflow/tests/test_multioutput.py
new file mode 100644
index 0000000000..69f61e73a1
--- /dev/null
+++ b/tensorflow/contrib/skflow/python/skflow/tests/test_multioutput.py
@@ -0,0 +1,42 @@
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+import random
+
+import numpy as np
+
+from sklearn import datasets
+from sklearn.metrics import accuracy_score, mean_squared_error
+
+import tensorflow as tf
+from tensorflow.contrib.skflow.python import skflow
+
+
+class MultiOutputTest(tf.test.TestCase):
+    """Fits a linear regressor to a two-column (sin, cos) target."""
+    def testMultiRegression(self):
+        random.seed(42)
+        rng = np.random.RandomState(1)
+        X = np.sort(200 * rng.rand(100, 1) - 100, axis=0)
+        y = np.array([np.pi * np.sin(X).ravel(), np.pi * np.cos(X).ravel()]).T
+        regressor = skflow.TensorFlowLinearRegressor(learning_rate=0.01)
+        regressor.fit(X, y)
+        score = mean_squared_error(regressor.predict(X), y)
+        self.assertLess(score, 10, "Failed with score = {0}".format(score))
+
+
+if __name__ == "__main__":
+    tf.test.main()
diff --git a/tensorflow/contrib/skflow/python/skflow/tests/test_nonlinear.py b/tensorflow/contrib/skflow/python/skflow/tests/test_nonlinear.py
new file mode 100644
index 0000000000..83208e6fc7
--- /dev/null
+++ b/tensorflow/contrib/skflow/python/skflow/tests/test_nonlinear.py
@@ -0,0 +1,127 @@
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+import random
+
+from sklearn import datasets
+from sklearn.metrics import accuracy_score, mean_squared_error
+
+import tensorflow as tf
+from tensorflow.contrib.skflow.python import skflow
+
+
+class NonLinearTest(tf.test.TestCase):
+    """Checks DNN and RNN estimators: score bounds and parameter shapes."""
+    def testIrisDNN(self):
+        random.seed(42)
+        iris = datasets.load_iris()
+        classifier = skflow.TensorFlowDNNClassifier(
+            hidden_units=[10, 20, 10], n_classes=3)
+        classifier.fit(iris.data, iris.target)
+        score = accuracy_score(iris.target, classifier.predict(iris.data))
+        self.assertGreater(score, 0.5, "Failed with score = {0}".format(score))
+        weights = classifier.weights_
+        self.assertEqual(weights[0].shape, (4, 10))
+        self.assertEqual(weights[1].shape, (10, 20))
+        self.assertEqual(weights[2].shape, (20, 10))
+        self.assertEqual(weights[3].shape, (10, 3))
+        biases = classifier.bias_
+        self.assertEqual(len(biases), 4)
+
+    def testBostonDNN(self):
+        random.seed(42)
+        boston = datasets.load_boston()
+        regressor = skflow.TensorFlowDNNRegressor(
+            hidden_units=[10, 20, 10], n_classes=0,
+            batch_size=boston.data.shape[0],
+            steps=200, learning_rate=0.001)
+        regressor.fit(boston.data, boston.target)
+        score = mean_squared_error(
+            boston.target, regressor.predict(boston.data))
+        self.assertLess(score, 100, "Failed with score = {0}".format(score))
+        weights = regressor.weights_
+        self.assertEqual(weights[0].shape, (13, 10))
+        self.assertEqual(weights[1].shape, (10, 20))
+        self.assertEqual(weights[2].shape, (20, 10))
+        self.assertEqual(weights[3].shape, (10, 1))
+        biases = regressor.bias_
+        self.assertEqual(len(biases), 4)
+
+    def testRNN(self):
+        random.seed(42)
+        import numpy as np
+        data = np.array(list([[2, 1, 2, 2, 3],
+                              [2, 2, 3, 4, 5],
+                              [3, 3, 1, 2, 1],
+                              [2, 4, 5, 4, 1]]), dtype=np.float32)
+        # labels for classification
+        labels = np.array(list([1, 0, 1, 0]), dtype=np.float32)
+        # targets for regression
+        targets = np.array(list([10, 16, 10, 16]), dtype=np.float32)
+        test_data = np.array(list([[1, 3, 3, 2, 1], [2, 3, 4, 5, 6]]))
+        def input_fn(X):
+            return tf.split(1, 5, X)
+
+        # Classification
+        classifier = skflow.TensorFlowRNNClassifier(
+            rnn_size=2, cell_type='lstm', n_classes=2, input_op_fn=input_fn)
+        classifier.fit(data, labels)
+        classifier.weights_  # bare access: value is discarded
+        classifier.bias_  # bare access: value is discarded
+        predictions = classifier.predict(test_data)
+        self.assertAllClose(predictions, np.array([1, 0]))
+        # A plain 'rnn' cell with num_layers=2 is fitted as well.
+        classifier = skflow.TensorFlowRNNClassifier(
+            rnn_size=2, cell_type='rnn', n_classes=2,
+            input_op_fn=input_fn, num_layers=2)
+        classifier.fit(data, labels)
+        classifier = skflow.TensorFlowRNNClassifier(
+            rnn_size=2, cell_type='invalid_cell_type', n_classes=2,
+            input_op_fn=input_fn, num_layers=2)
+        with self.assertRaises(ValueError):
+            classifier.fit(data, labels)
+
+        # Regression
+        regressor = skflow.TensorFlowRNNRegressor(
+            rnn_size=2, cell_type='gru', input_op_fn=input_fn)
+        regressor.fit(data, targets)
+        regressor.weights_  # bare access: value is discarded
+        regressor.bias_  # bare access: value is discarded
+        predictions = regressor.predict(test_data)  # result is not asserted on
+    
+    def testBidirectionalRNN(self):
+        random.seed(42)
+        import numpy as np
+        data = np.array(list([[2, 1, 2, 2, 3],
+                              [2, 2, 3, 4, 5],
+                              [3, 3, 1, 2, 1],
+                              [2, 4, 5, 4, 1]]), dtype=np.float32)
+        labels = np.array(list([1, 0, 1, 0]), dtype=np.float32)
+        def input_fn(X):
+            return tf.split(1, 5, X)
+
+        # Classification
+        classifier = skflow.TensorFlowRNNClassifier(
+            rnn_size=2, cell_type='lstm', n_classes=2, input_op_fn=input_fn,
+            bidirectional=True)
+        classifier.fit(data, labels)
+        predictions = classifier.predict(np.array(list([[1, 3, 3, 2, 1],
+                                                        [2, 3, 4, 5, 6]])))
+        self.assertAllClose(predictions, np.array([1, 0]))
+        
+
+if __name__ == "__main__":
+    tf.test.main()
diff --git a/tensorflow/contrib/skflow/python/skflow/tests/test_regression.py b/tensorflow/contrib/skflow/python/skflow/tests/test_regression.py
new file mode 100644
index 0000000000..183716c92b
--- /dev/null
+++ b/tensorflow/contrib/skflow/python/skflow/tests/test_regression.py
@@ -0,0 +1,47 @@
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+import random
+
+from sklearn import datasets
+from sklearn.metrics import accuracy_score, mean_squared_error, log_loss
+
+import numpy as np
+
+import tensorflow as tf
+from tensorflow.contrib.skflow.python import skflow
+
+
+class RegressionTest(tf.test.TestCase):
+    """Checks that TensorFlowLinearRegressor recovers known weights and bias."""
+    def testLinearRegression(self):
+        rng = np.random.RandomState(67)
+        N = 1000
+        n_weights = 10
+        self.bias = 2
+        self.X = rng.uniform(-1, 1, (N, n_weights))
+        self.weights = 10 * rng.randn(n_weights)
+        self.y = np.dot(self.X, self.weights)
+        self.y += rng.randn(len(self.X)) * 0.05 + rng.normal(self.bias, 0.01)
+        regressor = skflow.TensorFlowLinearRegressor()
+        regressor.fit(self.X, self.y)
+        # Have to flatten weights since they come in (X, 1) shape
+        self.assertAllClose(self.weights, regressor.weights_.flatten(), rtol=0.01)
+        self.assertLess(abs(self.bias - regressor.bias_), 0.1)
+
+
+if __name__ == "__main__":
+    tf.test.main()
diff --git a/tensorflow/contrib/skflow/python/skflow/tests/test_saver.py b/tensorflow/contrib/skflow/python/skflow/tests/test_saver.py
new file mode 100644
index 0000000000..8f946cf3e2
--- /dev/null
+++ b/tensorflow/contrib/skflow/python/skflow/tests/test_saver.py
@@ -0,0 +1,85 @@
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+import os
+import random
+
+from sklearn import datasets
+from sklearn.metrics import accuracy_score, mean_squared_error, log_loss
+
+import tensorflow as tf
+from tensorflow.contrib.skflow.python import skflow
+
+
+class SaverTest(tf.test.TestCase):
+    """Checks estimator save()/restore() round trips and restore() failures."""
+    def testIris(self):
+        path = os.path.join(tf.test.get_temp_dir(), 'tmp.saver')
+        random.seed(42)
+        iris = datasets.load_iris()
+        classifier = skflow.TensorFlowLinearClassifier(n_classes=3)
+        classifier.fit(iris.data, iris.target)
+        classifier.save(path)
+        new_classifier = skflow.TensorFlowEstimator.restore(path)
+        self.assertEqual(type(new_classifier), type(classifier))
+        score = accuracy_score(iris.target, new_classifier.predict(iris.data))
+        self.assertGreater(score, 0.5, "Failed with score = {0}".format(score))
+
+    def testCustomModel(self):
+        path = os.path.join(tf.test.get_temp_dir(), 'tmp.saver2')
+        random.seed(42)
+        iris = datasets.load_iris()
+        def custom_model(X, y):
+            return skflow.models.logistic_regression(X, y)
+        classifier = skflow.TensorFlowEstimator(model_fn=custom_model,
+            n_classes=3)
+        classifier.fit(iris.data, iris.target)
+        classifier.save(path)
+        new_classifier = skflow.TensorFlowEstimator.restore(path)
+        self.assertEqual(type(new_classifier), type(classifier))
+        score = accuracy_score(iris.target, new_classifier.predict(iris.data))
+        self.assertGreater(score, 0.5, "Failed with score = {0}".format(score))
+
+    def testDNN(self):
+        path = os.path.join(tf.test.get_temp_dir(), 'tmp_saver3')
+        random.seed(42)
+        iris = datasets.load_iris()
+        classifier = skflow.TensorFlowDNNClassifier(hidden_units=[10, 20, 10], n_classes=3)
+        classifier.fit(iris.data, iris.target)
+        classifier.save(path)
+        new_classifier = skflow.TensorFlowEstimator.restore(path)
+        self.assertEqual(type(new_classifier), type(classifier))
+        score = accuracy_score(iris.target, new_classifier.predict(iris.data))
+        self.assertGreater(score, 0.5, "Failed with score = {0}".format(score))
+
+    def testNoFolder(self):
+        with self.assertRaises(ValueError):
+            skflow.TensorFlowEstimator.restore('no_model_path')
+
+    def testNoCheckpoints(self):
+        path = os.path.join(tf.test.get_temp_dir(), 'tmp', 'tmp.saver4')
+        random.seed(42)
+        iris = datasets.load_iris()
+        classifier = skflow.TensorFlowDNNClassifier(hidden_units=[10, 20, 10], n_classes=3)
+        classifier.fit(iris.data, iris.target)
+        classifier.save(path)
+        # Removing the 'checkpoint' file should make restore() raise ValueError.
+        os.remove(os.path.join(path, 'checkpoint'))
+        with self.assertRaises(ValueError):
+            skflow.TensorFlowEstimator.restore(path)
+        
+
+if __name__ == "__main__":
+    tf.test.main()
diff --git a/tensorflow/contrib/skflow/python/skflow/trainer.py b/tensorflow/contrib/skflow/python/skflow/trainer.py
new file mode 100644
index 0000000000..d5149d8843
--- /dev/null
+++ b/tensorflow/contrib/skflow/python/skflow/trainer.py
@@ -0,0 +1,148 @@
+"""Generic trainer for TensorFlow models."""
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+from six.moves import xrange   # pylint: disable=redefined-builtin
+
+from tensorflow.python.training import training as train
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import init_ops
+from tensorflow.python.ops import clip_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import gradients
+from tensorflow.python.ops import variables
+from tensorflow.python.ops import variable_scope as vs
+
+
+OPTIMIZER_CLS_NAMES = {  # optimizer name -> optimizer class, looked up by TensorFlowTrainer
+    "SGD": train.GradientDescentOptimizer,
+    "Adagrad": train.AdagradOptimizer,
+    "Adam": train.AdamOptimizer,
+}
+
+
+class TensorFlowTrainer(object):
+    """Builds the training ops for a loss and runs the training loop.
+
+    Attributes:
+      trainer: Op that applies the gradients and runs the 'update_ops' collection.
+      gradients: Gradients of the loss w.r.t. the trainable variables.
+    """
+
+    def __init__(self, loss, global_step, optimizer,
+                 learning_rate, clip_gradients=5.0):
+        """Build a trainer part of graph.
+
+        Args:
+          loss: Tensor that evaluates to model's loss.
+          global_step: Tensor with global step of the model.
+          optimizer: Name of the optimizer class (SGD, Adam, Adagrad) or class.
+          learning_rate: If this is constant float value, no decay function is used.
+                         Instead, a customized decay function can be passed that accepts
+                         global_step as parameter and returns a Tensor, e.g.:
+                         def exp_decay(global_step):
+                            return tf.train.exponential_decay(
+                                learning_rate=0.1, global_step=global_step,
+                                decay_steps=2, decay_rate=0.001)
+          clip_gradients: Global norm to clip the gradients to; clipping is
+                          skipped unless this is greater than 0.0.
+        Raises:
+            ValueError: if learning_rate is not a float or a callable.
+        """
+        self.loss = loss
+        self.global_step = global_step
+        # pylint: disable=redefined-variable-type
+        if isinstance(learning_rate, float):
+            self._learning_rate = vs.get_variable(
+                "learning_rate",
+                [],
+                initializer=init_ops.constant_initializer(learning_rate))
+        elif callable(learning_rate):
+            self._learning_rate = learning_rate(self.global_step)
+        else:
+            raise ValueError("learning_rate should be a float or a callable function.")
+        params = variables.trainable_variables()
+        self.gradients = gradients.gradients(loss, params)
+        if clip_gradients > 0.0:
+            self.gradients, self.gradients_norm = clip_ops.clip_by_global_norm(
+                self.gradients, clip_gradients)
+        grads_and_vars = zip(self.gradients, params)
+        # Non-callables are names; this also accepts Python 2 `unicode` strings.
+        optimizer_cls = (optimizer if callable(optimizer)
+                         else OPTIMIZER_CLS_NAMES[optimizer])
+        self._optimizer = optimizer_cls(self._learning_rate)
+        self.trainer = self._optimizer.apply_gradients(grads_and_vars,
+                                                       global_step=global_step,
+                                                       name="train")
+        # Update ops during training, e.g. batch_norm_ops
+        self.trainer = control_flow_ops.group(self.trainer, *ops.get_collection('update_ops'))
+        # Op that initializes all variables, not only the trainable ones.
+        self._initializers = variables.initialize_all_variables()
+
+    def initialize(self, sess):
+        """Initializes all variables.
+
+        Args:
+            sess: Session object.
+
+        Returns:
+            Values of initializers.
+        """
+        return sess.run(self._initializers)
+
+    def train(self, sess, feed_dict_fn, steps, monitor,
+              summary_writer=None, summaries=None,
+              feed_params_fn=None):
+        """Trains a model for given number of steps, given feed_dict function.
+
+        Args:
+            sess: Session object.
+            feed_dict_fn: Function that will return a feed dictionary.
+            steps: Number of steps to run.
+            monitor: Monitor object to track training progress and induce early stopping
+            summary_writer: SummaryWriter object to use for writing summaries.
+            summaries: Joined object of all summaries that should be ran.
+            feed_params_fn: Forwarded unchanged to `monitor.update` on every step.
+
+        Returns:
+            None. The loss of each step is reported to `monitor`, not returned.
+        """
+        for step in xrange(steps):
+            feed_dict = feed_dict_fn()
+            if summaries is not None:
+                global_step, loss, summ, _ = sess.run(
+                    [self.global_step, self.loss, summaries, self.trainer],
+                    feed_dict=feed_dict)
+            else:
+                global_step, loss, _ = sess.run(
+                    [self.global_step, self.loss, self.trainer],
+                    feed_dict=feed_dict)
+            monitor.update(step, global_step, loss, sess,
+                           feed_params_fn, loss_expression_tensor=self.loss)
+            if summaries is not None and summary_writer and summ is not None:
+                summary_writer.add_summary(summ, global_step)
+            if monitor.monitor_inducing_stop():
+                break
+
+
+class RestoredTrainer(TensorFlowTrainer):
+    """Trainer wrapping the loss, step and train op of an existing graph."""
+
+    # pylint: disable=super-init-not-called
+    def __init__(self, loss, global_step, trainer):
+        # Nothing is built here: the three handles are only stored so that
+        # the inherited `train` loop can run them.
+        self.loss, self.global_step, self.trainer = loss, global_step, trainer
diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc b/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc
index bcd71f7949..fcb86028a2 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc
@@ -160,7 +160,7 @@ class GrpcWorkerService : public AsyncServiceInterface {
   ::grpc::Alarm* shutdown_alarm_;
 
   // The following section contains one request handler method per
-  // RPC. The The `FooHandler` method is called (indirectly) by
+  // RPC. The `FooHandler` method is called (indirectly) by
   // `HandleRPCsLoop()` when the next Foo RPC is received. Each
   // `FooHandler` call schedules a closure on `env_->compute_pool`,
   // and is responsible for requesting the next Foo call by calling
diff --git a/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc b/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc
index 7bf2e3d94c..f29531a6d5 100644
--- a/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc
@@ -24,6 +24,7 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor_types.h"
 #include "tensorflow/core/kernels/maxpooling_op.h"
 #include "tensorflow/core/kernels/maxpooling_op_gpu.h"
+#include "tensorflow/core/util/cuda_kernel_helper.h"
 
 namespace tensorflow {
 namespace {
@@ -43,10 +44,7 @@ namespace {
 //         int form, keeping track of the flattened index of the input item that
 //         produces the max output. If a nullptr is passed in for mask, no mask
 //         will be produced.
-#define CUDA_1D_KERNEL_LOOP(i, n)                              \
-  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \
-       i += blockDim.x * gridDim.x)
-
+//
 // To call the forward and backward functions, use e.g.:
 // const int kThreadsPerBlock = 1024
 // const int output_size = batch * channels * pooled_height * pooled_width;
@@ -201,11 +199,6 @@ __global__ void MaxPoolBackward(const int nthreads, const dtype* top_diff,
   }
 }
 
-template <typename dtype>
-__global__ void SetZero(const int nthreads, dtype* bottom_diff) {
-  CUDA_1D_KERNEL_LOOP(index, nthreads) { *(bottom_diff + index) = dtype(0); }
-}
-
 #undef CUDA_1D_KERNEL_LOOP
 }  // namespace
 
diff --git a/tensorflow/core/kernels/resize_nearest_neighbor_op.cc b/tensorflow/core/kernels/resize_nearest_neighbor_op.cc
index 26cdac1519..61b89fb9a5 100644
--- a/tensorflow/core/kernels/resize_nearest_neighbor_op.cc
+++ b/tensorflow/core/kernels/resize_nearest_neighbor_op.cc
@@ -258,6 +258,93 @@ TF_CALL_GPU_NUMBER_TYPES(REGISTER_KERNEL);
 
 #undef REGISTER_KERNEL
 
+// GPU kernel for the "ResizeNearestNeighborGrad" op.
+//
+// Input 0 is a 4-D tensor (handed to the backward kernel as `top_diff`) and
+// input 1 is a 2-element int32 vector with the height and width of the
+// output. The output keeps the first and last dimension of input 0.
+template <typename T>
+class ResizeNearestNeighborGPUOpGrad : public OpKernel {
+ public:
+  explicit ResizeNearestNeighborGPUOpGrad(OpKernelConstruction* context)
+      : OpKernel(context) {
+    OP_REQUIRES_OK(context, context->GetAttr("align_corners", &align_corners_));
+  }
+
+  void Compute(OpKernelContext* context) override {
+    // Grab and validate the input:
+    const Tensor& input = context->input(0);
+    OP_REQUIRES(context, input.dims() == 4,
+                errors::InvalidArgument("input must be 4-dimensional",
+                                        input.shape().DebugString()));
+
+    // Grab and validate the output shape:
+    const Tensor& shape_t = context->input(1);
+    OP_REQUIRES(context, shape_t.dims() == 1,
+                errors::InvalidArgument("shape_t must be 1-dimensional",
+                                        shape_t.shape().DebugString()));
+    OP_REQUIRES(context, shape_t.NumElements() == 2,
+                errors::InvalidArgument("shape_t must have two elements",
+                                        shape_t.shape().DebugString()));
+
+    auto sizes = shape_t.vec<int32>();
+    OP_REQUIRES(context, sizes(0) > 0 && sizes(1) > 0,
+                errors::InvalidArgument("shape_t's elements must be positive"));
+
+    // Initialize shape to the batch size of the input, then add
+    // the rest of the dimensions
+    Tensor* output = nullptr;
+    OP_REQUIRES_OK(
+        context, context->allocate_output(0, TensorShape({input.dim_size(0), sizes(0),
+                                                          sizes(1), input.dim_size(3)}),
+                                          &output));
+
+    const int64 batch_size = input.dim_size(0);
+    const int64 in_height = input.dim_size(1);
+    const int64 in_width = input.dim_size(2);
+    const int64 channels = input.dim_size(3);
+
+    const int64 out_height = output->dim_size(1);
+    const int64 out_width = output->dim_size(2);
+
+    // Output-to-input size ratios; with align_corners the ratio is taken
+    // between the last indices (size - 1) instead of between the sizes.
+    const float height_scale =
+        (align_corners_ && in_height > 1)
+            ? (out_height - 1) / static_cast<float>(in_height - 1)
+            : out_height / static_cast<float>(in_height);
+    const float width_scale =
+        (align_corners_ && in_width > 1)
+            ? (out_width - 1) / static_cast<float>(in_width - 1)
+            : out_width / static_cast<float>(in_width);
+
+    bool status = ResizeNearestNeighborBackward(
+        input.flat<T>().data(), batch_size, in_height,
+        in_width, channels, out_height, out_width,
+        height_scale, width_scale, output->flat<T>().data(),
+        context->eigen_gpu_device());
+
+    if (!status) {
+      context->SetStatus(
+          errors::Internal("Failed launching ResizeNearestNeighborGrad"));
+    }
+  }
+
+ private:
+  bool align_corners_;
+};
+
+#define REGISTER_KERNEL(T)                                           \
+  REGISTER_KERNEL_BUILDER(Name("ResizeNearestNeighborGrad")          \
+                            .Device(DEVICE_GPU)                      \
+                            .TypeConstraint<T>("T")                  \
+                            .HostMemory("size"),                     \
+                          ResizeNearestNeighborGPUOpGrad<T>);
+
+TF_CALL_GPU_NUMBER_TYPES(REGISTER_KERNEL);  // expands REGISTER_KERNEL for the GPU number types
+
+#undef REGISTER_KERNEL  // the helper macro is only needed for the expansion above
+
 #endif  // GOOGLE_CUDA
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/resize_nearest_neighbor_op_gpu.cu.cc b/tensorflow/core/kernels/resize_nearest_neighbor_op_gpu.cu.cc
index bee24a5b02..3ed580bbe6 100644
--- a/tensorflow/core/kernels/resize_nearest_neighbor_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/resize_nearest_neighbor_op_gpu.cu.cc
@@ -51,6 +51,30 @@ __global__ void ResizeNearestNeighborNHWC(const int nthreads, const T* bottom_da
   }
 }
 
+// Adds every element of `top_diff` (flattened as batch, y, x, channel) onto
+// its nearest-neighbor target in `bottom_diff`, using an atomic add.
+template <typename T>
+__global__ void ResizeNearestNeighborBackwardNHWC(
+    const int nthreads, const T* top_diff, const int in_height,
+    const int in_width, const int channels, const int out_height,
+    const int out_width, const float height_scale, const float width_scale,
+    T* bottom_diff) {
+  CUDA_1D_KERNEL_LOOP(index, nthreads) {
+    // Unflatten `index`, innermost dimension first.
+    const int c = index % channels;
+    const int pixel = index / channels;
+    const int in_x = pixel % in_width;
+    const int row = pixel / in_width;
+    const int in_y = row % in_height;
+    const int n = row / in_height;
+
+    const int out_y = min(static_cast<int>(floorf(in_y * height_scale)), out_height - 1);
+    const int out_x = min(static_cast<int>(floorf(in_x * width_scale)), out_width - 1);
+    const int idx = ((n * out_height + out_y) * out_width + out_x) * channels + c;
+    CudaAtomicAdd(bottom_diff + idx, ldg(top_diff + index));
+  }
+}
+
 }  // namespace
 
 template <typename T>
@@ -81,6 +105,41 @@ bool ResizeNearestNeighbor(const T* bottom_data, const int batch,
 TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPEC);
 
 #undef DECLARE_GPU_SPEC
+
+// Zero-fills `bottom_diff`, then launches ResizeNearestNeighborBackwardNHWC
+// over `top_diff`; both launches use the stream of `d`. Returns d.ok().
+template <typename T>
+bool ResizeNearestNeighborBackward(
+    const T* top_diff, const int batch, const int in_height,
+    const int in_width, const int channels, const int out_height,
+    const int out_width, const float height_scale, const float width_scale,
+    T* bottom_diff, const Eigen::GpuDevice& d) {
+  const int output_size = batch * channels * out_height * out_width;
+  const CudaLaunchConfig zero_cfg = GetCudaLaunchConfig(output_size, d);
+  SetZero<<<zero_cfg.block_count, zero_cfg.thread_per_block, 0, d.stream()>>>(
+      output_size, bottom_diff);
+
+  const int input_size = batch * channels * in_height * in_width;
+  const CudaLaunchConfig grad_cfg = GetCudaLaunchConfig(input_size, d);
+  ResizeNearestNeighborBackwardNHWC<T>
+      <<<grad_cfg.block_count, grad_cfg.thread_per_block, 0, d.stream()>>>(
+          grad_cfg.virtual_thread_count, top_diff, in_height, in_width, channels,
+          out_height, out_width, height_scale, width_scale, bottom_diff);
+  return d.ok();
+}
+
+#define DECLARE_GPU_SPEC(T)                                                           \
+  template bool ResizeNearestNeighborBackward(const T* top_diff, const int batch,     \
+                               const int in_height, const int in_width,               \
+                               const int channels, const int out_height,              \
+                               const int out_width, const float height_scale,         \
+                               const float width_scale, T* bottom_diff,               \
+                               const Eigen::GpuDevice& d);
+
+TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPEC);  // emits the explicit instantiations of the launcher
+
+#undef DECLARE_GPU_SPEC  // the helper macro is only needed for the expansion above
+
 }  // end namespace tensorflow
 
 #endif  // GOOGLE_CUDA
diff --git a/tensorflow/core/kernels/resize_nearest_neighbor_op_gpu.h b/tensorflow/core/kernels/resize_nearest_neighbor_op_gpu.h
index 65b4b331d9..d99a91d887 100644
--- a/tensorflow/core/kernels/resize_nearest_neighbor_op_gpu.h
+++ b/tensorflow/core/kernels/resize_nearest_neighbor_op_gpu.h
@@ -32,6 +32,12 @@ bool ResizeNearestNeighbor(const T* bottom_data, const int batch, const int in_h
                            const int out_width, const float height_scale, const float width_scale,
                            T* top_data, const Eigen::GpuDevice& d);
 
+template <typename T>
+bool ResizeNearestNeighborBackward(const T* top_diff, const int batch, const int in_height,
+                                   const int in_width, const int channels, const int out_height,
+                                   const int out_width, const float height_scale, const float width_scale,
+                                   T* bottom_diff, const Eigen::GpuDevice& d);
+
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_CORE_KERNELS_RESIZE_NEAREST_NEIGHBOR_OP_GPU_H_
diff --git a/tensorflow/core/kernels/softmax_op.cc b/tensorflow/core/kernels/softmax_op.cc
index cfcfeb5760..305a91fecf 100644
--- a/tensorflow/core/kernels/softmax_op.cc
+++ b/tensorflow/core/kernels/softmax_op.cc
@@ -34,8 +34,8 @@ namespace functor {
 template <typename T>
 struct SoftmaxFunctor<CPUDevice, T> {
   void operator()(const CPUDevice& d, typename TTypes<T>::ConstMatrix logits,
-                  typename TTypes<T>::Matrix softmax) {
-    SoftmaxEigenImpl<CPUDevice, T>::Compute(d, logits, softmax);
+                  typename TTypes<T>::Matrix softmax, const bool log) {
+    SoftmaxEigenImpl<CPUDevice, T>::Compute(d, logits, softmax, log);
   }
 };
 }  // namespace functor
@@ -48,12 +48,24 @@ REGISTER_KERNEL_BUILDER(Name("Softmax")
                             .Device(DEVICE_CPU)
                             .TypeConstraint<double>("T"),
                         SoftmaxOp<CPUDevice, double>);
+REGISTER_KERNEL_BUILDER(Name("LogSoftmax")
+                            .Device(DEVICE_CPU)
+                            .TypeConstraint<float>("T"),
+                        SoftmaxOp<CPUDevice, float>);
+REGISTER_KERNEL_BUILDER(Name("LogSoftmax")
+                            .Device(DEVICE_CPU)
+                            .TypeConstraint<double>("T"),
+                        SoftmaxOp<CPUDevice, double>);
 
 #if GOOGLE_CUDA
 REGISTER_KERNEL_BUILDER(Name("Softmax")
                             .Device(DEVICE_GPU)
                             .TypeConstraint<float>("T"),
                         SoftmaxOp<GPUDevice, float>);
+REGISTER_KERNEL_BUILDER(Name("LogSoftmax")
+                            .Device(DEVICE_GPU)
+                            .TypeConstraint<float>("T"),
+                        SoftmaxOp<GPUDevice, float>);
 #endif  // GOOGLE_CUDA
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/softmax_op.h b/tensorflow/core/kernels/softmax_op.h
index e5e8c584fc..df78f85cc2 100644
--- a/tensorflow/core/kernels/softmax_op.h
+++ b/tensorflow/core/kernels/softmax_op.h
@@ -31,7 +31,12 @@ namespace tensorflow {
 template <typename Device, typename T>
 class SoftmaxOp : public OpKernel {
  public:
-  explicit SoftmaxOp(OpKernelConstruction* context) : OpKernel(context) {}
+  explicit SoftmaxOp(OpKernelConstruction* context) : OpKernel(context) {
+    // This kernel is registered for both "Softmax" and "LogSoftmax". Decide
+    // which one to compute from the op type; name() is the node name, which
+    // callers choose freely, so it cannot be used for this.
+    log_ = StringPiece(type_string()).starts_with("Log");
+  }
 
   void Compute(OpKernelContext* context) override {
     const Tensor& logits_in = context->input(0);
@@ -43,9 +45,12 @@ class SoftmaxOp : public OpKernel {
     if (logits_in.NumElements()) {
       functor::SoftmaxFunctor<Device, T> functor;
       functor(context->eigen_device<Device>(), logits_in.matrix<T>(),
-              softmax_out->matrix<T>());
+              softmax_out->matrix<T>(), log_);
     }
   }
+
+ private:
+  bool log_;
 };
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/softmax_op_functor.h b/tensorflow/core/kernels/softmax_op_functor.h
index 88e693d078..47bb9de411 100644
--- a/tensorflow/core/kernels/softmax_op_functor.h
+++ b/tensorflow/core/kernels/softmax_op_functor.h
@@ -26,21 +26,23 @@ namespace functor {
 // Functor used by SoftmaxOp to do the computations.
 template <typename Device, typename T>
 struct SoftmaxFunctor {
-  // Computes Softmax activation.
+  // Computes Softmax or LogSoftmax activation.
   //
   // logits: dim: batch_size, num_classes.
   // softmax: dims: batch_size, num_classes.
+  // log: if true, compute log(softmax) instead of softmax.
   void operator()(const Device& d, typename TTypes<T>::ConstMatrix logits,
-                  typename TTypes<T>::Matrix softmax);
+                  typename TTypes<T>::Matrix softmax, const bool log);
 };
 
-// Eigen code implementing SoftmaxFunctor::operator().
+// Eigen code implementing SoftmaxFunctor::operator(), for both the softmax
+// and the log-softmax case.
 // This code works for both CPU and GPU and is used by the functor
 // specializations for both device types.
 template <typename Device, typename T>
 struct SoftmaxEigenImpl {
   static void Compute(const Device& d, typename TTypes<T>::ConstMatrix logits,
-                      typename TTypes<T>::Matrix softmax) {
+                      typename TTypes<T>::Matrix softmax, const bool log) {
     const int kBatchDim = 0;
     const int kClassDim = 1;
 
@@ -61,22 +63,35 @@ struct SoftmaxEigenImpl {
     Eigen::IndexList<Eigen::type2index<1>, int> one_by_class;
     one_by_class.set(1, num_classes);
 #endif
-    // NOTE(touts): If you modify this implementation please run
-    // the ImageNetSoftmaxFwd benchmark in core_ops_test.cc.
-    //
-    // softmax = exp(logits - max(logits along classes));
-    softmax.device(d) = (logits -
-                         logits.maximum(along_class)
-                             .eval()
-                             .reshape(batch_by_one)
-                             .broadcast(one_by_class))
-                            .exp();
-    // softmax = softmax / sum(softmax along classes);
-    softmax.device(d) = (softmax /
-                         softmax.sum(along_class)
-                             .eval()
-                             .reshape(batch_by_one)
-                             .broadcast(one_by_class));
+    // shifted_logits = logits - max(logits along classes);
+    auto shifted_logits = (logits - logits.maximum(along_class)
+                                      .eval()
+                                      .reshape(batch_by_one)
+                                      .broadcast(one_by_class));
+    if (log) {
+      // Calculate the log of the softmax
+      // softmax = logits - max(logits along classes);
+      softmax.device(d) = shifted_logits;
+      // softmax = softmax - log(sum(exp(softmax along classes)));
+      softmax.device(d) = (softmax -
+                           softmax.exp().sum(along_class)
+                              .eval()
+                              .reshape(batch_by_one)
+                              .broadcast(one_by_class)
+                              .log());
+    } else {
+      // NOTE(touts): If you modify this implementation please run
+      // the BM_ImageNetSoftmaxFwd benchmark in nn_ops_test.cc.
+      //
+      // softmax = exp(logits - max(logits along classes));
+      softmax.device(d) = shifted_logits.exp();
+      // softmax = softmax / sum(softmax along classes);
+      softmax.device(d) = (softmax /
+                           softmax.sum(along_class)
+                               .eval()
+                               .reshape(batch_by_one)
+                               .broadcast(one_by_class));
+    }
   }
 };
 
diff --git a/tensorflow/core/kernels/softmax_op_gpu.cu.cc b/tensorflow/core/kernels/softmax_op_gpu.cu.cc
index cfd471383f..e27fff9b92 100644
--- a/tensorflow/core/kernels/softmax_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/softmax_op_gpu.cu.cc
@@ -32,8 +32,8 @@ namespace functor {
 template <typename T>
 struct SoftmaxFunctor<GPUDevice, T> {
   void operator()(const GPUDevice& d, typename TTypes<T>::ConstMatrix logits,
-                  typename TTypes<T>::Matrix softmax) {
-    SoftmaxEigenImpl<GPUDevice, T>::Compute(d, logits, softmax);
+                  typename TTypes<T>::Matrix softmax, const bool log) {
+    SoftmaxEigenImpl<GPUDevice, T>::Compute(d, logits, softmax, log);
   }
 };
 }  // end namespace functor
diff --git a/tensorflow/core/kernels/training_ops.cc b/tensorflow/core/kernels/training_ops.cc
index 7591f83d32..f761bf6dfc 100644
--- a/tensorflow/core/kernels/training_ops.cc
+++ b/tensorflow/core/kernels/training_ops.cc
@@ -37,6 +37,33 @@ struct ApplyGradientDescent<CPUDevice, T> {
 };
 
 template <typename T>
+struct ApplyAdadelta<CPUDevice, T> {
+  // Applies one Adadelta step in place:
+  //   accum        = rho * accum + (1 - rho) * grad^2
+  //   update       = sqrt(accum_update + epsilon) / sqrt(accum + epsilon) * grad
+  //   var         -= lr * update
+  //   accum_update = rho * accum_update + (1 - rho) * update^2
+  // NOTE(review): the GPU specialization is not visible from here; confirm
+  // that it applies the same formula.
+  void operator()(const CPUDevice& d, typename TTypes<T>::Flat var,
+                  typename TTypes<T>::Flat accum,
+                  typename TTypes<T>::Flat accum_update,
+                  typename TTypes<T>::ConstScalar lr,
+                  typename TTypes<T>::ConstScalar rho,
+                  typename TTypes<T>::ConstScalar epsilon,
+                  typename TTypes<T>::ConstFlat grad) {
+    accum.device(d) = accum * rho() + grad.square() * (1 - rho());
+    // `update` is an unevaluated expression that re-reads accum_update every
+    // time it is used, so var is stepped before accum_update is overwritten.
+    const auto update =
+        (accum_update + epsilon()).sqrt() * (accum + epsilon()).rsqrt() * grad;
+    var.device(d) -= update * lr();
+    accum_update.device(d) =
+        accum_update * rho() + update.square() * (1 - rho());
+  }
+};
+
+template <typename T>
 struct ApplyAdagrad<CPUDevice, T> {
   void operator()(const CPUDevice& d, typename TTypes<T>::Flat var,
                   typename TTypes<T>::Flat accum,
@@ -225,6 +241,279 @@ REGISTER_KERNELS(GPU, double);
 #undef REGISTER_KERNELS
 
+// Kernel for the "ApplyAdadelta" op: validates its inputs and then updates
+// var (input 0), accum (input 1) and accum_update (input 2) in place through
+// functor::ApplyAdadelta.
 template <typename Device, typename T>
+class ApplyAdadeltaOp : public OpKernel {
+ public:
+  explicit ApplyAdadeltaOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("use_locking", &use_exclusive_lock_));
+  }
+
+  void Compute(OpKernelContext* ctx) override {
+    if (use_exclusive_lock_) {
+      mutex_lock l1(*ctx->input_ref_mutex(0));
+      // Don't try to acquire a lock on the second ref as they share the same
+      // mutex.
+      //
+      // mutex_lock l2(*ctx->input_ref_mutex(1));
+      DoValidate(ctx);
+      if (!ctx->status().ok()) return;
+      DoCompute(ctx);
+    } else {
+      DoValidate(ctx);
+      if (!ctx->status().ok()) return;
+      DoCompute(ctx);
+    }
+    ctx->forward_ref_input_to_ref_output(0, 0);
+  }
+
+ private:
+  bool use_exclusive_lock_;
+
+  // Sets an error status on ctx when an input is uninitialized or has an
+  // unexpected shape; callers check ctx->status() afterwards.
+  void DoValidate(OpKernelContext* ctx) {
+    Tensor var = ctx->mutable_input(0, use_exclusive_lock_);
+    Tensor accum = ctx->mutable_input(1, use_exclusive_lock_);
+    Tensor accum_update = ctx->mutable_input(2, use_exclusive_lock_);
+
+    OP_REQUIRES(
+        ctx, var.IsInitialized(),
+        errors::FailedPrecondition(
+            "Attempting to use uninitialized variables: ", def().input(0)));
+    OP_REQUIRES(
+        ctx, accum.IsInitialized(),
+        errors::FailedPrecondition(
+            "Attempting to use uninitialized variables: ", def().input(1)));
+    OP_REQUIRES(
+        ctx, accum_update.IsInitialized(),
+        errors::FailedPrecondition(
+            "Attempting to use uninitialized variables: ", def().input(2)));
+
+    const Tensor& lr = ctx->input(3);
+    const Tensor& rho = ctx->input(4);
+    const Tensor& epsilon = ctx->input(5);
+    const Tensor& grad = ctx->input(6);
+
+    OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(lr.shape()),
+                errors::InvalidArgument("lr is not a scalar: ",
+                                        lr.shape().DebugString()));
+
+    OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(rho.shape()),
+                errors::InvalidArgument("rho is not a scalar: ",
+                                        rho.shape().DebugString()));
+
+    OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(epsilon.shape()),
+                errors::InvalidArgument("epsilon is not a scalar: ",
+                                        epsilon.shape().DebugString()));
+
+    OP_REQUIRES(
+        ctx, var.shape().IsSameSize(accum.shape()),
+        errors::InvalidArgument("var and accum do not have the same shape",
+                                var.shape().DebugString(), " ",
+                                accum.shape().DebugString()));
+    // The functor combines var, accum and accum_update element by element,
+    // so accum_update has to match var as well.
+    OP_REQUIRES(
+        ctx, var.shape().IsSameSize(accum_update.shape()),
+        errors::InvalidArgument(
+            "var and accum_update do not have the same shape",
+            var.shape().DebugString(), " ", accum_update.shape().DebugString()));
+    OP_REQUIRES(
+        ctx, var.shape().IsSameSize(grad.shape()),
+        errors::InvalidArgument("var and grad do not have the same shape",
+                                var.shape().DebugString(), " ",
+                                grad.shape().DebugString()));
+  }
+
+  // Runs the device functor on the flattened inputs; performs no validation.
+  void DoCompute(OpKernelContext* ctx) {
+    const Device& device = ctx->template eigen_device<Device>();
+    Tensor var = ctx->mutable_input(0, use_exclusive_lock_);
+    Tensor accum = ctx->mutable_input(1, use_exclusive_lock_);
+    Tensor accum_update = ctx->mutable_input(2, use_exclusive_lock_);
+
+    const Tensor& lr = ctx->input(3);
+    const Tensor& rho = ctx->input(4);
+    const Tensor& epsilon = ctx->input(5);
+    const Tensor& grad = ctx->input(6);
+
+    functor::ApplyAdadelta<Device, T>()(device, var.flat<T>(), accum.flat<T>(),
+                                        accum_update.flat<T>(), lr.scalar<T>(),
+                                        rho.scalar<T>(), epsilon.scalar<T>(),
+                                        grad.flat<T>());
+  }
+};
+
+typedef Eigen::ThreadPoolDevice CPUDevice;
+typedef Eigen::GpuDevice GPUDevice;
+
+#define REGISTER_KERNELS(D, T)                                        \
+  REGISTER_KERNEL_BUILDER(                                            \
+      Name("ApplyAdadelta").Device(DEVICE_##D).TypeConstraint<T>("T"), \
+      ApplyAdadeltaOp<D##Device, T>);
+
+REGISTER_KERNELS(CPU, float);
+REGISTER_KERNELS(CPU, double);
+
+#if GOOGLE_CUDA
+// Forward declarations of the functor specializations for GPU.
+namespace functor {
+#define DECLARE_GPU_SPEC(T)                                               \
+  template <>                                                             \
+  void ApplyAdadelta<GPUDevice, T>::operator()(                           \
+      const GPUDevice& d, typename TTypes<T>::Flat var,                   \
+      typename TTypes<T>::Flat accum,                                     \
+      typename TTypes<T>::Flat accum_update,                              \
+      typename TTypes<T>::ConstScalar lr,                                \
+      typename TTypes<T>::ConstScalar rho,                                \
+      typename TTypes<T>::ConstScalar epsilon,                            \
+      typename TTypes<T>::ConstFlat grad);                                \
+  extern template struct ApplyAdadelta<GPUDevice, T>;
+DECLARE_GPU_SPEC(float);
+DECLARE_GPU_SPEC(double);
+#undef DECLARE_GPU_SPEC
+}  // namespace functor
+
+REGISTER_KERNELS(GPU, float);
+REGISTER_KERNELS(GPU, double);
+#endif
+#undef REGISTER_KERNELS
+
+// Sparse Adadelta update of the rows of var, accum and accum_update that are
+// selected by indices. Note, this op works on cpu only.
+template <typename T, typename Tindex>
+class SparseApplyAdadeltaOp : public OpKernel {
+ public:
+  explicit SparseApplyAdadeltaOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("use_locking", &use_exclusive_lock_));
+  }
+
+  void Compute(OpKernelContext* ctx) override NO_THREAD_SAFETY_ANALYSIS {
+    // mu_accum is the same mutex as mu_var (a global mutex is used for now).
+    mutex* mu_var = ctx->input_ref_mutex(0);
+    if (use_exclusive_lock_) mu_var->lock();
+    // A failed OP_REQUIRES returns from ApplyUpdate only, so the unlock below
+    // runs on every path and the mutex is never left held.
+    ApplyUpdate(ctx);
+    if (use_exclusive_lock_) mu_var->unlock();
+    if (ctx->status().ok()) ctx->forward_ref_input_to_ref_output(0, 0);
+  }
+
+ private:
+  // Validates all inputs, then updates the selected rows in place.
+  void ApplyUpdate(OpKernelContext* ctx) NO_THREAD_SAFETY_ANALYSIS {
+    Tensor var = ctx->mutable_input(0, use_exclusive_lock_);
+    Tensor accum_grad = ctx->mutable_input(1, use_exclusive_lock_);
+    Tensor accum_update = ctx->mutable_input(2, use_exclusive_lock_);
+    OP_REQUIRES(
+        ctx, var.IsInitialized(),
+        errors::FailedPrecondition(
+            "Attempting to use uninitialized variables: ", def().input(0)));
+    OP_REQUIRES(
+        ctx, accum_grad.IsInitialized(),
+        errors::FailedPrecondition(
+            "Attempting to use uninitialized variables: ", def().input(1)));
+    OP_REQUIRES(
+        ctx, accum_update.IsInitialized(),
+        errors::FailedPrecondition(
+            "Attempting to use uninitialized variables: ", def().input(2)));
+    OP_REQUIRES(
+        ctx, var.shape().IsSameSize(accum_grad.shape()),
+        errors::InvalidArgument("var and accum_grad do not have the same shape",
+                                var.shape().DebugString(), " ",
+                                accum_grad.shape().DebugString()));
+    OP_REQUIRES(
+        ctx, var.shape().IsSameSize(accum_update.shape()),
+        errors::InvalidArgument("var and accum_update do not have the same shape",
+                                var.shape().DebugString(), " ",
+                                accum_update.shape().DebugString()));
+    OP_REQUIRES(ctx, TensorShapeUtils::IsVectorOrHigher(var.shape()),
+                errors::InvalidArgument("var must be at least 1 dimensional"));
+
+    const Tensor& lr = ctx->input(3);
+    OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(lr.shape()),
+                errors::InvalidArgument("lr is not a scalar: ",
+                                        lr.shape().DebugString()));
+    const Tensor& rho = ctx->input(4);
+    OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(rho.shape()),
+                errors::InvalidArgument("rho is not a scalar: ",
+                                        rho.shape().DebugString()));
+    const Tensor& epsilon = ctx->input(5);
+    OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(epsilon.shape()),
+                errors::InvalidArgument("epsilon is not a scalar: ",
+                                        epsilon.shape().DebugString()));
+    const Tensor& grad = ctx->input(6);
+    const Tensor& indices = ctx->input(7);
+    OP_REQUIRES(ctx, TensorShapeUtils::IsVector(indices.shape()),
+                errors::InvalidArgument("indices must be one-dimensional"));
+
+    for (int d = 1; d < var.dims(); d++) {
+      OP_REQUIRES(ctx, var.dim_size(d) == grad.dim_size(d),
+                  errors::InvalidArgument(strings::StrCat(
+                      "var and grad must match in dimension ", d)));
+    }
+    const Tindex N = indices.dim_size(0);
+    OP_REQUIRES(
+        ctx, grad.dim_size(0) == N,
+        errors::InvalidArgument(
+            "grad must be the same size as indices in the first dimension."));
+
+    if (N > 0) {
+      const Tindex first_dim_size = var.dim_size(0);
+      // Validate all the indices are in range
+      auto indices_vec = indices.vec<Tindex>();
+      for (Tindex i = 0; i < N; i++) {
+        const Tindex index = indices_vec(i);
+        OP_REQUIRES(ctx, index >= 0 && index < first_dim_size,
+                    errors::InvalidArgument(
+                        strings::StrCat("Index ", index, " at offset ", i,
+                                        " in indices is out of range")));
+      }
+
+      auto var_flat = var.flat_outer_dims<T>();
+      auto accum_grad_flat = accum_grad.flat_outer_dims<T>();
+      auto accum_update_flat = accum_update.flat_outer_dims<T>();
+      auto grad_flat = grad.flat_outer_dims<T>();
+      const T lr_scalar = lr.scalar<T>()();
+      const T rho_scalar = rho.scalar<T>()();
+      const T epsilon_scalar = epsilon.scalar<T>()();
+
+      for (Tindex i = 0; i < N; i++) {
+        const Tindex index = indices_vec(i);
+        auto accum_ = accum_grad_flat.template chip<0>(index);
+        auto accum_update_ = accum_update_flat.template chip<0>(index);
+        auto grad_ = grad_flat.template chip<0>(i);
+
+        accum_ = accum_ * accum_.constant(rho_scalar) + grad_.square() * grad_.constant(1 - rho_scalar);
+        const auto update = (accum_update_ + accum_update_.constant(epsilon_scalar)).sqrt() * (accum_ + accum_.constant(epsilon_scalar)).rsqrt() * grad_;
+        // update is lazy; apply to var before accum_update_ is overwritten.
+        auto v = var_flat.template chip<0>(index);
+        v -= update * update.constant(lr_scalar);
+        accum_update_ = accum_update_ * accum_update_.constant(rho_scalar) + update.square() * update.constant(1 - rho_scalar);
+      }
+    }
+  }
+
+  bool use_exclusive_lock_;
+};
+
+#define REGISTER_KERNELS(T, Tindices)                                \
+  REGISTER_KERNEL_BUILDER(Name("SparseApplyAdadelta")                \
+                              .Device(DEVICE_CPU)                    \
+                              .TypeConstraint<T>("T")                \
+                              .TypeConstraint<Tindices>("Tindices"), \
+                          SparseApplyAdadeltaOp<T, Tindices>);
+
+REGISTER_KERNELS(float, int32);
+REGISTER_KERNELS(float, int64);
+REGISTER_KERNELS(double, int32);
+REGISTER_KERNELS(double, int64);
+
+#undef REGISTER_KERNELS
+
+template <typename Device, typename T>
 class ApplyAdagradOp : public OpKernel {
  public:
   explicit ApplyAdagradOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
diff --git a/tensorflow/core/kernels/training_ops.h b/tensorflow/core/kernels/training_ops.h
index b2550bcd6f..6f46d64419 100644
--- a/tensorflow/core/kernels/training_ops.h
+++ b/tensorflow/core/kernels/training_ops.h
@@ -34,6 +34,17 @@ struct ApplyGradientDescent {
 };
 
 template <typename Device, typename T>
+struct ApplyAdadelta {
+  void operator()(const Device& d, typename TTypes<T>::Flat var,
+                  typename TTypes<T>::Flat accum,
+                  typename TTypes<T>::Flat accum_update,
+                  typename TTypes<T>::ConstScalar lr,
+                  typename TTypes<T>::ConstScalar rho,
+                  typename TTypes<T>::ConstScalar epsilon,
+                  typename TTypes<T>::ConstFlat grad);
+};
+
+template <typename Device, typename T>
 struct ApplyAdagrad {
   void operator()(const Device& d, typename TTypes<T>::Flat var,
                   typename TTypes<T>::Flat accum,
diff --git a/tensorflow/core/kernels/training_ops_gpu.cu.cc b/tensorflow/core/kernels/training_ops_gpu.cu.cc
index ffd3d580f2..22570ebd5a 100644
--- a/tensorflow/core/kernels/training_ops_gpu.cu.cc
+++ b/tensorflow/core/kernels/training_ops_gpu.cu.cc
@@ -52,6 +52,33 @@ struct ApplyAdagrad<GPUDevice, T> {
 };
 
 template <typename T>
+struct ApplyAdadelta<GPUDevice, T> {
+  void operator()(const GPUDevice& d, typename TTypes<T>::Flat var,
+                  typename TTypes<T>::Flat accum,
+                  typename TTypes<T>::Flat accum_update,
+                  typename TTypes<T>::ConstScalar lr,
+                  typename TTypes<T>::ConstScalar rho,
+                  typename TTypes<T>::ConstScalar epsilon,
+                  typename TTypes<T>::ConstFlat grad) {
+    Eigen::array<typename TTypes<T>::Tensor::Index, 1> bcast;
+    bcast[0] = grad.dimension(0);
+    Eigen::Sizes<1> single;
+    accum.device(d) = accum * rho.reshape(single).broadcast(bcast) +
+        grad.square() * (grad.constant(1) - rho.reshape(single).broadcast(bcast));
+    const auto update =
+        (accum_update + epsilon.reshape(single).broadcast(bcast)).sqrt() *
+        (accum + epsilon.reshape(single).broadcast(bcast)).rsqrt() * grad;
+    // update is an unevaluated expression that reads accum_update, so var
+    // must consume it before accum_update is overwritten.
+    var.device(d) -= update * lr.reshape(single).broadcast(bcast);
+    accum_update.device(d) =
+        accum_update * rho.reshape(single).broadcast(bcast) +
+        update.square() * (grad.constant(1) - rho.reshape(single).broadcast(bcast));
+  }
+};
+
+
+template <typename T>
 struct ApplyMomentum<GPUDevice, T> {
   void operator()(const GPUDevice& d, typename TTypes<T>::Flat var,
                   typename TTypes<T>::Flat accum,
@@ -129,6 +156,9 @@ template struct functor::ApplyGradientDescent<GPUDevice, double>;
 template struct functor::ApplyAdagrad<GPUDevice, float>;
 template struct functor::ApplyAdagrad<GPUDevice, double>;
 
+template struct functor::ApplyAdadelta<GPUDevice, float>;
+template struct functor::ApplyAdadelta<GPUDevice, double>;
+
 template struct functor::ApplyMomentum<GPUDevice, float>;
 template struct functor::ApplyMomentum<GPUDevice, double>;
 
diff --git a/tensorflow/core/ops/compat/ops_history.v0.pbtxt b/tensorflow/core/ops/compat/ops_history.v0.pbtxt
index 634b862ce7..6df684e24d 100644
--- a/tensorflow/core/ops/compat/ops_history.v0.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v0.pbtxt
@@ -435,6 +435,74 @@ op {
   }
 }
 op {
+  name: "ApplyAdadelta"
+  input_arg {
+    name: "var"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "accum"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "accum_update"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "rho"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "out"
+    type_attr: "T"
+    is_ref: true
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+}
+op {
   name: "ApplyAdagrad"
   input_arg {
     name: "var"
@@ -7829,6 +7897,27 @@ op {
   }
 }
 op {
+  name: "LogSoftmax"
+  input_arg {
+    name: "logits"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "logsoftmax"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
   name: "LogUniformCandidateSampler"
   input_arg {
     name: "true_classes"
@@ -14551,6 +14640,88 @@ op {
   }
 }
 op {
+  name: "SparseApplyAdadelta"
+  input_arg {
+    name: "var"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "accum"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "accum_update"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "rho"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
+  }
+  output_arg {
+    name: "out"
+    type_attr: "T"
+    is_ref: true
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+      }
+    }
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+}
+op {
   name: "SparseApplyAdagrad"
   input_arg {
     name: "var"
diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc
index 4ccec0f6b9..6db0289f1f 100644
--- a/tensorflow/core/ops/nn_ops.cc
+++ b/tensorflow/core/ops/nn_ops.cc
@@ -243,7 +243,7 @@ performs the following:
 3. For each patch, right-multiplies the filter matrix and the image patch
    vector.
 
-In detail, with the default NCHW format,
+In detail, with the default NHWC format,
 
     output[b, i, j, k] =
         sum_{di, dj, q} input[b, strides[1] * i + di, strides[2] * j + dj, q] *
@@ -715,6 +715,23 @@ softmax: Same shape as `logits`.
 
 // --------------------------------------------------------------------------
 
+REGISTER_OP("LogSoftmax")
+    .Input("logits: T")
+    .Output("logsoftmax: T")
+    .Attr("T: {float, double}")
+    .Doc(R"doc(
+Computes log softmax activations.
+
+For each batch `i` and class `j` we have
+
+    logsoftmax[i, j] = logits[i, j] - log(sum(exp(logits[i])))
+
+logits: 2-D with shape `[batch_size, num_classes]`.
+logsoftmax: Same shape as `logits`.
+)doc");
+
+// --------------------------------------------------------------------------
+
 REGISTER_OP("SoftmaxCrossEntropyWithLogits")
     .Input("features: T")
     .Input("labels: T")
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index c7e8902c9a..56ce0e2a5d 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -290,6 +290,85 @@ op {
   description: "Reduces `input` along the dimensions given in `reduction_indices`. Unless\n`keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in\n`reduction_indices`. If `keep_dims` is true, the reduced dimensions are\nretained with length 1."
 }
 op {
+  name: "ApplyAdadelta"
+  input_arg {
+    name: "var"
+    description: "Should be from a Variable()."
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "accum"
+    description: "Should be from a Variable()."
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "accum_update"
+    description: "Should be from a Variable()."
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "lr"
+    description: "Scaling factor. Must be a scalar."
+    type_attr: "T"
+  }
+  input_arg {
+    name: "rho"
+    description: "Decay factor. Must be a scalar."
+    type_attr: "T"
+  }
+  input_arg {
+    name: "epsilon"
+    description: "Constant factor. Must be a scalar."
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    description: "The gradient."
+    type_attr: "T"
+  }
+  output_arg {
+    name: "out"
+    description: "Same as \"var\"."
+    type_attr: "T"
+    is_ref: true
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+    description: "If True, updating of the var, accum and update_accum tensors will be protected by\na lock; otherwise the behavior is undefined, but may exhibit less contention."
+  }
+  summary: "Update \'*var\' according to the adadelta scheme."
+  description: "accum = rho() * accum + (1 - rho()) * grad.square();\nupdate = (update_accum + epsilon).sqrt() * (accum + epsilon()).rsqrt() * grad;\nupdate_accum = rho() * update_accum + (1 - rho()) * update.square();\nvar -= update;"
+}
+op {
   name: "ApplyAdagrad"
   input_arg {
     name: "var"
@@ -2334,7 +2413,7 @@ op {
     }
   }
   summary: "Computes a 2-D convolution given 4-D `input` and `filter` tensors."
-  description: "Given an input tensor of shape `[batch, in_height, in_width, in_channels]`\nand a filter / kernel tensor of shape\n`[filter_height, filter_width, in_channels, out_channels]`, this op\nperforms the following:\n\n1. Flattens the filter to a 2-D matrix with shape\n   `[filter_height * filter_width * in_channels, output_channels]`.\n2. Extracts image patches from the input tensor to form a *virtual*\n   tensor of shape `[batch, out_height, out_width,\n   filter_height * filter_width * in_channels]`.\n3. For each patch, right-multiplies the filter matrix and the image patch\n   vector.\n\nIn detail, with the default NCHW format,\n\n    output[b, i, j, k] =\n        sum_{di, dj, q} input[b, strides[1] * i + di, strides[2] * j + dj, q] *\n                        filter[di, dj, q, k]\n\nMust have `strides[0] = strides[3] = 1`.  For the most common case of the same\nhorizontal and vertices strides, `strides = [1, stride, stride, 1]`."
+  description: "Given an input tensor of shape `[batch, in_height, in_width, in_channels]`\nand a filter / kernel tensor of shape\n`[filter_height, filter_width, in_channels, out_channels]`, this op\nperforms the following:\n\n1. Flattens the filter to a 2-D matrix with shape\n   `[filter_height * filter_width * in_channels, output_channels]`.\n2. Extracts image patches from the input tensor to form a *virtual*\n   tensor of shape `[batch, out_height, out_width,\n   filter_height * filter_width * in_channels]`.\n3. For each patch, right-multiplies the filter matrix and the image patch\n   vector.\n\nIn detail, with the default NHWC format,\n\n    output[b, i, j, k] =\n        sum_{di, dj, q} input[b, strides[1] * i + di, strides[2] * j + dj, q] *\n                        filter[di, dj, q, k]\n\nMust have `strides[0] = strides[3] = 1`.  For the most common case of the same\nhorizontal and vertices strides, `strides = [1, stride, stride, 1]`."
 }
 op {
   name: "Conv2DBackpropFilter"
@@ -4868,6 +4947,31 @@ op {
   description: "I.e., \\\\(y = \\log_e x\\\\)."
 }
 op {
+  name: "LogSoftmax"
+  input_arg {
+    name: "logits"
+    description: "2-D with shape `[batch_size, num_classes]`."
+    type_attr: "T"
+  }
+  output_arg {
+    name: "logsoftmax"
+    description: "Same shape as `logits`."
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  summary: "Computes log softmax activations."
+  description: "For each batch `i` and class `j` we have\n\n    logsoftmax[i, j] = logits[i, j] - log(sum(exp(logits[i])))"
+}
+op {
   name: "LogUniformCandidateSampler"
   input_arg {
     name: "true_classes"
@@ -9406,6 +9510,89 @@ op {
   description: "The input `SparseTensor` objects\' indices are assumed ordered in standard\nlexicographic order.  If this is not the case, before this step run\n`SparseReorder` to restore index ordering.\n\nBy default, if two values sum to zero at some index, the output `SparseTensor`\nwould still include that particular location in its index, storing a zero in the\ncorresponding value slot.  To override this, callers can specify `thresh`,\nindicating that if the sum has a magnitude strictly smaller than `thresh`, its\ncorresponding value and index would then not be included.  In particular,\n`thresh == 0` (default) means everything is kept and actual thresholding happens\nonly for a positive value.\n\nIn the following shapes, `nnz` is the count after taking `thresh` into account."
 }
 op {
+  name: "SparseApplyAdadelta"
+  input_arg {
+    name: "var"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "accum"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "accum_update"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "rho"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
+  }
+  output_arg {
+    name: "out"
+    type_attr: "T"
+    is_ref: true
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+      }
+    }
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  summary: "var: Should be from a Variable()."
+}
+op {
   name: "SparseApplyAdagrad"
   input_arg {
     name: "var"
diff --git a/tensorflow/core/ops/training_ops.cc b/tensorflow/core/ops/training_ops.cc
index 9dbdff6ee6..ddb214442a 100644
--- a/tensorflow/core/ops/training_ops.cc
+++ b/tensorflow/core/ops/training_ops.cc
@@ -35,6 +35,64 @@ use_locking: If True, the subtraction will be protected by a lock;
   otherwise the behavior is undefined, but may exhibit less contention.
 )doc");
 
+REGISTER_OP("ApplyAdadelta")
+    .Input("var: Ref(T)")
+    .Input("accum: Ref(T)")
+    .Input("accum_update: Ref(T)")
+    .Input("lr: T")
+    .Input("rho: T")
+    .Input("epsilon: T")
+    .Input("grad: T")
+    .Output("out: Ref(T)")
+    .Attr("T: numbertype")
+    .Attr("use_locking: bool = false")
+    .Doc(R"doc(
+Update '*var' according to the adadelta scheme.
+
+accum = rho() * accum + (1 - rho()) * grad.square();
+update = (update_accum + epsilon).sqrt() * (accum + epsilon()).rsqrt() * grad;
+update_accum = rho() * update_accum + (1 - rho()) * update.square();
+var -= update;
+
+var: Should be from a Variable().
+accum: Should be from a Variable().
+accum_update: Should be from a Variable().
+lr: Scaling factor. Must be a scalar.
+rho: Decay factor. Must be a scalar.
+epsilon: Constant factor. Must be a scalar.
+grad: The gradient.
+out: Same as "var".
+use_locking: If True, updating of the var, accum and update_accum tensors will be protected by
+a lock; otherwise the behavior is undefined, but may exhibit less contention.
+)doc");
+
+REGISTER_OP("SparseApplyAdadelta")
+    .Input("var: Ref(T)")
+    .Input("accum: Ref(T)")
+    .Input("accum_update: Ref(T)")
+    .Input("lr: T")
+    .Input("rho: T")
+    .Input("epsilon: T")
+    .Input("grad: T")
+    .Input("indices: Tindices")
+    .Output("out: Ref(T)")
+    .Attr("T: numbertype")
+    .Attr("Tindices: {int32, int64}")
+    .Attr("use_locking: bool = false")
+    .Doc(R"doc(
+var: Should be from a Variable().
+accum_grad: Should be from a Variable().
+accum_update:: Should be from a Variable().
+lr: Learning rate. Must be a scalar.
+rho: Decay factor. Must be a scalar.
+epsilon: Constant factor. Must be a scalar.
+grad: The gradient.
+indices: A vector of indices into the first dimension of var and accum.
+out: Same as "var".
+use_locking: If True, updating of the var and accum tensors will be protected by
+a lock; otherwise the behavior is undefined, but may exhibit less contention.
+)doc");
+
 REGISTER_OP("ApplyAdagrad")
     .Input("var: Ref(T)")
     .Input("accum: Ref(T)")
diff --git a/tensorflow/core/util/cuda_kernel_helper.h b/tensorflow/core/util/cuda_kernel_helper.h
index 6124ddd1bf..a86567a7cc 100644
--- a/tensorflow/core/util/cuda_kernel_helper.h
+++ b/tensorflow/core/util/cuda_kernel_helper.h
@@ -104,6 +104,11 @@ CUDA_ATOMIC_WRAPPER(Add, double) {
   return __longlong_as_double(old);
 }
 
+template <typename T>
+__global__ void SetZero(const int nthreads, T* bottom_diff) {
+  CUDA_1D_KERNEL_LOOP(index, nthreads) { *(bottom_diff + index) = T(0); }
+}
+
 // For atomicSub.
 
 // Custom implementation for sub by just negating the value.
diff --git a/tensorflow/examples/skflow/README.md b/tensorflow/examples/skflow/README.md
new file mode 100644
index 0000000000..24b447cd7a
--- /dev/null
+++ b/tensorflow/examples/skflow/README.md
@@ -0,0 +1,49 @@
+# Examples of Using skflow
+
+Scikit Flow is a high-level API that lets you create,
+train and use deep learning models easily through the
+well-known Scikit Learn API.
+
+To run these examples you need to have the `scikit learn` library installed (`sudo pip install sklearn`).
+Some examples use the `pandas` library for data processing (`sudo pip install pandas`).
+
+* [Deep Neural Network Regression with Boston Data](boston.py)
+* [Convolutional Neural Networks with Digits Data](digits.py)
+* [Deep Neural Network Classification with Iris Data](iris.py)
+* [Deep Neural Network with Customized Decay Function](iris_custom_decay_dnn.py)
+* [Building A Custom Model](iris_custom_model.py)
+* [Accessing Weights and Biases in A Custom Model](mnist_weights.py)
+* [Building A Custom Model Using Multiple GPUs](multiple_gpu.py)
+* [Building A Model Using Different GPU Configurations](iris_config_addon.py)
+* [Using skflow with Pipeline](iris_with_pipeline.py)
+* [Example of Saving and Restoring Models](iris_save_restore.py)
+* [Multi-output Deep Neural Network regression](multioutput_regression.py)
+* [Improving Performance Using Early Stopping with Iris Data](iris_val_based_early_stopping.py)
+* [Out-of-core Data Classification Using Dask](out_of_core_data_classification.py)
+
+
+## Image classification
+
+* [Convolutional Neural Networks on MNIST Data](mnist.py)
+* [Deep Residual Networks on MNIST Data](resnet.py)
+
+
+## Text classification
+
+* [Text Classification Using Recurrent Neural Networks on Words](text_classification.py) 
+(See also [Simplified Version Using Built-in RNN Model](text_classification_builtin_rnn_model.py) using built-in parameters)
+* [Text Classification Using Convolutional Neural Networks on Words](text_classification_cnn.py)
+* [Text Classification Using Recurrent Neural Networks on Characters](text_classification_character_rnn.py)
+* [Text Classification Using Convolutional Neural Networks on Characters](text_classification_character_cnn.py)
+
+
+## Language modeling
+
+* [Character level language modeling](language_model.py)
+
+
+## Text sequence to sequence
+
+* [Character level neural language translation](neural_translation.py)
+* [Word level neural language translation](neural_translation_word.py)
+
diff --git a/tensorflow/examples/skflow/boston.py b/tensorflow/examples/skflow/boston.py
new file mode 100644
index 0000000000..2f8ff80a74
--- /dev/null
+++ b/tensorflow/examples/skflow/boston.py
@@ -0,0 +1,43 @@
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+from __future__ import division, print_function, absolute_import
+
+from sklearn import datasets, cross_validation, metrics
+from sklearn import preprocessing
+
+from tensorflow.contrib import skflow
+
+# Load dataset
+boston = datasets.load_boston()
+X, y = boston.data, boston.target
+
+# Split dataset into train / test
+X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y,
+    test_size=0.2, random_state=42)
+
+# scale data (training set) to 0 mean and unit Std. dev
+scaler = preprocessing.StandardScaler()
+X_train = scaler.fit_transform(X_train)
+
+# Build 2 layer fully connected DNN with 10, 10 units respecitvely.
+regressor = skflow.TensorFlowDNNRegressor(hidden_units=[10, 10],
+    steps=5000, learning_rate=0.1, batch_size=1)
+
+# Fit
+regressor.fit(X_train, y_train)
+
+# Predict and score
+score = metrics.mean_squared_error(regressor.predict(scaler.fit_transform(X_test)), y_test)
+
+print('MSE: {0:f}'.format(score))
diff --git a/tensorflow/examples/skflow/digits.py b/tensorflow/examples/skflow/digits.py
new file mode 100644
index 0000000000..167ac7cdab
--- /dev/null
+++ b/tensorflow/examples/skflow/digits.py
@@ -0,0 +1,58 @@
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+from sklearn import datasets, cross_validation, metrics
+import tensorflow as tf
+
+from tensorflow.contrib import skflow
+from tensorflow.contrib.skflow import monitors
+
+# Load dataset
+
+digits = datasets.load_digits()
+X = digits.images
+y = digits.target
+
+# Split it into train / test subsets
+
+X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y,
+                                                                     test_size=0.2,
+                                                                     random_state=42)
+
+# Split X_train again to create validation data
+
+X_train, X_val, y_train, y_val = cross_validation.train_test_split(X_train,
+                                                                   y_train,
+                                                                   test_size=0.2,
+                                                                   random_state=42)
+
+# TensorFlow model using Scikit Flow ops
+
+
+def conv_model(X, y):
+    X = tf.expand_dims(X, 3)
+    features = tf.reduce_max(skflow.ops.conv2d(X, 12, [3, 3]), [1, 2])
+    features = tf.reshape(features, [-1, 12])
+    return skflow.models.logistic_regression(features, y)
+
+val_monitor = monitors.ValidationMonitor(X_val, y_val, n_classes=10, print_steps=50)
+# Create a classifier, train and predict.
+classifier = skflow.TensorFlowEstimator(model_fn=conv_model, n_classes=10,
+                                        steps=1000, learning_rate=0.05,
+                                        batch_size=128)
+classifier.fit(X_train, y_train, val_monitor)
+score = metrics.accuracy_score(y_test, classifier.predict(X_test))
+print('Test Accuracy: {0:f}'.format(score))
diff --git a/tensorflow/examples/skflow/iris.py b/tensorflow/examples/skflow/iris.py
new file mode 100644
index 0000000000..054e6d4905
--- /dev/null
+++ b/tensorflow/examples/skflow/iris.py
@@ -0,0 +1,33 @@
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+from sklearn import datasets, metrics, cross_validation
+
+from tensorflow.contrib import skflow
+
+# Load dataset.
+iris = datasets.load_iris()
+X_train, X_test, y_train, y_test = cross_validation.train_test_split(iris.data, iris.target,
+    test_size=0.2, random_state=42)
+
+# Build 3 layer DNN with 10, 20, 10 units respectively.
+classifier = skflow.TensorFlowDNNClassifier(hidden_units=[10, 20, 10],
+    n_classes=3, steps=200)
+
+# Fit and predict.
+classifier.fit(X_train, y_train)
+score = metrics.accuracy_score(y_test, classifier.predict(X_test))
+print('Accuracy: {0:f}'.format(score))
diff --git a/tensorflow/examples/skflow/iris_config_addon.py b/tensorflow/examples/skflow/iris_config_addon.py
new file mode 100644
index 0000000000..6c053129f2
--- /dev/null
+++ b/tensorflow/examples/skflow/iris_config_addon.py
@@ -0,0 +1,41 @@
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import absolute_import
+
+from sklearn import datasets, metrics, cross_validation
+
+from tensorflow.contrib import skflow
+
+
+# Load dataset.
+iris = datasets.load_iris()
+X_train, X_test, y_train, y_test = cross_validation.train_test_split(iris.data, iris.target,
+    test_size=0.2, random_state=42)
+
+# You can define your configurations by providing a ConfigAddon object to
+# estimator to control session configurations, e.g. num_cores and gpu_memory_fraction
+config_addon = skflow.addons.ConfigAddon(num_cores=3, gpu_memory_fraction=0.6)
+
+# Build 3 layer DNN with 10, 20, 10 units respectively.
+classifier = skflow.TensorFlowDNNClassifier(hidden_units=[10, 20, 10],
+    n_classes=3, steps=200, config_addon=config_addon)
+
+# Fit and predict.
+classifier.fit(X_train, y_train)
+score = metrics.accuracy_score(y_test, classifier.predict(X_test))
+print('Accuracy: {0:f}'.format(score))
diff --git a/tensorflow/examples/skflow/iris_custom_decay_dnn.py b/tensorflow/examples/skflow/iris_custom_decay_dnn.py
new file mode 100644
index 0000000000..ae221255b1
--- /dev/null
+++ b/tensorflow/examples/skflow/iris_custom_decay_dnn.py
@@ -0,0 +1,38 @@
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+from sklearn import datasets, metrics
+from sklearn.cross_validation import train_test_split
+import tensorflow as tf
+from tensorflow.contrib import skflow
+
+iris = datasets.load_iris()
+X_train, X_test, y_train, y_test = train_test_split(iris.data,
+                                                    iris.target,
+                                                    test_size=0.2,
+                                                    random_state=42)
+# Learning-rate schedule: delegates to tf.train.exponential_decay with a base rate of 0.1, decay_steps=100 and decay_rate=0.001.
+def exp_decay(global_step):
+    return tf.train.exponential_decay(
+        learning_rate=0.1, global_step=global_step,
+        decay_steps=100, decay_rate=0.001)
+
+# Use the customized decay function as learning_rate, then fit and report the test accuracy.
+classifier = skflow.TensorFlowDNNClassifier(hidden_units=[10, 20, 10],
+                                            n_classes=3, steps=800,
+                                            learning_rate=exp_decay)
+classifier.fit(X_train, y_train)
+print('Accuracy: {0:f}'.format(metrics.accuracy_score(y_test, classifier.predict(X_test))))
diff --git a/tensorflow/examples/skflow/iris_custom_model.py b/tensorflow/examples/skflow/iris_custom_model.py
new file mode 100644
index 0000000000..8aa02b4173
--- /dev/null
+++ b/tensorflow/examples/skflow/iris_custom_model.py
@@ -0,0 +1,33 @@
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+from sklearn import datasets, metrics, cross_validation
+from tensorflow.contrib import skflow
+
+iris = datasets.load_iris()
+X_train, X_test, y_train, y_test = cross_validation.train_test_split(iris.data, iris.target,
+    test_size=0.2, random_state=42)
+
+def my_model(X, y):
+    """DNN with hidden layers of 10, 20 and 10 units and dropout with keep_prob=0.9, topped by logistic regression."""
+    layers = skflow.ops.dnn(X, [10, 20, 10], keep_prob=0.9)
+    return skflow.models.logistic_regression(layers, y)
+
+classifier = skflow.TensorFlowEstimator(model_fn=my_model, n_classes=3,
+    steps=1000)
+classifier.fit(X_train, y_train)
+score = metrics.accuracy_score(y_test, classifier.predict(X_test))
+print('Accuracy: {0:f}'.format(score))
diff --git a/tensorflow/examples/skflow/iris_save_restore.py b/tensorflow/examples/skflow/iris_save_restore.py
new file mode 100644
index 0000000000..a93287ff39
--- /dev/null
+++ b/tensorflow/examples/skflow/iris_save_restore.py
@@ -0,0 +1,44 @@
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+import shutil
+
+from sklearn import datasets, metrics, cross_validation
+from tensorflow.contrib import skflow
+
+iris = datasets.load_iris()
+X_train, X_test, y_train, y_test = cross_validation.train_test_split(iris.data, iris.target,
+    test_size=0.2, random_state=42)
+
+classifier = skflow.TensorFlowLinearClassifier(n_classes=3)
+classifier.fit(X_train, y_train)
+score = metrics.accuracy_score(y_test, classifier.predict(X_test))
+print('Accuracy: {0:f}'.format(score))
+
+# Clean checkpoint folder if exists
+try:
+    shutil.rmtree('/tmp/skflow_examples/iris_custom_model')
+except OSError:
+    pass
+
+# Save model, parameters and learned variables.
+classifier.save('/tmp/skflow_examples/iris_custom_model')
+classifier = None
+
+## Restore everything
+new_classifier = skflow.TensorFlowEstimator.restore('/tmp/skflow_examples/iris_custom_model')
+score = metrics.accuracy_score(y_test, new_classifier.predict(X_test))
+print('Accuracy: {0:f}'.format(score))
diff --git a/tensorflow/examples/skflow/iris_val_based_early_stopping.py b/tensorflow/examples/skflow/iris_val_based_early_stopping.py
new file mode 100644
index 0000000000..652ab1de06
--- /dev/null
+++ b/tensorflow/examples/skflow/iris_val_based_early_stopping.py
@@ -0,0 +1,48 @@
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+from sklearn import datasets, metrics
+from sklearn.cross_validation import train_test_split
+
+from tensorflow.contrib import skflow
+
+
+iris = datasets.load_iris()
+X_train, X_test, y_train, y_test = train_test_split(iris.data,
+                                                    iris.target,
+                                                    test_size=0.2,
+                                                    random_state=42)
+
+X_train, X_val, y_train, y_val = train_test_split(X_train, y_train,
+                                                  test_size=0.2, random_state=42)
+val_monitor = skflow.monitors.ValidationMonitor(X_val, y_val,
+                                                early_stopping_rounds=200,
+                                                n_classes=3)
+
+# classifier with early stopping on training data
+classifier1 = skflow.TensorFlowDNNClassifier(hidden_units=[10, 20, 10],
+                                             n_classes=3, steps=2000)
+classifier1.fit(X_train, y_train)
+score1 = metrics.accuracy_score(y_test, classifier1.predict(X_test))
+
+# classifier with early stopping on validation data
+classifier2 = skflow.TensorFlowDNNClassifier(hidden_units=[10, 20, 10],
+                                             n_classes=3, steps=2000)
+classifier2.fit(X_train, y_train, val_monitor)
+score2 = metrics.accuracy_score(y_test, classifier2.predict(X_test))
+
+# in many applications, the score is improved by using early stopping on val data
+print(score2 > score1)
diff --git a/tensorflow/examples/skflow/iris_with_pipeline.py b/tensorflow/examples/skflow/iris_with_pipeline.py
new file mode 100644
index 0000000000..c60f945c6f
--- /dev/null
+++ b/tensorflow/examples/skflow/iris_with_pipeline.py
@@ -0,0 +1,43 @@
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import absolute_import
+
+from sklearn.pipeline import Pipeline
+from sklearn.datasets import load_iris
+from sklearn import cross_validation
+from sklearn.preprocessing import StandardScaler
+from sklearn.metrics import accuracy_score
+from tensorflow.contrib import skflow
+
+iris = load_iris()
+X_train, X_test, y_train, y_test = cross_validation.train_test_split(iris.data, iris.target,
+    test_size=0.2, random_state=42)
+
+# It's useful to scale to ensure Stochastic Gradient Descent will do the right thing
+scaler = StandardScaler()
+
+# DNN classifier
+DNNclassifier = skflow.TensorFlowDNNClassifier(hidden_units=[10, 20, 10], n_classes=3, steps=200)
+
+pipeline = Pipeline([('scaler', scaler), ('DNNclassifier', DNNclassifier)])  # scale features, then classify
+
+pipeline.fit(X_train, y_train)
+
+score = accuracy_score(y_test, pipeline.predict(X_test))
+
+print('Accuracy: {0:f}'.format(score))
diff --git a/tensorflow/examples/skflow/language_model.py b/tensorflow/examples/skflow/language_model.py
new file mode 100644
index 0000000000..b681b51e64
--- /dev/null
+++ b/tensorflow/examples/skflow/language_model.py
@@ -0,0 +1,100 @@
+# encoding: utf-8
+
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+import itertools
+import math
+import os
+import numpy as np
+
+import tensorflow as tf
+from tensorflow.contrib import skflow
+
+### Training data
+
+CORPUS_FILENAME = "europarl-v6.fr-en.en"
+MAX_DOC_LENGTH = 10
+
+def training_data(filename):
+    with open(filename) as f:  # close the corpus file once the generator finishes or is discarded
+        for line in f:
+            yield line
+
+
+def iter_docs(docs):
+  for doc in docs:
+    n_parts = int(math.ceil(float(len(doc)) / MAX_DOC_LENGTH))  # number of MAX_DOC_LENGTH-sized windows needed to cover doc
+    for part in range(n_parts):
+      offset_begin = part * MAX_DOC_LENGTH
+      offset_end = offset_begin + MAX_DOC_LENGTH
+      inp = np.zeros(MAX_DOC_LENGTH, dtype=np.int32)  # zero-padded input window
+      out = np.zeros(MAX_DOC_LENGTH, dtype=np.int32)  # zero-padded target window
+      inp[:min(offset_end - offset_begin, len(doc) - offset_begin)] = doc[offset_begin:offset_end]
+      out[:min(offset_end - offset_begin, len(doc) - offset_begin - 1)] = doc[offset_begin + 1:offset_end + 1]  # same slice shifted by one position
+      yield inp, out
+
+
+def unpack_xy(iter_obj):
+  first_copy, second_copy = itertools.tee(iter_obj, 2)  # two independent views over the same stream of pairs
+  return (pair[0] for pair in first_copy), (pair[1] for pair in second_copy)
+
+
+byte_processor = skflow.preprocessing.ByteProcessor(
+    max_document_length=MAX_DOC_LENGTH)
+
+data = training_data(CORPUS_FILENAME)
+data = byte_processor.transform(data)
+X, y = unpack_xy(iter_docs(data))
+
+
+### Model
+
+HIDDEN_SIZE = 10
+
+
+def seq_autoencoder(X, y):
+    """Sequence auto-encoder with RNN."""
+    inputs = skflow.ops.one_hot_matrix(X, 256)
+    in_X, in_y, out_y = skflow.ops.seq2seq_inputs(inputs, y, MAX_DOC_LENGTH, MAX_DOC_LENGTH)
+    encoder_cell = tf.nn.rnn_cell.GRUCell(HIDDEN_SIZE)
+    decoder_cell = tf.nn.rnn_cell.OutputProjectionWrapper(tf.nn.rnn_cell.GRUCell(HIDDEN_SIZE), 256)
+    decoding, _, sampling_decoding, _ = skflow.ops.rnn_seq2seq(in_X, in_y, encoder_cell, decoder_cell)
+    return skflow.ops.sequence_classifier(decoding, out_y, sampling_decoding)
+
+
+def get_language_model(hidden_size):
+    """Returns a language model with given hidden size."""
+
+    def language_model(X, y):
+        inputs = skflow.ops.one_hot_matrix(X, 256)
+        inputs = skflow.ops.split_squeeze(1, MAX_DOC_LENGTH, inputs)
+        target = skflow.ops.split_squeeze(1, MAX_DOC_LENGTH, y)
+        encoder_cell = tf.nn.rnn_cell.OutputProjectionWrapper(tf.nn.rnn_cell.GRUCell(hidden_size),256)
+        output, _ = tf.nn.rnn(encoder_cell, inputs, dtype=tf.float32)
+        return skflow.ops.sequence_classifier(output, target)
+  
+    return language_model
+
+
+### Training model.
+
+estimator = skflow.TensorFlowEstimator(model_fn=get_language_model(HIDDEN_SIZE), 
+                                       n_classes=256, 
+                                       optimizer='Adam', learning_rate=0.01, 
+                                       steps=1000, batch_size=64, continue_training=True)
+
+estimator.fit(X, y)
diff --git a/tensorflow/examples/skflow/mnist.py b/tensorflow/examples/skflow/mnist.py
new file mode 100644
index 0000000000..7e2c935608
--- /dev/null
+++ b/tensorflow/examples/skflow/mnist.py
@@ -0,0 +1,72 @@
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+"""
+This example showcases how simple it is to build image classification networks.
+It follows description from this TensorFlow tutorial:
+    https://www.tensorflow.org/versions/master/tutorials/mnist/pros/index.html#deep-mnist-for-experts
+"""
+
+from __future__ import division, print_function, absolute_import
+
+from sklearn import metrics
+import tensorflow as tf
+from tensorflow.examples.tutorials.mnist import input_data
+from tensorflow.contrib import skflow
+
+### Download and load MNIST data.
+
+mnist = input_data.read_data_sets('MNIST_data')
+
+### Linear classifier.
+
+classifier = skflow.TensorFlowLinearClassifier(
+    n_classes=10, batch_size=100, steps=1000, learning_rate=0.01)
+classifier.fit(mnist.train.images, mnist.train.labels)
+score = metrics.accuracy_score(mnist.test.labels, classifier.predict(mnist.test.images))
+print('Accuracy: {0:f}'.format(score))
+
+### Convolutional network
+
+def max_pool_2x2(tensor_in):
+    return tf.nn.max_pool(tensor_in, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
+        padding='SAME')
+
+def conv_model(X, y):
+    # reshape X to 4d tensor with 2nd and 3rd dimensions being image width and height
+    # final dimension being the number of color channels
+    X = tf.reshape(X, [-1, 28, 28, 1])
+    # first conv layer will compute 32 features for each 5x5 patch
+    with tf.variable_scope('conv_layer1'):
+        h_conv1 = skflow.ops.conv2d(X, n_filters=32, filter_shape=[5, 5], 
+                                    bias=True, activation=tf.nn.relu)
+        h_pool1 = max_pool_2x2(h_conv1)
+    # second conv layer will compute 64 features for each 5x5 patch
+    with tf.variable_scope('conv_layer2'):
+        h_conv2 = skflow.ops.conv2d(h_pool1, n_filters=64, filter_shape=[5, 5], 
+                                    bias=True, activation=tf.nn.relu)
+        h_pool2 = max_pool_2x2(h_conv2)
+        # reshape tensor into a batch of vectors
+        h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
+    # densely connected layer with 1024 neurons
+    h_fc1 = skflow.ops.dnn(h_pool2_flat, [1024], activation=tf.nn.relu, keep_prob=0.5)
+    return skflow.models.logistic_regression(h_fc1, y)
+
+# Training and predicting
+classifier = skflow.TensorFlowEstimator(
+    model_fn=conv_model, n_classes=10, batch_size=100, steps=20000,
+    learning_rate=0.001)
+classifier.fit(mnist.train.images, mnist.train.labels)
+score = metrics.accuracy_score(mnist.test.labels, classifier.predict(mnist.test.images))
+print('Accuracy: {0:f}'.format(score))
diff --git a/tensorflow/examples/skflow/mnist_weights.py b/tensorflow/examples/skflow/mnist_weights.py
new file mode 100644
index 0000000000..c478018a01
--- /dev/null
+++ b/tensorflow/examples/skflow/mnist_weights.py
@@ -0,0 +1,98 @@
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+"""
+This example demonstrates one way to access the weights of a custom skflow
+model. It is otherwise identical to the standard MNIST convolutional code.
+"""
+
+from __future__ import division, print_function, absolute_import
+
+from sklearn import metrics
+import tensorflow as tf
+from tensorflow.examples.tutorials.mnist import input_data
+from tensorflow.contrib import skflow
+
+### Download and load MNIST data.
+
+mnist = input_data.read_data_sets('MNIST_data')
+
+### Linear classifier.
+
+classifier = skflow.TensorFlowLinearClassifier(
+    n_classes=10, batch_size=100, steps=1000, learning_rate=0.01)
+classifier.fit(mnist.train.images, mnist.train.labels)
+score = metrics.accuracy_score(mnist.test.labels, classifier.predict(mnist.test.images))
+print('Accuracy: {0:f}'.format(score))
+
+### Convolutional network
+
+def max_pool_2x2(tensor_in):
+    return tf.nn.max_pool(tensor_in, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
+        padding='SAME')
+
+def conv_model(X, y):
+    # reshape X to 4d tensor with 2nd and 3rd dimensions being image width and height
+    # final dimension being the number of color channels
+    X = tf.reshape(X, [-1, 28, 28, 1])
+    # first conv layer will compute 32 features for each 5x5 patch
+    with tf.variable_scope('conv_layer1'):
+        h_conv1 = skflow.ops.conv2d(X, n_filters=32, filter_shape=[5, 5], 
+                                    bias=True, activation=tf.nn.relu)
+        h_pool1 = max_pool_2x2(h_conv1)
+    # second conv layer will compute 64 features for each 5x5 patch
+    with tf.variable_scope('conv_layer2'):
+        h_conv2 = skflow.ops.conv2d(h_pool1, n_filters=64, filter_shape=[5, 5], 
+                                    bias=True, activation=tf.nn.relu)
+        h_pool2 = max_pool_2x2(h_conv2)
+        # reshape tensor into a batch of vectors
+        h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
+    # densely connected layer with 1024 neurons
+    h_fc1 = skflow.ops.dnn(h_pool2_flat, [1024], activation=tf.nn.relu, keep_prob=0.5)
+    return skflow.models.logistic_regression(h_fc1, y)
+
+# Training and predicting
+classifier = skflow.TensorFlowEstimator(
+    model_fn=conv_model, n_classes=10, batch_size=100, steps=20000,
+    learning_rate=0.001)
+classifier.fit(mnist.train.images, mnist.train.labels)
+score = metrics.accuracy_score(mnist.test.labels, classifier.predict(mnist.test.images))
+print('Accuracy: {0:f}'.format(score))
+
+# Examining fitted weights
+
+## General usage is classifier.get_tensor_value('foo')
+## 'foo' must be the variable scope of the desired tensor followed by the
+## graph path. 
+
+## To understand the mechanism and figure out the right scope and path, you can do logging.
+## Then use TensorBoard or a text editor on the log file to look at available strings.
+
+## First Convolutional Layer
+print('1st Convolutional Layer weights and Bias')
+print(classifier.get_tensor_value('conv_layer1/convolution/filters:0'))
+print(classifier.get_tensor_value('conv_layer1/convolution/bias:0'))
+
+## Second Convolutional Layer
+print('2nd Convolutional Layer weights and Bias')
+print(classifier.get_tensor_value('conv_layer2/convolution/filters:0'))
+print(classifier.get_tensor_value('conv_layer2/convolution/bias:0'))
+
+## Densely Connected Layer
+print('Densely Connected Layer weights')
+print(classifier.get_tensor_value('dnn/layer0/Linear/Matrix:0'))
+
+## Logistic Regression weights
+print('Logistic Regression weights')
+print(classifier.get_tensor_value('logistic_regression/weights:0'))
diff --git a/tensorflow/examples/skflow/multioutput_regression.py b/tensorflow/examples/skflow/multioutput_regression.py
new file mode 100644
index 0000000000..5b5d2b135d
--- /dev/null
+++ b/tensorflow/examples/skflow/multioutput_regression.py
@@ -0,0 +1,73 @@
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+"""
+This example uses the same data as one here:
+  http://scikit-learn.org/stable/auto_examples/tree/plot_tree_regression_multioutput.html
+
+Instead of DecisionTree, Deep Neural Networks with tanh activations are used.
+"""
+from __future__ import division, print_function, absolute_import
+
+
+import numpy as np
+import matplotlib.pyplot as plt
+from sklearn import datasets
+from sklearn.metrics import mean_squared_error
+
+from tensorflow.contrib import skflow
+
+# Create random dataset.
+rng = np.random.RandomState(1)
+X = np.sort(200 * rng.rand(100, 1) - 100, axis=0)
+y = np.array([np.pi * np.sin(X).ravel(), np.pi * np.cos(X).ravel()]).T
+
+# Fit regression DNN models.
+regressors = []
+options = [[2], [10, 10], [20, 20]]
+for hidden_units in options:
+    def tanh_dnn(X, y):
+        features = skflow.ops.dnn(X, hidden_units=hidden_units,
+          activation=skflow.tf.tanh)
+        return skflow.models.linear_regression(features, y)
+
+    regressor = skflow.TensorFlowEstimator(model_fn=tanh_dnn, n_classes=0,
+        steps=500, learning_rate=0.1, batch_size=100)
+    regressor.fit(X, y)
+    score = mean_squared_error(regressor.predict(X), y)
+    print("Mean Squared Error for {0}: {1:f}".format(str(hidden_units), score))
+    regressors.append(regressor)
+
+# Predict on new random Xs.
+X_test = np.arange(-100.0, 100.0, 0.1)[:, np.newaxis]
+y_1 = regressors[0].predict(X_test)
+y_2 = regressors[1].predict(X_test)
+y_3 = regressors[2].predict(X_test)
+
+# Plot the results
+plt.figure()
+plt.scatter(y[:, 0], y[:, 1], c="k", label="data")
+plt.scatter(y_1[:, 0], y_1[:, 1], c="g",
+    label="hidden_units{}".format(str(options[0])))
+plt.scatter(y_2[:, 0], y_2[:, 1], c="r",
+    label="hidden_units{}".format(str(options[1])))
+plt.scatter(y_3[:, 0], y_3[:, 1], c="b",
+    label="hidden_units{}".format(str(options[2])))
+plt.xlim([-6, 6])
+plt.ylim([-6, 6])
+plt.xlabel("data")
+plt.ylabel("target")
+plt.title("Multi-output DNN Regression")
+plt.legend()
+plt.show()
diff --git a/tensorflow/examples/skflow/multiple_gpu.py b/tensorflow/examples/skflow/multiple_gpu.py
new file mode 100644
index 0000000000..279bc5615f
--- /dev/null
+++ b/tensorflow/examples/skflow/multiple_gpu.py
@@ -0,0 +1,40 @@
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+from sklearn import datasets, metrics, cross_validation
+import tensorflow as tf
+from tensorflow.contrib import skflow
+
+iris = datasets.load_iris()
+X_train, X_test, y_train, y_test = cross_validation.train_test_split(iris.data, iris.target,
+    test_size=0.2, random_state=42)
+
+def my_model(X, y):
+    """
+    DNN with hidden layers of 10, 20 and 10 units (keep_prob=0.5) placed on
+    '/gpu:1', with the logistic regression output placed on '/gpu:2'.
+    Note: If you want to run this example with multiple GPUs, Cuda Toolkit 7.0 and
+    CUDNN 6.5 V2 from NVIDIA need to be installed beforehand.
+    """
+    with tf.device('/gpu:1'):
+        layers = skflow.ops.dnn(X, [10, 20, 10], keep_prob=0.5)
+    with tf.device('/gpu:2'):
+        return skflow.models.logistic_regression(layers, y)
+
+classifier = skflow.TensorFlowEstimator(model_fn=my_model, n_classes=3)
+classifier.fit(X_train, y_train)
+score = metrics.accuracy_score(y_test, classifier.predict(X_test))
+print('Accuracy: {0:f}'.format(score))
diff --git a/tensorflow/examples/skflow/neural_translation.py b/tensorflow/examples/skflow/neural_translation.py
new file mode 100644
index 0000000000..4583a8b250
--- /dev/null
+++ b/tensorflow/examples/skflow/neural_translation.py
@@ -0,0 +1,127 @@
+# encoding: utf-8
+
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+import itertools
+import os
+import numpy as np
+
+import tensorflow as tf
+from tensorflow.contrib import skflow
+
+# Get training data
+
+# This dataset can be downloaded from http://www.statmt.org/europarl/v6/fr-en.tgz
+
+ENGLISH_CORPUS = "europarl-v6.fr-en.en"
+FRENCH_CORPUS = "europarl-v6.fr-en.fr"
+
+def read_iterator(filename):
+    with open(filename) as f:  # ensure the file is closed when iteration ends
+        for line in f:
+            yield line.strip()
+
+
+def repeated_read_iterator(filename):
+    while True:  # restart from the top of the file after every full pass
+        with open(filename) as f:  # close each pass's handle instead of leaking one per pass
+            for line in f:
+                yield line.strip()
+
+
+def split_train_test(data, partition=0.2, random_seed=42):
+    rnd = np.random.RandomState(random_seed)  # seeded generator, so the split is reproducible
+    for item in data:
+        if rnd.uniform() > partition:
+            yield (0, item)  # tag 0 when the draw exceeds `partition`
+        else:
+            yield (1, item)  # tag 1 otherwise
+
+
+def save_partitions(data, filenames):
+    files = [open(filename, 'w') for filename in filenames]  # NOTE(review): handles are never closed explicitly here
+    for partition, item in data:
+        files[partition].write(item + '\n')  # `partition` indexes into `filenames`; one item per line
+
+
+def loop_iterator(data):
+    while True:  # never terminates on its own; the consumer decides when to stop
+        for item in data:  # NOTE(review): a one-shot iterator yields nothing after its first pass; confirm callers pass a re-iterable
+            yield item
+
+
+if not (os.path.exists('train.data') and os.path.exists('test.data')):
+    lazy_zip = getattr(itertools, 'izip', zip)  # izip exists only on Python 2; zip is already lazy on Python 3
+    corpora = lazy_zip(read_iterator(ENGLISH_CORPUS), read_iterator(FRENCH_CORPUS))
+    parallel_data = ('%s;;;%s' % (eng, fr) for eng, fr in corpora)
+    save_partitions(split_train_test(parallel_data), ['train.data', 'test.data'])
+
+def Xy(data):
+    def split_lines(data):
+        for item in data:
+            yield item.split(';;;')
+    X, y = itertools.tee(split_lines(data))
+    return (item[0] for item in X), (item[1] for item in y)
+
+X_train, y_train = Xy(repeated_read_iterator('train.data'))
+X_test, y_test = Xy(read_iterator('test.data'))
+
+
+# Translation model
+
+MAX_DOCUMENT_LENGTH = 30
+HIDDEN_SIZE = 100
+
+def translate_model(X, y):
+    byte_list = skflow.ops.one_hot_matrix(X, 256)
+    in_X, in_y, out_y = skflow.ops.seq2seq_inputs(
+        byte_list, y, MAX_DOCUMENT_LENGTH, MAX_DOCUMENT_LENGTH)
+    cell = tf.nn.rnn_cell.OutputProjectionWrapper(tf.nn.rnn_cell.GRUCell(HIDDEN_SIZE), 256)
+    decoding, _, sampling_decoding, _ = skflow.ops.rnn_seq2seq(in_X, in_y, cell)
+    return skflow.ops.sequence_classifier(decoding, out_y, sampling_decoding)
+
+
+vocab_processor = skflow.preprocessing.ByteProcessor(
+    max_document_length=MAX_DOCUMENT_LENGTH)
+
+x_iter = vocab_processor.transform(X_train)
+y_iter = vocab_processor.transform(y_train)
+xpred = np.array(list(vocab_processor.transform(X_test))[:20])
+ygold = list(y_test)[:20]
+
+PATH = '/tmp/tf_examples/ntm/'
+
+if os.path.exists(PATH):
+    translator = skflow.TensorFlowEstimator.restore(PATH)
+else:
+    translator = skflow.TensorFlowEstimator(model_fn=translate_model,
+        n_classes=256,
+        optimizer='Adam', learning_rate=0.01, batch_size=128,
+        continue_training=True)
+
+while True:
+    translator.fit(x_iter, y_iter, logdir=PATH)
+    translator.save(PATH)
+
+    predictions = translator.predict(xpred, axis=2)
+    xpred_inp = vocab_processor.reverse(xpred)
+    text_outputs = vocab_processor.reverse(predictions)
+    for inp_data, input_text, pred, output_text, gold in zip(xpred, xpred_inp,
+        predictions, text_outputs, ygold):
+        print('English: %s. French (pred): %s, French (gold): %s' %
+            (input_text, output_text, gold.decode('utf-8')))
+        print(inp_data, pred)
diff --git a/tensorflow/examples/skflow/neural_translation_word.py b/tensorflow/examples/skflow/neural_translation_word.py
new file mode 100644
index 0000000000..2fa5f897bb
--- /dev/null
+++ b/tensorflow/examples/skflow/neural_translation_word.py
@@ -0,0 +1,167 @@
+# encoding: utf-8
+
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+import cPickle
+import itertools
+import os
+import random
+
+import numpy as np
+import tensorflow as tf
+
+import skflow
+
+# Get training data
+
+# This dataset can be downloaded from http://www.statmt.org/europarl/v6/fr-en.tgz
+
+ENGLISH_CORPUS = "europarl-v6.fr-en.en"
+FRENCH_CORPUS = "europarl-v6.fr-en.fr"
+
+def read_iterator(filename, reporting=True):
+    """Yield each line of `filename` stripped, printing progress if `reporting`."""
+    # `with` closes the file when the generator is exhausted or closed.
+    with open(filename) as f:
+        for line_count, line in enumerate(f, 1):
+            if reporting and line_count % 100000 == 0:
+                print("%d lines read from %s" % (line_count, filename))
+            yield line.strip()
+
+
+def repeated_read_iterator(filename):
+    while True:  # cycle over the file forever, reopening it after each pass
+        with open(filename) as f:  # closed at EOF so handles don't pile up
+            for line in f:
+                yield line.strip()
+
+
+def split_train_test(data, partition=0.2, random_seed=42):
+    """Tag each item with partition 0 or 1; a uniform draw <= `partition`
+    selects 1. The seeded generator makes the split reproducible."""
+    rng = np.random.RandomState(random_seed)
+    for record in data:
+        bucket = 0 if rng.uniform() > partition else 1
+        yield (bucket, record)
+
+
+def save_partitions(data, filenames):
+    files = [open(filename, 'w') for filename in filenames]  # files[i] receives partition i
+    for partition, item in data:  # data yields (partition index, text) pairs
+        files[partition].write(item + '\n')  # NOTE(review): handles are never closed explicitly
+
+
+def loop_iterator(data):
+    while True:  # never terminates: restarts iteration over `data` after each pass
+        for item in data:  # NOTE(review): a one-shot iterator would yield nothing on later passes
+            yield item
+
+
+if not (os.path.exists('train.data') and os.path.exists('test.data')):
+    english_data = read_iterator(ENGLISH_CORPUS)
+    french_data = read_iterator(FRENCH_CORPUS)
+    parallel_data = ('%s;;;%s' % (eng, fr) for eng, fr in itertools.izip(english_data, french_data))
+    save_partitions(split_train_test(parallel_data), ['train.data', 'test.data'])
+
+def Xy(data):
+    """Split 'source;;;target' lines into two lazy streams (sources, targets)."""
+    # tee() lets both streams be consumed independently from a single pass.
+    pairs = (line.split(';;;') for line in data)
+    sources, targets = itertools.tee(pairs)
+    return (pair[0] for pair in sources), (pair[1] for pair in targets)
+
+X_train, y_train = Xy(repeated_read_iterator('train.data'))
+X_test, y_test = Xy(read_iterator('test.data'))
+
+# Preprocessing
+
+MAX_DOCUMENT_LENGTH = 10
+
+if not (os.path.exists('en.vocab') and os.path.exists('fr.vocab')):
+    X_vocab_processor = skflow.preprocessing.VocabularyProcessor(MAX_DOCUMENT_LENGTH,
+        min_frequency=5)
+    y_vocab_processor = skflow.preprocessing.VocabularyProcessor(MAX_DOCUMENT_LENGTH,
+        min_frequency=5)
+    Xtrainff, ytrainff = Xy(read_iterator('train.data'))
+    print('Fitting dictionary for English...')
+    X_vocab_processor.fit(Xtrainff)
+    print('Fitting dictionary for French...')
+    y_vocab_processor.fit(ytrainff)
+    open('en.vocab', 'w').write(cPickle.dumps(X_vocab_processor))
+    open('fr.vocab', 'w').write(cPickle.dumps(y_vocab_processor))
+else:
+    X_vocab_processor = cPickle.loads(open('en.vocab').read())
+    y_vocab_processor = cPickle.loads(open('fr.vocab').read())
+print('Transforming...')
+X_train = X_vocab_processor.transform(X_train)
+y_train = y_vocab_processor.transform(y_train)
+X_test = X_vocab_processor.transform(X_test)
+
+# TODO: Expand this to use the whole test set (islice tolerates < 1000 rows).
+X_test = np.array(list(itertools.islice(X_test, 1000)))
+y_test = list(itertools.islice(y_test, 1000))
+
+n_en_words = len(X_vocab_processor.vocabulary_)
+n_fr_words = len(y_vocab_processor.vocabulary_)
+print('Total words, en: %d, fr: %d' % (n_en_words, n_fr_words))
+
+# Translation model
+
+HIDDEN_SIZE = 20
+EMBEDDING_SIZE = 20
+
+def translate_model(X, y):
+    word_vectors = skflow.ops.categorical_variable(X, n_classes=n_en_words,
+        embedding_size=EMBEDDING_SIZE, name='words')
+    in_X, in_y, out_y = skflow.ops.seq2seq_inputs(
+        word_vectors, y, MAX_DOCUMENT_LENGTH, MAX_DOCUMENT_LENGTH)
+    encoder_cell = tf.nn.rnn_cell.GRUCell(HIDDEN_SIZE)
+    decoder_cell = tf.nn.rnn_cell.OutputProjectionWrapper(
+        tf.nn.rnn_cell.GRUCell(HIDDEN_SIZE), n_fr_words)
+    decoding, _, sampling_decoding, _ = skflow.ops.rnn_seq2seq(in_X, in_y,
+        encoder_cell, decoder_cell=decoder_cell)
+    return skflow.ops.sequence_classifier(decoding, out_y, sampling_decoding)
+
+
+PATH = '/tmp/tf_examples/ntm_words/'
+
+if os.path.exists(os.path.join(PATH, 'graph.pbtxt')):
+    translator = skflow.TensorFlowEstimator.restore(PATH)
+else:
+    translator = skflow.TensorFlowEstimator(model_fn=translate_model,
+        n_classes=n_fr_words,
+        optimizer='Adam', learning_rate=0.01, batch_size=128,
+        continue_training=True, steps=100)
+
+while True:
+    translator.fit(X_train, y_train, logdir=PATH)
+    translator.save(PATH)
+
+    xpred, ygold = [], []
+    for _ in range(10):
+        idx = random.randint(0, len(X_test) - 1)
+        xpred.append(X_test[idx])
+        ygold.append(y_test[idx])
+    xpred = np.array(xpred)
+    predictions = translator.predict(xpred, axis=2)
+    xpred_inp = X_vocab_processor.reverse(xpred)
+    text_outputs = y_vocab_processor.reverse(predictions)
+    for inp_data, input_text, pred, output_text, gold in zip(xpred, xpred_inp,
+        predictions, text_outputs, ygold):
+        print('English: %s. French (pred): %s, French (gold): %s' %
+            (input_text, output_text, gold))
+        print(inp_data, pred)
diff --git a/tensorflow/examples/skflow/out_of_core_data_classification.py b/tensorflow/examples/skflow/out_of_core_data_classification.py
new file mode 100644
index 0000000000..d040738eb9
--- /dev/null
+++ b/tensorflow/examples/skflow/out_of_core_data_classification.py
@@ -0,0 +1,50 @@
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+from sklearn import datasets, metrics, cross_validation
+
+import pandas as pd
+import dask.dataframe as dd
+
+from tensorflow.contrib import skflow
+
+# Sometimes your dataset is too large to hold in memory. In that case you may
+# want to load it into an out-of-core dataframe, as provided by the dask library,
+# so that sample batches are drawn first and then loaded into memory for training.
+
+# Load dataset.
+iris = datasets.load_iris()
+X_train, X_test, y_train, y_test = cross_validation.train_test_split(iris.data, iris.target,
+    test_size=0.2, random_state=42)
+
+# Note that we use iris here just for demo purposes.
+# You can load your own large dataset into an out-of-core dataframe
+# using dask's methods, e.g. read_csv() in dask.
+# For details, please see: http://dask.pydata.org/en/latest/dataframe.html
+
+# We first load them into pandas dataframes and then convert them into dask dataframes
+X_train, y_train, X_test, y_test = [pd.DataFrame(data) for data in [X_train, y_train, X_test, y_test]]
+X_train, y_train, X_test, y_test = [dd.from_pandas(data, npartitions=2) for data in [X_train, y_train, X_test, y_test]]
+
+# Initialize a TensorFlow linear classifier
+classifier = skflow.TensorFlowLinearClassifier(n_classes=3)
+
+# Fit the model using the training set
+classifier.fit(X_train, y_train)
+# Make predictions on each partition of the testing data
+predictions = X_test.map_partitions(classifier.predict).compute()
+score = metrics.accuracy_score(y_test.compute(), predictions)  # accuracy
+print('Accuracy: {0:f}'.format(score))  # report it, like the other examples
diff --git a/tensorflow/examples/skflow/resnet.py b/tensorflow/examples/skflow/resnet.py
new file mode 100644
index 0000000000..7f02124a22
--- /dev/null
+++ b/tensorflow/examples/skflow/resnet.py
@@ -0,0 +1,157 @@
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+"""
+This example builds deep residual network for mnist data. 
+Reference Paper: http://arxiv.org/pdf/1512.03385.pdf
+
+Note that this is still a work-in-progress. Feel free to submit a PR
+to make this better. 
+"""
+
+from __future__ import division, print_function, absolute_import
+
+import os
+from collections import namedtuple
+from math import sqrt
+
+from sklearn import metrics
+import tensorflow as tf
+from tensorflow.examples.tutorials.mnist import input_data
+from tensorflow.contrib import skflow
+
+
+def res_net(x, y, activation=tf.nn.relu):
+    """Builds a residual network. Note that if the input tensor is 2D, it must be
+    square in order to be converted to a 4D tensor.
+
+    Borrowed structure from here: https://github.com/pkmital/tensorflow_tutorials/blob/master/10_residual_network.py
+
+    Args:
+        x: Input of the network
+        y: Output of the network
+        activation: Activation function to apply after each convolution
+    """
+
+    # Configurations for each bottleneck block
+    BottleneckBlock = namedtuple(
+        'BottleneckBlock', ['num_layers', 'num_filters', 'bottleneck_size'])
+    blocks = [BottleneckBlock(3, 128, 32),
+              BottleneckBlock(3, 256, 64),
+              BottleneckBlock(3, 512, 128),
+              BottleneckBlock(3, 1024, 256)]
+
+    input_shape = x.get_shape().as_list()
+
+    # Reshape the input into the right shape if it's 2D tensor
+    if len(input_shape) == 2:
+        ndim = int(sqrt(input_shape[1]))
+        x = tf.reshape(x, [-1, ndim, ndim, 1])
+
+    # First convolution expands to 64 channels
+    with tf.variable_scope('conv_layer1'):
+        net = skflow.ops.conv2d(x, 64, [7, 7], batch_norm=True,
+                                activation=activation, bias=False)
+
+    # Max pool
+    net = tf.nn.max_pool(
+        net, [1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')
+
+    # First chain of resnets
+    with tf.variable_scope('conv_layer2'):
+        net = skflow.ops.conv2d(net, blocks[0].num_filters,
+                               [1, 1], [1, 1, 1, 1],
+                               padding='VALID', bias=True)
+
+    # Create each bottleneck building block for each layer
+    for block_i, block in enumerate(blocks):
+        for layer_i in range(block.num_layers):
+
+            name = 'block_%d/layer_%d' % (block_i, layer_i)
+
+            # 1x1 convolution on the layer input (block.num_filters filters)
+            with tf.variable_scope(name + '/conv_in'):
+                conv = skflow.ops.conv2d(net, block.num_filters,
+                                         [1, 1], [1, 1, 1, 1],
+                                         padding='VALID',
+                                         activation=activation,
+                                         batch_norm=True,
+                                         bias=False)
+
+            with tf.variable_scope(name + '/conv_bottleneck'):
+                conv = skflow.ops.conv2d(conv, block.bottleneck_size,
+                                         [3, 3], [1, 1, 1, 1],
+                                         padding='SAME',
+                                         activation=activation,
+                                         batch_norm=True,
+                                         bias=False)
+
+            # 1x1 convolution responsible for restoring dimension
+            with tf.variable_scope(name + '/conv_out'):
+                conv = skflow.ops.conv2d(conv, block.num_filters,
+                                         [1, 1], [1, 1, 1, 1],
+                                         padding='VALID',
+                                         activation=activation,
+                                         batch_norm=True,
+                                         bias=False)
+
+            # shortcut connections that turn the network into its counterpart
+            # residual function (identity shortcut)
+            net = conv + net
+
+        # Upscale to the next block's filter count; the last block has no
+        # successor. An explicit bounds check (rather than catching
+        # IndexError) avoids masking an IndexError raised inside conv2d.
+        if block_i + 1 < len(blocks):
+            next_block = blocks[block_i + 1]
+            with tf.variable_scope('block_%d/conv_upscale' % block_i):
+                net = skflow.ops.conv2d(net, next_block.num_filters,
+                                        [1, 1], [1, 1, 1, 1],
+                                        bias=False,
+                                        padding='SAME')
+
+    net_shape = net.get_shape().as_list()
+    net = tf.nn.avg_pool(net,
+                         ksize=[1, net_shape[1], net_shape[2], 1],
+                         strides=[1, 1, 1, 1], padding='VALID')
+
+    net_shape = net.get_shape().as_list()
+    net = tf.reshape(net, [-1, net_shape[1] * net_shape[2] * net_shape[3]])
+
+    return skflow.models.logistic_regression(net, y)
+
+
+# Download and load MNIST data.
+mnist = input_data.read_data_sets('MNIST_data')
+
+# Restore model if graph is saved into a folder.
+if os.path.exists("models/resnet/graph.pbtxt"):
+    classifier = skflow.TensorFlowEstimator.restore("models/resnet/")
+else:
+    # Create a new resnet classifier.
+    classifier = skflow.TensorFlowEstimator(
+        model_fn=res_net, n_classes=10, batch_size=100, steps=100,
+        learning_rate=0.001, continue_training=True)
+
+while True:
+    # Train model and save summaries into logdir.
+    classifier.fit(mnist.train.images, mnist.train.labels, logdir="models/resnet/")
+
+    # Calculate accuracy.
+    score = metrics.accuracy_score(
+        mnist.test.labels, classifier.predict(mnist.test.images, batch_size=64))
+    print('Accuracy: {0:f}'.format(score))
+
+    # Save model graph and checkpoints.
+    classifier.save("models/resnet/")
diff --git a/tensorflow/examples/skflow/text_classification.py b/tensorflow/examples/skflow/text_classification.py
new file mode 100644
index 0000000000..9b553d850d
--- /dev/null
+++ b/tensorflow/examples/skflow/text_classification.py
@@ -0,0 +1,86 @@
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+import numpy as np
+from sklearn import metrics
+import pandas
+
+import tensorflow as tf
+from tensorflow.models.rnn import rnn, rnn_cell
+from tensorflow.contrib import skflow
+
+### Training data
+
+# Download dbpedia_csv.tar.gz from
+# https://drive.google.com/folderview?id=0Bz8a_Dbh9Qhbfll6bVpmNUtUcFdjYmF2SEpmZUZUcVNiMUw1TWN6RDV3a0JHT3kxLVhVR2M
+# Unpack: tar -xvf dbpedia_csv.tar.gz
+
+train = pandas.read_csv('dbpedia_csv/train.csv', header=None)
+X_train, y_train = train[2], train[0]
+test = pandas.read_csv('dbpedia_csv/test.csv', header=None)
+X_test, y_test = test[2], test[0]
+
+### Process vocabulary
+
+MAX_DOCUMENT_LENGTH = 10
+
+vocab_processor = skflow.preprocessing.VocabularyProcessor(MAX_DOCUMENT_LENGTH)
+X_train = np.array(list(vocab_processor.fit_transform(X_train)))
+X_test = np.array(list(vocab_processor.transform(X_test)))
+
+n_words = len(vocab_processor.vocabulary_)
+print('Total words: %d' % n_words)
+
+### Models
+
+EMBEDDING_SIZE = 50
+
+def average_model(X, y):
+    word_vectors = skflow.ops.categorical_variable(X, n_classes=n_words,
+        embedding_size=EMBEDDING_SIZE, name='words')
+    features = tf.reduce_max(word_vectors, reduction_indices=1)
+    return skflow.models.logistic_regression(features, y)
+
+def rnn_model(X, y):
+    """Recurrent neural network model to predict from sequence of words
+    to a class."""
+    # Convert indexes of words into embeddings.
+    # This creates embeddings matrix of [n_words, EMBEDDING_SIZE] and then
+    # maps word indexes of the sequence into [batch_size, sequence_length,
+    # EMBEDDING_SIZE].
+    word_vectors = skflow.ops.categorical_variable(X, n_classes=n_words,
+        embedding_size=EMBEDDING_SIZE, name='words')
+    # Split into list of embedding per word, while removing doc length dim.
+    # word_list results to be a list of tensors [batch_size, EMBEDDING_SIZE].
+    word_list = skflow.ops.split_squeeze(1, MAX_DOCUMENT_LENGTH, word_vectors)
+    # Create a Gated Recurrent Unit cell with hidden size of EMBEDDING_SIZE.
+    cell = rnn_cell.GRUCell(EMBEDDING_SIZE)
+    # Create an unrolled Recurrent Neural Networks to length of
+    # MAX_DOCUMENT_LENGTH and passes word_list as inputs for each unit.
+    _, encoding = rnn.rnn(cell, word_list, dtype=tf.float32)
+    # Given encoding of RNN, take encoding of last step (e.g hidden size of the
+    # neural network of last step) and pass it as features for logistic
+    # regression over output classes.
+    return skflow.models.logistic_regression(encoding, y)
+
+classifier = skflow.TensorFlowEstimator(model_fn=rnn_model, n_classes=15,
+    steps=1000, optimizer='Adam', learning_rate=0.01, continue_training=True)
+
+# Continuously train for 1000 steps & predict on test set.
+while True:
+    classifier.fit(X_train, y_train, logdir='/tmp/tf_examples/word_rnn')
+    score = metrics.accuracy_score(y_test, classifier.predict(X_test))
+    print('Accuracy: {0:f}'.format(score))
diff --git a/tensorflow/examples/skflow/text_classification_builtin_rnn_model.py b/tensorflow/examples/skflow/text_classification_builtin_rnn_model.py
new file mode 100644
index 0000000000..239aa48d9c
--- /dev/null
+++ b/tensorflow/examples/skflow/text_classification_builtin_rnn_model.py
@@ -0,0 +1,73 @@
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+import numpy as np
+from sklearn import metrics
+import pandas
+
+import tensorflow as tf
+from tensorflow.contrib import skflow
+
+### Training data
+
+# Download dbpedia_csv.tar.gz from
+# https://drive.google.com/folderview?id=0Bz8a_Dbh9Qhbfll6bVpmNUtUcFdjYmF2SEpmZUZUcVNiMUw1TWN6RDV3a0JHT3kxLVhVR2M
+# Unpack: tar -xvf dbpedia_csv.tar.gz
+
+train = pandas.read_csv('dbpedia_csv/train.csv', header=None)
+X_train, y_train = train[2], train[0]
+test = pandas.read_csv('dbpedia_csv/test.csv', header=None)
+X_test, y_test = test[2], test[0]
+
+### Process vocabulary
+
+MAX_DOCUMENT_LENGTH = 10
+
+vocab_processor = skflow.preprocessing.VocabularyProcessor(MAX_DOCUMENT_LENGTH)
+X_train = np.array(list(vocab_processor.fit_transform(X_train)))
+X_test = np.array(list(vocab_processor.transform(X_test)))
+
+n_words = len(vocab_processor.vocabulary_)
+print('Total words: %d' % n_words)
+
+### Models
+
+EMBEDDING_SIZE = 50
+
+# Customized function to transform batched X into embeddings
+def input_op_fn(X):
+    # Convert indexes of words into embeddings.
+    # This creates embeddings matrix of [n_words, EMBEDDING_SIZE] and then
+    # maps word indexes of the sequence into [batch_size, sequence_length,
+    # EMBEDDING_SIZE].
+    word_vectors = skflow.ops.categorical_variable(X, n_classes=n_words,
+        embedding_size=EMBEDDING_SIZE, name='words')
+    # Split into list of embedding per word, while removing doc length dim.
+    # word_list results to be a list of tensors [batch_size, EMBEDDING_SIZE].
+    word_list = skflow.ops.split_squeeze(1, MAX_DOCUMENT_LENGTH, word_vectors)
+    return word_list
+
+# Single direction GRU with a single layer
+classifier = skflow.TensorFlowRNNClassifier(rnn_size=EMBEDDING_SIZE, 
+    n_classes=15, cell_type='gru', input_op_fn=input_op_fn,
+    num_layers=1, bidirectional=False, sequence_length=None,
+    steps=1000, optimizer='Adam', learning_rate=0.01, continue_training=True)
+
+# Continuously train for 1000 steps & predict on test set.
+while True:
+    classifier.fit(X_train, y_train, logdir='/tmp/tf_examples/word_rnn')
+    score = metrics.accuracy_score(y_test, classifier.predict(X_test))
+    print('Accuracy: {0:f}'.format(score))
diff --git a/tensorflow/examples/skflow/text_classification_character_cnn.py b/tensorflow/examples/skflow/text_classification_character_cnn.py
new file mode 100644
index 0000000000..caa2c88aa2
--- /dev/null
+++ b/tensorflow/examples/skflow/text_classification_character_cnn.py
@@ -0,0 +1,92 @@
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+"""
+This is an example of using convolutional networks over characters
+for DBpedia dataset to predict class from description of an entity.
+
+This model is similar to one described in this paper:
+   "Character-level Convolutional Networks for Text Classification"
+   http://arxiv.org/abs/1509.01626
+
+and is somewhat alternative to the Lua code from here:
+   https://github.com/zhangxiangxiao/Crepe
+"""
+from __future__ import division, print_function, absolute_import
+
+import numpy as np
+from sklearn import metrics
+import pandas
+
+import tensorflow as tf
+from tensorflow.contrib import skflow
+
+### Training data
+
+# Download dbpedia_csv.tar.gz from
+# https://drive.google.com/folderview?id=0Bz8a_Dbh9Qhbfll6bVpmNUtUcFdjYmF2SEpmZUZUcVNiMUw1TWN6RDV3a0JHT3kxLVhVR2M
+# Unpack: tar -xvf dbpedia_csv.tar.gz
+
+train = pandas.read_csv('dbpedia_csv/train.csv', header=None)
+X_train, y_train = train[2], train[0]
+test = pandas.read_csv('dbpedia_csv/test.csv', header=None)
+X_test, y_test = test[2], test[0]
+
+### Process vocabulary
+
+MAX_DOCUMENT_LENGTH = 100
+
+char_processor = skflow.preprocessing.ByteProcessor(MAX_DOCUMENT_LENGTH)
+X_train = np.array(list(char_processor.fit_transform(X_train)))
+X_test = np.array(list(char_processor.transform(X_test)))
+
+### Models
+
+N_FILTERS = 10
+FILTER_SHAPE1 = [20, 256]
+FILTER_SHAPE2 = [20, N_FILTERS]
+POOLING_WINDOW = 4
+POOLING_STRIDE = 2
+
+def char_cnn_model(X, y):
+    """Character level convolutional neural network model to predict classes."""
+    byte_list = tf.reshape(skflow.ops.one_hot_matrix(X, 256), 
+        [-1, MAX_DOCUMENT_LENGTH, 256, 1])
+    with tf.variable_scope('CNN_Layer1'):
+        # Apply convolution filtering on the input sequence.
+        conv1 = skflow.ops.conv2d(byte_list, N_FILTERS, FILTER_SHAPE1, padding='VALID')
+        # Add a ReLU for non-linearity.
+        conv1 = tf.nn.relu(conv1)
+        # Max pooling across the output of Convolution+ReLU.
+        pool1 = tf.nn.max_pool(conv1, ksize=[1, POOLING_WINDOW, 1, 1], 
+            strides=[1, POOLING_STRIDE, 1, 1], padding='SAME')
+        # Transpose matrix so that n_filters from convolution becomes width.
+        pool1 = tf.transpose(pool1, [0, 1, 3, 2])
+    with tf.variable_scope('CNN_Layer2'):
+        # Second level of convolution filtering.
+        conv2 = skflow.ops.conv2d(pool1, N_FILTERS, FILTER_SHAPE2,
+            padding='VALID')
+        # Max across each filter to get useful features for classification.
+        pool2 = tf.squeeze(tf.reduce_max(conv2, 1), squeeze_dims=[1])
+    # Apply regular WX + B and classification.
+    return skflow.models.logistic_regression(pool2, y)
+
+classifier = skflow.TensorFlowEstimator(model_fn=char_cnn_model, n_classes=15,
+    steps=100, optimizer='Adam', learning_rate=0.01, continue_training=True)
+
+# Continuously train for 100 steps & predict on test set.
+while True:
+    classifier.fit(X_train, y_train)
+    score = metrics.accuracy_score(y_test, classifier.predict(X_test))
+    print("Accuracy: %f" % score)
diff --git a/tensorflow/examples/skflow/text_classification_character_rnn.py b/tensorflow/examples/skflow/text_classification_character_rnn.py
new file mode 100644
index 0000000000..6aef12046a
--- /dev/null
+++ b/tensorflow/examples/skflow/text_classification_character_rnn.py
@@ -0,0 +1,73 @@
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+"""
+This is an example of using recurrent neural networks over characters
+for DBpedia dataset to predict class from description of an entity.
+
+This model is similar to one described in this paper:
+   "Character-level Convolutional Networks for Text Classification"
+   http://arxiv.org/abs/1509.01626
+
+and is somewhat alternative to the Lua code from here:
+   https://github.com/zhangxiangxiao/Crepe
+"""
+from __future__ import division, print_function, absolute_import
+
+import numpy as np
+from sklearn import metrics
+import pandas
+
+import tensorflow as tf
+from tensorflow.models.rnn import rnn, rnn_cell
+from tensorflow.contrib import skflow
+
+### Training data
+
+# Download dbpedia_csv.tar.gz from
+# https://drive.google.com/folderview?id=0Bz8a_Dbh9Qhbfll6bVpmNUtUcFdjYmF2SEpmZUZUcVNiMUw1TWN6RDV3a0JHT3kxLVhVR2M
+# Unpack: tar -xvf dbpedia_csv.tar.gz
+
+train = pandas.read_csv('dbpedia_csv/train.csv', header=None)
+X_train, y_train = train[2], train[0]
+test = pandas.read_csv('dbpedia_csv/test.csv', header=None)
+X_test, y_test = test[2], test[0]
+
+### Process vocabulary
+
+MAX_DOCUMENT_LENGTH = 100
+
+char_processor = skflow.preprocessing.ByteProcessor(MAX_DOCUMENT_LENGTH)
+X_train = np.array(list(char_processor.fit_transform(X_train)))
+X_test = np.array(list(char_processor.transform(X_test)))
+
+### Models
+
+HIDDEN_SIZE = 20
+
+def char_rnn_model(X, y):
+    byte_list = skflow.ops.one_hot_matrix(X, 256)
+    byte_list = skflow.ops.split_squeeze(1, MAX_DOCUMENT_LENGTH, byte_list)
+    cell = rnn_cell.GRUCell(HIDDEN_SIZE)
+    _, encoding = rnn.rnn(cell, byte_list, dtype=tf.float32)
+    return skflow.models.logistic_regression(encoding, y)
+
+classifier = skflow.TensorFlowEstimator(model_fn=char_rnn_model, n_classes=15,
+    steps=100, optimizer='Adam', learning_rate=0.01, continue_training=True)
+
+# Continuously train for 100 steps & predict on test set.
+while True:
+    classifier.fit(X_train, y_train)
+    score = metrics.accuracy_score(y_test, classifier.predict(X_test))
+    print("Accuracy: %f" % score)
diff --git a/tensorflow/examples/skflow/text_classification_cnn.py b/tensorflow/examples/skflow/text_classification_cnn.py
new file mode 100644
index 0000000000..edf74f1560
--- /dev/null
+++ b/tensorflow/examples/skflow/text_classification_cnn.py
@@ -0,0 +1,92 @@
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+from __future__ import division, print_function, absolute_import
+
+import numpy as np
+from sklearn import metrics
+import pandas
+
+import tensorflow as tf
+from tensorflow.contrib import skflow
+
+### Training data
+
+# Download dbpedia_csv.tar.gz from
+# https://drive.google.com/folderview?id=0Bz8a_Dbh9Qhbfll6bVpmNUtUcFdjYmF2SEpmZUZUcVNiMUw1TWN6RDV3a0JHT3kxLVhVR2M
+# Unpack: tar -xvf dbpedia_csv.tar.gz
+
+train = pandas.read_csv('dbpedia_csv/train.csv', header=None)
+X_train, y_train = train[2], train[0]
+test = pandas.read_csv('dbpedia_csv/test.csv', header=None)
+X_test, y_test = test[2], test[0]
+
+### Process vocabulary
+
+MAX_DOCUMENT_LENGTH = 100
+
+vocab_processor = skflow.preprocessing.VocabularyProcessor(MAX_DOCUMENT_LENGTH)
+X_train = np.array(list(vocab_processor.fit_transform(X_train)))
+X_test = np.array(list(vocab_processor.transform(X_test)))
+
+n_words = len(vocab_processor.vocabulary_)
+print('Total words: %d' % n_words)
+
+### Models
+
+EMBEDDING_SIZE = 20
+N_FILTERS = 10
+WINDOW_SIZE = 20
+FILTER_SHAPE1 = [WINDOW_SIZE, EMBEDDING_SIZE]
+FILTER_SHAPE2 = [WINDOW_SIZE, N_FILTERS]
+POOLING_WINDOW = 4
+POOLING_STRIDE = 2
+
+def cnn_model(X, y):
+    """2 layer Convolutional network to predict from sequence of words
+    to a class."""
+    # Convert indexes of words into embeddings.
+    # This creates embeddings matrix of [n_words, EMBEDDING_SIZE] and then
+    # maps word indexes of the sequence into [batch_size, sequence_length,
+    # EMBEDDING_SIZE].
+    word_vectors = skflow.ops.categorical_variable(X, n_classes=n_words,
+        embedding_size=EMBEDDING_SIZE, name='words')
+    word_vectors = tf.expand_dims(word_vectors, 3)
+    with tf.variable_scope('CNN_Layer1'):
+        # Apply convolution filtering on the input sequence.
+        conv1 = skflow.ops.conv2d(word_vectors, N_FILTERS, FILTER_SHAPE1, padding='VALID')
+        # Add a ReLU for non-linearity.
+        conv1 = tf.nn.relu(conv1)
+        # Max pooling across the output of Convolution+ReLU.
+        pool1 = tf.nn.max_pool(conv1, ksize=[1, POOLING_WINDOW, 1, 1], 
+            strides=[1, POOLING_STRIDE, 1, 1], padding='SAME')
+        # Transpose matrix so that n_filters from convolution becomes width.
+        pool1 = tf.transpose(pool1, [0, 1, 3, 2])
+    with tf.variable_scope('CNN_Layer2'):
+        # Second level of convolution filtering.
+        conv2 = skflow.ops.conv2d(pool1, N_FILTERS, FILTER_SHAPE2,
+            padding='VALID')
+        # Max across each filter to get useful features for classification.
+        pool2 = tf.squeeze(tf.reduce_max(conv2, 1), squeeze_dims=[1])
+    # Apply regular WX + B and classification.
+    return skflow.models.logistic_regression(pool2, y)
+
+
+classifier = skflow.TensorFlowEstimator(model_fn=cnn_model, n_classes=15,
+    steps=100, optimizer='Adam', learning_rate=0.01, continue_training=True)
+
+# Continuously train for 100 steps & predict on test set.
+while True:
+    classifier.fit(X_train, y_train, logdir='/tmp/tf_examples/word_cnn')
+    score = metrics.accuracy_score(y_test, classifier.predict(X_test))
+    print('Accuracy: {0:f}'.format(score))
diff --git a/tensorflow/examples/skflow/text_classification_save_restore.py b/tensorflow/examples/skflow/text_classification_save_restore.py
new file mode 100644
index 0000000000..acbbc0c682
--- /dev/null
+++ b/tensorflow/examples/skflow/text_classification_save_restore.py
@@ -0,0 +1,96 @@
+#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import division, print_function, absolute_import
+
+import os
+import numpy as np
+from sklearn import metrics
+import pandas
+
+import tensorflow as tf
+from tensorflow.models.rnn import rnn, rnn_cell
+from tensorflow.contrib import skflow
+
+### Training data
+
+# Download dbpedia_csv.tar.gz from
+# https://drive.google.com/folderview?id=0Bz8a_Dbh9Qhbfll6bVpmNUtUcFdjYmF2SEpmZUZUcVNiMUw1TWN6RDV3a0JHT3kxLVhVR2M
+# Unpack: tar -xvf dbpedia_csv.tar.gz
+
+train = pandas.read_csv('dbpedia_csv/train.csv', header=None)
+X_train, y_train = train[2], train[0]
+test = pandas.read_csv('dbpedia_csv/test.csv', header=None)
+X_test, y_test = test[2], test[0]
+
+### Process vocabulary
+
+MAX_DOCUMENT_LENGTH = 10
+
+vocab_processor = skflow.preprocessing.VocabularyProcessor(MAX_DOCUMENT_LENGTH)
+X_train = np.array(list(vocab_processor.fit_transform(X_train)))
+X_test = np.array(list(vocab_processor.transform(X_test)))
+
+n_words = len(vocab_processor.vocabulary_)
+print('Total words: %d' % n_words)
+
+### Models
+
+EMBEDDING_SIZE = 50
+
+def average_model(X, y):
+    word_vectors = skflow.ops.categorical_variable(X, n_classes=n_words,
+        embedding_size=EMBEDDING_SIZE, name='words')
+    features = tf.reduce_max(word_vectors, reduction_indices=1)
+    return skflow.models.logistic_regression(features, y)
+
+def rnn_model(X, y):
+    """Recurrent neural network model to predict from sequence of words
+    to a class."""
+    # Convert indexes of words into embeddings.
+    # This creates embeddings matrix of [n_words, EMBEDDING_SIZE] and then
+    # maps word indexes of the sequence into [batch_size, sequence_length,
+    # EMBEDDING_SIZE].
+    word_vectors = skflow.ops.categorical_variable(X, n_classes=n_words,
+        embedding_size=EMBEDDING_SIZE, name='words')
+    # Split into a list of per-word embeddings, removing the doc length dim.
+    # word_list ends up as a list of tensors [batch_size, EMBEDDING_SIZE].
+    word_list = skflow.ops.split_squeeze(1, MAX_DOCUMENT_LENGTH, word_vectors)
+    # Create a Gated Recurrent Unit cell with hidden size of EMBEDDING_SIZE.
+    cell = rnn_cell.GRUCell(EMBEDDING_SIZE)
+    # Create an unrolled Recurrent Neural Network of length
+    # MAX_DOCUMENT_LENGTH and pass word_list as inputs for each unit.
+    _, encoding = rnn.rnn(cell, word_list, dtype=tf.float32)
+    # Given encoding of RNN, take encoding of last step (e.g. hidden size of the
+    # neural network of last step) and pass it as features for logistic
+    # regression over output classes.
+    return skflow.models.logistic_regression(encoding, y)
+
+model_path = '/tmp/skflow_examples/text_classification'
+if os.path.exists(model_path):
+    classifier = skflow.TensorFlowEstimator.restore(model_path)
+else:
+    classifier = skflow.TensorFlowEstimator(model_fn=rnn_model, n_classes=15,
+        steps=100, optimizer='Adam', learning_rate=0.01, continue_training=True)
+
+    # Continuously train (100 steps per fit call) until a KeyboardInterrupt.
+    while True:
+        try:
+            classifier.fit(X_train, y_train)
+        except KeyboardInterrupt:
+            classifier.save(model_path)
+            break
+# Predict on test set
+score = metrics.accuracy_score(y_test, classifier.predict(X_test))
+print('Accuracy: {0:f}'.format(score))
diff --git a/tensorflow/examples/tutorials/word2vec/word2vec_basic.py b/tensorflow/examples/tutorials/word2vec/word2vec_basic.py
index e81df38343..6cbe6a0f7e 100644
--- a/tensorflow/examples/tutorials/word2vec/word2vec_basic.py
+++ b/tensorflow/examples/tutorials/word2vec/word2vec_basic.py
@@ -46,12 +46,12 @@ def maybe_download(filename, expected_bytes):
 filename = maybe_download('text8.zip', 31344016)
 
 
-# Read the data into a string.
+# Read the data into a list of strings.
 def read_data(filename):
-  f = zipfile.ZipFile(filename)
-  for name in f.namelist():
-    return f.read(name).split()
-  f.close()
+  """Extract the first file enclosed in a zip file as a list of words"""
+  with zipfile.ZipFile(filename) as f:
+    data = f.read(f.namelist()[0]).split()
+  return data
 
 words = read_data(filename)
 print('Data size', len(words))
diff --git a/tensorflow/examples/udacity/5_word2vec.ipynb b/tensorflow/examples/udacity/5_word2vec.ipynb
index c266488bde..94ba37ee13 100644
--- a/tensorflow/examples/udacity/5_word2vec.ipynb
+++ b/tensorflow/examples/udacity/5_word2vec.ipynb
@@ -24,7 +24,7 @@
         "Assignment 5\n",
         "------------\n",
         "\n",
-        "The goal of this assignment is to train a skip-gram model over [Text8](http://mattmahoney.net/dc/textdata) data."
+        "The goal of this assignment is to train a Word2Vec skip-gram model over [Text8](http://mattmahoney.net/dc/textdata) data."
       ]
     },
     {
@@ -180,10 +180,10 @@
       },
       "source": [
         "def read_data(filename):\n",
-        "  f = zipfile.ZipFile(filename)\n",
-        "  for name in f.namelist():\n",
-        "    return tf.compat.as_str(f.read(name)).split()\n",
-        "  f.close()\n",
+        "  \"\"\"Extract the first file enclosed in a zip file as a list of words\"\"\"\n",
+        "  with zipfile.ZipFile(filename) as f:\n",
+        "    data = tf.compat.as_str(f.read(f.namelist()[0])).split()\n",
+        "  return data\n",
         "  \n",
         "words = read_data(filename)\n",
         "print('Data size %d' % len(words))"
@@ -881,7 +881,7 @@
         "Problem\n",
         "-------\n",
         "\n",
-        "An alternative to Word2Vec is called [CBOW](http://arxiv.org/abs/1301.3781) (Continuous Bag of Words). In the CBOW model, instead of predicting a context word from a word vector, you predict a word from the sum of all the word vectors in its context. Implement and evaluate a CBOW model trained on the text8 dataset.\n",
+        "An alternative to skip-gram is another Word2Vec model called [CBOW](http://arxiv.org/abs/1301.3781) (Continuous Bag of Words). In the CBOW model, instead of predicting a context word from a word vector, you predict a word from the sum of all the word vectors in its context. Implement and evaluate a CBOW model trained on the text8 dataset.\n",
         "\n",
         "---"
       ]
diff --git a/tensorflow/examples/udacity/README.md b/tensorflow/examples/udacity/README.md
index af26e2ee38..9200bcc79b 100644
--- a/tensorflow/examples/udacity/README.md
+++ b/tensorflow/examples/udacity/README.md
@@ -34,6 +34,23 @@ has two good suggestions; we recommend using 8G.
 In addition, you may need to pass `--memory=8g` as an extra argument to
 `docker run`.
 
+* **I want to create a new virtual machine instead of the default one.**
+
+`docker-machine` is a tool to provision and manage docker hosts; it supports multiple platforms (e.g. AWS, GCE, Azure, VirtualBox, ...). To create a new virtual machine locally with a built-in docker engine, you can use
+
+    docker-machine create -d virtualbox --virtualbox-memory 8192 tensorflow
+    
+`-d` selects the driver for the target platform; the supported drivers are listed [here](https://docs.docker.com/machine/drivers/). Here we use virtualbox to create a new virtual machine locally. `tensorflow` is the name of the virtual machine; feel free to use whatever you like. You can use
+
+    docker-machine ip tensorflow
+    
+to get the IP of the new virtual machine. To switch from the default virtual machine to the new one (here we use `tensorflow`), type
+
+    eval $(docker-machine env tensorflow)
+    
+Note that `docker-machine env tensorflow` outputs some environment variables such as `DOCKER_HOST`; once `eval` has applied them, your docker client is connected to the docker host in the virtual machine `tensorflow`.
+
+
 Notes for anyone needing to build their own containers (mostly instructors)
 ===========================================================================
 
diff --git a/tensorflow/g3doc/api_docs/python/train.md b/tensorflow/g3doc/api_docs/python/train.md
index 429a102d4a..b232a28410 100644
--- a/tensorflow/g3doc/api_docs/python/train.md
+++ b/tensorflow/g3doc/api_docs/python/train.md
@@ -1772,7 +1772,7 @@ This method wraps the provided summary in an `Event` protocol buffer
 and adds it to the event file.
 
 You can pass the result of evaluating any summary op, using
-[`Session.run()`](client.md#Session.run] or
+[`Session.run()`](client.md#Session.run) or
 [`Tensor.eval()`](framework.md#Tensor.eval), to this
 function. Alternatively, you can pass a `tf.Summary` protocol
 buffer that you populate with your own data. The latter is
diff --git a/tensorflow/g3doc/get_started/basic_usage.md b/tensorflow/g3doc/get_started/basic_usage.md
index f24f8867b2..3e240bb908 100644
--- a/tensorflow/g3doc/get_started/basic_usage.md
+++ b/tensorflow/g3doc/get_started/basic_usage.md
@@ -96,7 +96,7 @@ sess = tf.Session()
 # All inputs needed by the op are run automatically by the session.  They
 # typically are run in parallel.
 #
-# The call 'run(product)' thus causes the execution of threes ops in the
+# The call 'run(product)' thus causes the execution of three ops in the
 # graph: the two constants and matmul.
 #
 # The output of the op is returned in 'result' as a numpy `ndarray` object.
diff --git a/tensorflow/g3doc/how_tos/adding_an_op/index.md b/tensorflow/g3doc/how_tos/adding_an_op/index.md
index 492cb2d104..a1e764baea 100644
--- a/tensorflow/g3doc/how_tos/adding_an_op/index.md
+++ b/tensorflow/g3doc/how_tos/adding_an_op/index.md
@@ -136,6 +136,12 @@ TF_INC=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_include())')
 g++ -std=c++11 -shared zero_out.cc -o zero_out.so -fPIC -I $TF_INC
 ```
 
+> Note on gcc version 5: gcc5 uses the new C++
+[ABI](https://gcc.gnu.org/gcc-5/changes.html#libstdcxx). The binary pip packages
+available on the TensorFlow website are built with gcc4 that uses the older ABI.
+If you compile your op library with gcc5, add `-D_GLIBCXX_USE_CXX11_ABI=0` to
+the command line to make the library compatible with the older ABI.
+
 ### With TensorFlow source installation
 
 If you have TensorFlow sources installed, you can make use of TensorFlow's build
diff --git a/tensorflow/g3doc/how_tos/documentation/index.md b/tensorflow/g3doc/how_tos/documentation/index.md
index e295cb8bac..c38f4fed44 100755
--- a/tensorflow/g3doc/how_tos/documentation/index.md
+++ b/tensorflow/g3doc/how_tos/documentation/index.md
@@ -147,7 +147,7 @@ To link to source code, use a link starting with:
 the file name starting at the github root. For instance, a link to this file
 should be written as
 `https://www.tensorflow.org/code/tensorflow/g3doc/how_tos/documentation/index.md`.
-This ensures that [tensorflow.org](tensorflow.org) can forward the link to the
+This ensures that [tensorflow.org](https://www.tensorflow.org/) can forward the link to the
 branch of the code corresponding to the version of the documentation you're
 viewing. Do not include url parameters in the URL.
 
diff --git a/tensorflow/g3doc/tutorials/index.md b/tensorflow/g3doc/tutorials/index.md
index 9773fee3be..d5c2a2e472 100644
--- a/tensorflow/g3doc/tutorials/index.md
+++ b/tensorflow/g3doc/tutorials/index.md
@@ -114,6 +114,6 @@ Building on the Inception recognition model, we will release a TensorFlow
 version of the [Deep Dream](https://github.com/google/deepdream) neural network
 visual hallucination software.
 
-COMING SOON
+[View Tutorial](https://www.tensorflow.org/code/tensorflow/examples/tutorials/deepdream/deepdream.ipynb)
 
 
diff --git a/tensorflow/g3doc/tutorials/word2vec/index.md b/tensorflow/g3doc/tutorials/word2vec/index.md
index 32323c5774..48fb18641f 100644
--- a/tensorflow/g3doc/tutorials/word2vec/index.md
+++ b/tensorflow/g3doc/tutorials/word2vec/index.md
@@ -255,7 +255,7 @@ embeddings = tf.Variable(
     tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0))
 ```
 
-The noise-contrastive estimation loss is defined in terms a logistic regression
+The noise-contrastive estimation loss is defined in terms of a logistic regression
 model. For this, we need to define the weights and biases for each word in the
 vocabulary (also called the `output weights` as opposed to the `input
 embeddings`). So let's define that.
diff --git a/tensorflow/models/rnn/translate/translate.py b/tensorflow/models/rnn/translate/translate.py
index 793de26647..f6b07230b4 100644
--- a/tensorflow/models/rnn/translate/translate.py
+++ b/tensorflow/models/rnn/translate/translate.py
@@ -193,6 +193,9 @@ def train():
         step_time, loss = 0.0, 0.0
         # Run evals on development set and print their perplexity.
         for bucket_id in xrange(len(_buckets)):
+          if len(dev_set[bucket_id]) == 0:
+            print("  eval: empty bucket %d" % (bucket_id))
+            continue
           encoder_inputs, decoder_inputs, target_weights = model.get_batch(
               dev_set, bucket_id)
           _, eval_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index 4375b82aba..329bebdabc 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -1166,7 +1166,7 @@ cuda_py_tests(
     name = "kernel_tests_with_sharding",
     size = "small",
     srcs = sharded_kernel_test_list,
-    shard_count = 2,
+    shard_count = 4,
 )
 
 cuda_py_tests(
diff --git a/tensorflow/python/client/tf_session.i b/tensorflow/python/client/tf_session.i
index 51078696e2..0b10892d17 100644
--- a/tensorflow/python/client/tf_session.i
+++ b/tensorflow/python/client/tf_session.i
@@ -273,7 +273,8 @@ tensorflow::ImportNumpy();
   def TF_NewSessionOptions(target=None, config=None):
     opts = _TF_NewSessionOptions()
     if target is not None:
-      _TF_SetTarget(opts, target)
+      from tensorflow.python.util import compat
+      _TF_SetTarget(opts, compat.as_bytes(target))
     if config is not None:
       from tensorflow.core.protobuf import config_pb2
       if not isinstance(config, config_pb2.ConfigProto):
diff --git a/tensorflow/python/kernel_tests/bias_op_test.py b/tensorflow/python/kernel_tests/bias_op_test.py
index 479682dca3..c3a923cf27 100644
--- a/tensorflow/python/kernel_tests/bias_op_test.py
+++ b/tensorflow/python/kernel_tests/bias_op_test.py
@@ -63,14 +63,14 @@ class BiasAddTest(tf.test.TestCase):
                             (1,) * (3 - np_value.ndim) + np_value.shape)
     # move the last dimension to third-to-last
     np_dim = list(range(np_value.ndim))
-    np_dim_new = np_dim[0:-3] + np_dim[-1:] + np_dim[-3:-1]
+    np_dim_new = list(np_dim[0:-3]) + list(np_dim[-1:]) + list(np_dim[-3:-1])
     return np.transpose(np_value, np_dim_new)
 
   def _NCHWToNHWC(self, np_value):
-    assert np_value.shape >= 3
+    assert len(np_value.shape) >= 3
     np_dim = list(range(np_value.ndim))
     # move the third-to-last dimension to the last
-    np_dim_new = np_dim[0:-3] + np_dim[-2:] + np_dim[-3:-2]
+    np_dim_new = list(np_dim[0:-3]) + list(np_dim[-2:]) + list(np_dim[-3:-2])
     return np.transpose(np_value, np_dim_new)
 
   def _testBiasNCHW(self, np_inputs, np_bias, use_gpu):
diff --git a/tensorflow/python/kernel_tests/diag_op_test.py b/tensorflow/python/kernel_tests/diag_op_test.py
index 73cad8d34f..ef74f1273c 100644
--- a/tensorflow/python/kernel_tests/diag_op_test.py
+++ b/tensorflow/python/kernel_tests/diag_op_test.py
@@ -21,7 +21,7 @@ import numpy
 import tensorflow as tf
 
 
-class GenerateIdentityTensorTest(tf.test.TestCase):
+class DiagTest(tf.test.TestCase):
 
   def diagOp(self, diag, dtype, expected_ans, use_gpu=False):
     with self.test_session(use_gpu=use_gpu):
@@ -96,10 +96,11 @@ class GenerateIdentityTensorTest(tf.test.TestCase):
     self.diagOp(x, numpy.float32, expected_ans)
     self.diagOp(x, numpy.float64, expected_ans)
 
+
 class DiagPartOpTest(tf.test.TestCase):
 
   def setUp(self):
-    x = numpy.random.seed(0)
+    numpy.random.seed(0)
 
   def diagPartOp(self, tensor, dtpe, expected_ans, use_gpu=False):
     with self.test_session(use_gpu=use_gpu):
@@ -122,7 +123,7 @@ class DiagPartOpTest(tf.test.TestCase):
     expected_ans = x[i, j, i, j]
     self.diagPartOp(x, numpy.float32, expected_ans)
     self.diagPartOp(x, numpy.float64, expected_ans)
-    
+
   def testRankSixFloatTensor(self):
     x = numpy.random.rand(2, 2, 2, 2, 2, 2)
     i = numpy.arange(2)[:, None, None]
@@ -141,7 +142,7 @@ class DiagPartOpTest(tf.test.TestCase):
     self.assertRaises(ValueError, self.diagPartOp, x, numpy.float32, 0)
     self.assertRaises(ValueError, self.diagPartOp, y, numpy.float32, 0)
     self.assertRaises(ValueError, self.diagPartOp, z, numpy.float32, 0)
-    
+
   def testUnevenDimensions(self):
     w = numpy.random.rand(2, 5)
     x = numpy.random.rand(2, 1, 2, 3)
@@ -152,5 +153,41 @@ class DiagPartOpTest(tf.test.TestCase):
     self.assertRaises(ValueError, self.diagPartOp, y, numpy.float32, 0)
     self.assertRaises(ValueError, self.diagPartOp, z, numpy.float32, 0)
 
+
+class DiagGradOpTest(tf.test.TestCase):
+
+  def testDiagGrad(self):
+    numpy.random.seed(0)
+    shapes = ((3,), (3,3), (3,3,3))
+    dtypes = (tf.float32, tf.float64)
+    with self.test_session(use_gpu=False):
+      errors = []
+      for shape in shapes:
+        for dtype in dtypes:
+          x1 = tf.constant(numpy.random.rand(*shape), dtype=dtype)
+          y = tf.diag(x1)
+          error = tf.test.compute_gradient_error(x1, x1._shape_as_list(),
+                                                 y, y._shape_as_list())
+          tf.logging.info("error = %f", error)
+          self.assertLess(error, 1e-4)
+
+
+class DiagGradPartOpTest(tf.test.TestCase):
+
+  def testDiagPartGrad(self):
+    numpy.random.seed(0)
+    shapes = ((3,3), (3,3,3,3), (3,3,3,3,3,3))
+    dtypes = (tf.float32, tf.float64)
+    with self.test_session(use_gpu=False):
+      errors = []
+      for shape in shapes:
+        for dtype in dtypes:
+          x1 = tf.constant(numpy.random.rand(*shape), dtype=dtype)
+          y = tf.diag_part(x1)
+          error = tf.test.compute_gradient_error(x1, x1._shape_as_list(),
+                                                 y, y._shape_as_list())
+          tf.logging.info("error = %f", error)
+          self.assertLess(error, 1e-4)
+
 if __name__ == "__main__":
   tf.test.main()
diff --git a/tensorflow/python/kernel_tests/fft_ops_test.py b/tensorflow/python/kernel_tests/fft_ops_test.py
index 7eeaacdc7f..88ede80d6f 100644
--- a/tensorflow/python/kernel_tests/fft_ops_test.py
+++ b/tensorflow/python/kernel_tests/fft_ops_test.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 
 import numpy as np
 import tensorflow as tf
+from six.moves import xrange  # pylint: disable=redefined-builtin
 
 
 VALID_FFT_RANKS = (1, 2, 3)
diff --git a/tensorflow/python/kernel_tests/rnn_cell_test.py b/tensorflow/python/kernel_tests/rnn_cell_test.py
index a6778506e6..5261af4aab 100644
--- a/tensorflow/python/kernel_tests/rnn_cell_test.py
+++ b/tensorflow/python/kernel_tests/rnn_cell_test.py
@@ -115,7 +115,8 @@ class RNNCellTest(tf.test.TestCase):
         x = tf.zeros([batch_size, input_size])
         m = tf.zeros([batch_size, state_size])
         output, state = tf.nn.rnn_cell.LSTMCell(
-            num_units=num_units, input_size=input_size, num_proj=num_proj)(x, m)
+            num_units=num_units, input_size=input_size, 
+            num_proj=num_proj, forget_bias=1.0)(x, m)
         sess.run([tf.initialize_all_variables()])
         res = sess.run([output, state],
                        {x.name: np.array([[1., 1.], [2., 2.], [3., 3.]]),
diff --git a/tensorflow/python/kernel_tests/softmax_op_test.py b/tensorflow/python/kernel_tests/softmax_op_test.py
index 8b5447d8d3..91c389a2a2 100644
--- a/tensorflow/python/kernel_tests/softmax_op_test.py
+++ b/tensorflow/python/kernel_tests/softmax_op_test.py
@@ -13,60 +13,102 @@
 # limitations under the License.
 # ==============================================================================
 
-"""Tests for SoftmaxOp."""
+"""Tests for SoftmaxOp and LogSoftmaxOp."""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import sys
+
 import numpy as np
 import tensorflow as tf
 
 
 class SoftmaxTest(tf.test.TestCase):
 
-  def _npSoftmax(self, features):
+  def _npSoftmax(self, features, log=False):
     batch_dim = 0
     class_dim = 1
     batch_size = features.shape[batch_dim]
     e = np.exp(features -
                np.reshape(np.amax(features, axis=class_dim), [batch_size, 1]))
-    return e / np.reshape(np.sum(e, axis=class_dim), [batch_size, 1])
+    softmax = e / np.reshape(np.sum(e, axis=class_dim), [batch_size, 1])
+    if log:
+      return np.log(softmax)
+    else:
+      return softmax
 
-  def _testSoftmax(self, np_features, use_gpu=False):
-    np_softmax = self._npSoftmax(np_features)
+  def _testSoftmax(self, np_features, log=False, use_gpu=False):
+    np_softmax = self._npSoftmax(np_features, log=log)
     with self.test_session(use_gpu=use_gpu):
-      tf_softmax = tf.nn.softmax(np_features)
+      if log:
+        tf_softmax = tf.nn.log_softmax(np_features)
+      else:
+        tf_softmax = tf.nn.softmax(np_features)
       out = tf_softmax.eval()
     self.assertAllClose(np_softmax, out)
     self.assertShapeEqual(np_softmax, tf_softmax)
-    # Bonus check: the softmaxes should add to one in each
-    # batch element.
-    self.assertAllClose(np.ones(out.shape[0]),
-                        np.sum(out, axis=1))
+    if not log:
+      # Bonus check: the softmaxes should add to one in each
+      # batch element.
+      self.assertAllClose(np.ones(out.shape[0]),
+                          np.sum(out, axis=1))
 
   def _testAll(self, features):
     self._testSoftmax(features, use_gpu=False)
+    self._testSoftmax(features, log=True, use_gpu=False)
     self._testSoftmax(features, use_gpu=True)
+    self._testSoftmax(features, log=True, use_gpu=True)
+    self._testOverflow(use_gpu=True)
+
 
   def testNpSoftmax(self):
     features = [[1., 1., 1., 1.], [1., 2., 3., 4.]]
     # Batch 0: All exps are 1.  The expected result is
-    # [0.25, 0.25, 0.25, 0.25]
+    # Softmaxes = [0.25, 0.25, 0.25, 0.25]
+    # LogSoftmaxes = [-1.386294, -1.386294, -1.386294, -1.386294]
     #
     # Batch 1:
     # exps = [1., 2.718, 7.389, 20.085]
     # sum = 31.192
     # Softmaxes = exps / sum = [0.0320586, 0.08714432, 0.23688282, 0.64391426]
+    # LogSoftmaxes = [-3.44019 , -2.44019 , -1.44019 , -0.44019]
     np_sm = self._npSoftmax(np.array(features))
     self.assertAllClose(
         np.array([[0.25, 0.25, 0.25, 0.25],
                   [0.0320586, 0.08714432, 0.23688282, 0.64391426]]),
         np_sm,
         rtol=1.e-5, atol=1.e-5)
+    np_lsm = self._npSoftmax(np.array(features), log=True)
+    self.assertAllClose(
+        np.array([[-1.386294, -1.386294, -1.386294, -1.386294],
+                  [-3.4401897, -2.4401897, -1.4401897, -0.4401897]]),
+        np_lsm,
+        rtol=1.e-5, atol=1.e-5)
 
   def testShapeMismatch(self):
     with self.assertRaises(ValueError):
       tf.nn.softmax([0., 1., 2., 3.])
+    with self.assertRaises(ValueError):
+      tf.nn.log_softmax([0., 1., 2., 3.])
+
+  def _testOverflow(self, use_gpu=False):
+    if use_gpu:
+        type = np.float32
+    else:
+        type = np.float64
+    max = np.finfo(type).max
+    features = np.array(
+        [[1., 1., 1., 1.],
+         [max, 1., 2., 3.]]).astype(type)
+    with self.test_session(use_gpu=use_gpu):
+      tf_log_softmax = tf.nn.log_softmax(features)
+      out = tf_log_softmax.eval()
+    self.assertAllClose(
+        np.array([[-1.386294, -1.386294, -1.386294, -1.386294],
+                  [0, -max, -max, -max]]),
+        out,
+        rtol=1.e-5, atol=1.e-5)
 
   def testFloat(self):
     self._testAll(
@@ -76,6 +118,8 @@ class SoftmaxTest(tf.test.TestCase):
     self._testSoftmax(
         np.array([[1., 1., 1., 1.], [1., 2., 3., 4.]]).astype(np.float64),
         use_gpu=False)
+    self._testOverflow(use_gpu=False)
+
 
   def testEmpty(self):
     with self.test_session():
diff --git a/tensorflow/python/ops/array_grad.py b/tensorflow/python/ops/array_grad.py
index c7e0c514f9..d67c1dcb56 100644
--- a/tensorflow/python/ops/array_grad.py
+++ b/tensorflow/python/ops/array_grad.py
@@ -156,9 +156,14 @@ def _SplitGrad(op, *grads):
 
 ops.NoGradient("Const")
 
-# TODO(liqzhang): The gradient for Diag operator would be
-# the diagonal of the backprop. Implement if there is a need.
-ops.NoGradient("Diag")
+
+@ops.RegisterGradient("Diag")
+def _DiagGrad(_, grad):
+  return array_ops.diag_part(grad)
+
+@ops.RegisterGradient("DiagPart")
+def _DiagPartGrad(_, grad):
+  return array_ops.diag(grad)
 
 # Edit Distance has no gradient (but can be used to eval seq2seq or CTC).
 ops.NoGradient("EditDistance")
diff --git a/tensorflow/python/ops/image_grad_test.py b/tensorflow/python/ops/image_grad_test.py
index 8ff39bd71f..7f48b31eda 100644
--- a/tensorflow/python/ops/image_grad_test.py
+++ b/tensorflow/python/ops/image_grad_test.py
@@ -24,55 +24,92 @@ import tensorflow as tf
 
 class ResizeNearestNeighborOpTest(tf.test.TestCase):
 
+  TYPES = [np.float32, np.float64]
+
   def testShapeIsCorrectAfterOp(self):
     in_shape = [1, 2, 2, 1]
     out_shape = [1, 4, 6, 1]
 
-    x = np.arange(0, 4).reshape(in_shape).astype(np.float32)
+    for nptype in self.TYPES:
+      x = np.arange(0, 4).reshape(in_shape).astype(nptype)
 
-    with self.test_session() as sess:
-      input_tensor = tf.constant(x, shape=in_shape)
-      resize_out = tf.image.resize_nearest_neighbor(input_tensor,
-                                                    out_shape[1:3])
-      self.assertEqual(out_shape, list(resize_out.get_shape()))
+      for use_gpu in [False, True]:
+        with self.test_session(use_gpu=use_gpu) as sess:
+          input_tensor = tf.constant(x, shape=in_shape)
+          resize_out = tf.image.resize_nearest_neighbor(input_tensor,
+                                                      out_shape[1:3])
+          self.assertEqual(out_shape, list(resize_out.get_shape()))
 
-      resize_out = sess.run(resize_out)
-      self.assertEqual(out_shape, list(resize_out.shape))
+          resize_out = sess.run(resize_out)
+        self.assertEqual(out_shape, list(resize_out.shape))
 
   def testGradFromResizeToLargerInBothDims(self):
     in_shape = [1, 2, 3, 1]
     out_shape = [1, 4, 6, 1]
 
-    x = np.arange(0, 6).reshape(in_shape).astype(np.float32)
-
-    with self.test_session():
-      input_tensor = tf.constant(x, shape=in_shape)
-      resize_out = tf.image.resize_nearest_neighbor(input_tensor,
-                                                    out_shape[1:3])
-      err = tf.test.compute_gradient_error(input_tensor,
-                                           in_shape,
-                                           resize_out,
-                                           out_shape,
-                                           x_init_value=x)
-    self.assertLess(err, 1e-3)
+    for nptype in self.TYPES:
+      x = np.arange(0, 6).reshape(in_shape).astype(nptype)
+
+      for use_gpu in [False, True]:
+        with self.test_session(use_gpu=use_gpu):
+          input_tensor = tf.constant(x, shape=in_shape)
+          resize_out = tf.image.resize_nearest_neighbor(input_tensor,
+                                                      out_shape[1:3])
+          err = tf.test.compute_gradient_error(input_tensor,
+                                               in_shape,
+                                               resize_out,
+                                               out_shape,
+                                               x_init_value=x)
+        self.assertLess(err, 1e-3)
 
   def testGradFromResizeToSmallerInBothDims(self):
     in_shape = [1, 4, 6, 1]
     out_shape = [1, 2, 3, 1]
 
-    x = np.arange(0, 24).reshape(in_shape).astype(np.float32)
-
-    with self.test_session():
-      input_tensor = tf.constant(x, shape=in_shape)
-      resize_out = tf.image.resize_nearest_neighbor(input_tensor,
-                                                    out_shape[1:3])
-      err = tf.test.compute_gradient_error(input_tensor,
-                                           in_shape,
-                                           resize_out,
-                                           out_shape,
-                                           x_init_value=x)
-    self.assertLess(err, 1e-3)
-
+    for nptype in self.TYPES:
+      x = np.arange(0, 24).reshape(in_shape).astype(nptype)
+
+      for use_gpu in [False, True]:
+        with self.test_session(use_gpu=use_gpu):
+          input_tensor = tf.constant(x, shape=in_shape)
+          resize_out = tf.image.resize_nearest_neighbor(input_tensor,
+                                                      out_shape[1:3])
+          err = tf.test.compute_gradient_error(input_tensor,
+                                               in_shape,
+                                               resize_out,
+                                               out_shape,
+                                               x_init_value=x)
+        self.assertLess(err, 1e-3)
+
+  def testCompareGpuVsCpu(self):
+    in_shape = [1, 4, 6, 3]
+    out_shape = [1, 8, 16, 3]
+
+    for nptype in self.TYPES:
+      x = np.arange(0, np.prod(in_shape)).reshape(in_shape).astype(nptype)
+      for align_corners in [True, False]:
+        with self.test_session(use_gpu=False):
+          input_tensor = tf.constant(x, shape=in_shape)
+          resize_out = tf.image.resize_nearest_neighbor(input_tensor,
+                                                        out_shape[1:3],
+                                                        align_corners=align_corners)
+          grad_cpu = tf.test.compute_gradient(input_tensor,
+                                              in_shape,
+                                              resize_out,
+                                              out_shape,
+                                              x_init_value=x)
+
+        with self.test_session(use_gpu=True):
+          input_tensor = tf.constant(x, shape=in_shape)
+          resize_out = tf.image.resize_nearest_neighbor(input_tensor,
+                                                        out_shape[1:3],
+                                                        align_corners=align_corners)
+          grad_gpu = tf.test.compute_gradient(input_tensor,
+                                              in_shape,
+                                              resize_out,
+                                              out_shape,
+                                              x_init_value=x)
+        self.assertAllClose(grad_cpu, grad_gpu, rtol=1e-5, atol=1e-5)
 
 class ResizeBilinearOpTest(tf.test.TestCase):
 
diff --git a/tensorflow/python/ops/nn.py b/tensorflow/python/ops/nn.py
index 75de6b5d7d..113b1fde3c 100644
--- a/tensorflow/python/ops/nn.py
+++ b/tensorflow/python/ops/nn.py
@@ -152,6 +152,7 @@ TensorFlow provides several operations that help you perform classification.
 
 @@sigmoid_cross_entropy_with_logits
 @@softmax
+@@log_softmax
 @@softmax_cross_entropy_with_logits
 @@sparse_softmax_cross_entropy_with_logits
 
diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py
index 118240cfdb..e7140816bb 100644
--- a/tensorflow/python/ops/nn_ops.py
+++ b/tensorflow/python/ops/nn_ops.py
@@ -381,6 +381,10 @@ ops.RegisterShape("Softmax")(
     common_shapes.unchanged_shape_with_rank(2))
 
 
+ops.RegisterShape("LogSoftmax")(
+    common_shapes.unchanged_shape_with_rank(2))
+
+
 @ops.RegisterShape("InTopK")
 def _InTopKShape(op):
   """Shape function for InTopK op."""
diff --git a/tensorflow/python/ops/rnn_cell.py b/tensorflow/python/ops/rnn_cell.py
index 870dbd179f..ebdfdc113b 100644
--- a/tensorflow/python/ops/rnn_cell.py
+++ b/tensorflow/python/ops/rnn_cell.py
@@ -265,7 +265,7 @@ class LSTMCell(RNNCell):
   def __init__(self, num_units, input_size=None,
                use_peepholes=False, cell_clip=None,
                initializer=None, num_proj=None,
-               num_unit_shards=1, num_proj_shards=1):
+               num_unit_shards=1, num_proj_shards=1, forget_bias=1.0):
     """Initialize the parameters for an LSTM cell.
 
     Args:
@@ -282,6 +282,8 @@ class LSTMCell(RNNCell):
         matrix is stored across num_unit_shards.
       num_proj_shards: How to split the projection matrix.  If >1, the
         projection matrix is stored across num_proj_shards.
+      forget_bias: Bias added to the forget gate. Defaults to 1.0 in order to
+        reduce the scale of forgetting at the beginning of training.
     """
     self._num_units = num_units
     self._input_size = input_size
@@ -291,6 +293,7 @@ class LSTMCell(RNNCell):
     self._num_proj = num_proj
     self._num_unit_shards = num_unit_shards
     self._num_proj_shards = num_proj_shards
+    self._forget_bias = forget_bias
 
     if num_proj:
       self._state_size = num_units + num_proj
@@ -367,10 +370,10 @@ class LSTMCell(RNNCell):
             "W_O_diag", shape=[self._num_units], dtype=dtype)
 
       if self._use_peepholes:
-        c = (sigmoid(f + 1 + w_f_diag * c_prev) * c_prev +
+        c = (sigmoid(f + self._forget_bias + w_f_diag * c_prev) * c_prev +
              sigmoid(i + w_i_diag * c_prev) * tanh(j))
       else:
-        c = (sigmoid(f + 1) * c_prev + sigmoid(i) * tanh(j))
+        c = (sigmoid(f + self._forget_bias) * c_prev + sigmoid(i) * tanh(j))
 
       if self._cell_clip is not None:
         c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip)
diff --git a/tensorflow/python/training/adadelta.py b/tensorflow/python/training/adadelta.py
new file mode 100644
index 0000000000..f5ebf59894
--- /dev/null
+++ b/tensorflow/python/training/adadelta.py
@@ -0,0 +1,84 @@
+# Copyright 2015 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Adadelta for TensorFlow."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import constant_op
+from tensorflow.python.training import optimizer
+from tensorflow.python.training import training_ops
+
+
+class AdadeltaOptimizer(optimizer.Optimizer):
+  """Optimizer that implements the Adadelta algorithm. 
+
+  See [M. D. Zeiler](http://arxiv.org/abs/1212.5701)
+  ([pdf](http://arxiv.org/pdf/1212.5701.pdf))
+ 
+  @@__init__
+  """
+
+  def __init__(self, learning_rate=0.001, rho=0.95, epsilon=1e-8,
+               use_locking=False, name="Adadelta"):
+    """Construct a new Adadelta optimizer.
+
+    Args:
+      learning_rate: A `Tensor` or a floating point value. The learning rate.
+      rho: A `Tensor` or a floating point value. The decay rate.
+      epsilon: A `Tensor` or a floating point value.  A constant epsilon used
+               to better condition the grad update.
+      use_locking: If `True` use locks for update operations.
+      name: Optional name prefix for the operations created when applying
+        gradients.  Defaults to "Adadelta".
+    """
+    super(AdadeltaOptimizer, self).__init__(use_locking, name)
+    self._lr = learning_rate
+    self._rho = rho
+    self._epsilon = epsilon
+
+    # Tensor versions of the constructor arguments, created in _prepare().
+    self._lr_t = None
+    self._rho_t = None
+    self._epsilon_t = None
+
+  def _create_slots(self, var_list):
+    for v in var_list:
+      self._zeros_slot(v, "accum", self._name)
+      self._zeros_slot(v, "accum_update", self._name)
+
+  def _prepare(self):
+    """Converts the lr, rho and epsilon hyperparameters to tensors."""
+    self._lr_t = ops.convert_to_tensor(self._lr, name="lr")
+    self._rho_t = ops.convert_to_tensor(self._rho, name="rho")
+    self._epsilon_t = ops.convert_to_tensor(self._epsilon, name="epsilon")
+
+  def _apply_dense(self, grad, var):
+    accum = self.get_slot(var, "accum")
+    accum_update = self.get_slot(var, "accum_update")
+    return training_ops.apply_adadelta(
+        var, accum, accum_update,
+        self._lr_t, self._rho_t, self._epsilon_t, grad,
+        use_locking=self._use_locking)
+
+  def _apply_sparse(self, grad, var):
+    accum = self.get_slot(var, "accum")
+    accum_update = self.get_slot(var, "accum_update")
+    return training_ops.sparse_apply_adadelta(
+        var, accum, accum_update, self._lr_t,
+        self._rho_t, self._epsilon_t, grad.values,
+        grad.indices, use_locking=self._use_locking)
diff --git a/tensorflow/python/training/adadelta_test.py b/tensorflow/python/training/adadelta_test.py
new file mode 100644
index 0000000000..9b55348aa1
--- /dev/null
+++ b/tensorflow/python/training/adadelta_test.py
@@ -0,0 +1,113 @@
+# Copyright 2015 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for Adadelta Optimizer."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow.python.platform
+
+import numpy as np
+from six.moves import xrange  # pylint: disable=redefined-builtin
+import tensorflow as tf
+
+
+class AdadeltaOptimizerTest(tf.test.TestCase):
+
+  def testBasic(self):
+    with self.test_session():
+      var0 = tf.Variable([1.0, 2.0])
+      var1 = tf.Variable([3.0, 4.0])
+      grads0 = tf.constant([0.1, 0.1])
+      grads1 = tf.constant([0.01, 0.01])
+      lr = 1.0
+      rho = 0.95
+      epsilon = 1e-8
+
+      adadelta_opt = tf.train.AdadeltaOptimizer(lr, rho=rho, epsilon=epsilon)
+      adadelta_update = adadelta_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
+      tf.initialize_all_variables().run()
+
+      # Check we have slots
+      self.assertEqual(["accum", "accum_update"],
+                       adadelta_opt.get_slot_names())
+      slot0 = adadelta_opt.get_slot(var0, "accum")
+      self.assertEquals(slot0.get_shape(), var0.get_shape())
+      self.assertFalse(slot0 in tf.trainable_variables())
+
+      slot0_update = adadelta_opt.get_slot(var0, "accum_update")
+      self.assertEquals(slot0_update.get_shape(), var0.get_shape())
+      self.assertFalse(slot0_update in tf.trainable_variables())
+
+
+      slot1 = adadelta_opt.get_slot(var1, "accum")
+      self.assertEquals(slot1.get_shape(), var1.get_shape())
+      self.assertFalse(slot1 in tf.trainable_variables())
+
+      slot1_update = adadelta_opt.get_slot(var1, "accum_update")
+      self.assertEquals(slot1_update.get_shape(), var1.get_shape())
+      self.assertFalse(slot1_update in tf.trainable_variables())
+
+      # Fetch params to validate initial values
+      self.assertAllClose([1.0, 2.0], var0.eval())
+      self.assertAllClose([3.0, 4.0], var1.eval())
+
+      adadelta_update.run()
+
+      # Check that the accumulators have been updated.
+      grad = 0.1
+      accum = 0
+      accum_update = 0
+
+      accum = accum * rho + (grad**2) * (1 - rho)
+      update1 = np.sqrt(accum_update + epsilon) * (1. / np.sqrt(accum + epsilon)) * grad
+      accum_update = accum_update * rho + (update1**2) * (1.0 - rho)
+
+      self.assertAllClose(np.array([accum, accum]), slot0.eval())
+      self.assertAllClose(np.array([accum_update, accum_update]), slot0_update.eval())
+
+      # Check that the parameters have been updated.
+      self.assertAllClose(np.array([1.0 - update1 * lr,
+                                    2.0 - update1 * lr]),
+                          var0.eval(), rtol=1e-3)
+
+      self.assertAllClose(np.array([3.0 - update1 * lr,
+                                    4.0 - update1 * lr]),
+                          var1.eval(), rtol=1e-3)
+
+      # Step 2: the accumulators now contain the state from the first update.
+      accum =  accum * rho + (grad**2) * (1 - rho)
+      update2 = ((accum_update + epsilon)**0.5) * (1. / (accum + epsilon)**0.5) * grad
+      accum_update = accum_update * rho + (update2**2) * (1.0 - rho)
+
+      adadelta_update.run()
+
+      # Check that the accumulators have been updated again.
+      self.assertAllClose(np.array([accum, accum]), slot0.eval())
+      self.assertAllClose(np.array([accum_update, accum_update]), slot0_update.eval())
+
+      # Check that the parameters have been updated.
+      self.assertAllClose(
+          np.array([1.0 - update1 - update2,
+                    2.0 - update1 - update2]),
+          var0.eval(), rtol=1e-3)
+
+      self.assertAllClose(np.array([3.0 - update1 - update2,
+                                    4.0 - update1 - update2]),
+                          var1.eval(), rtol=1e-3)
+
+if __name__ == "__main__":
+  tf.test.main()
diff --git a/tensorflow/python/training/saver.py b/tensorflow/python/training/saver.py
index e1e399054d..1d5f53db5c 100644
--- a/tensorflow/python/training/saver.py
+++ b/tensorflow/python/training/saver.py
@@ -1284,7 +1284,7 @@ def _read_meta_graph_file(filename):
 
 
 def _import_meta_graph_def(meta_graph_def):
-  """Recreates a Graph saved in a a `MetaGraphDef` proto.
+  """Recreates a Graph saved in a `MetaGraphDef` proto.
 
   This function adds all the nodes from the meta graph def proto to the current
   graph, recreates all the collections, and returns a saver from saver_def.
@@ -1386,7 +1386,7 @@ def import_meta_graph(meta_graph_or_file):
   with tf.Session() as sess:
     new_saver = tf.train.import_meta_graph('my-save-dir/my-model-10000.meta')
     new_saver.restore(sess, 'my-save-dir/my-model-10000')
-    # tf.get_collection() retrurns a list. In this example we only want the
+    # tf.get_collection() returns a list. In this example we only want the
     # first one.
     train_op = tf.get_collection('train_op')[0]
     for step in xrange(1000000):
@@ -1401,7 +1401,7 @@ def import_meta_graph(meta_graph_or_file):
       the path) containing a `MetaGraphDef`.
 
   Returns:
-    A saver constructed rom `saver_def` in `MetaGraphDef` or None.
+    A saver constructed from `saver_def` in `MetaGraphDef` or None.
 
     A None value is returned if no variables exist in the `MetaGraphDef`
     (i.e., there are no variables to restore).
diff --git a/tensorflow/python/training/summary_io.py b/tensorflow/python/training/summary_io.py
index ff92008872..7aeab214a0 100644
--- a/tensorflow/python/training/summary_io.py
+++ b/tensorflow/python/training/summary_io.py
@@ -116,7 +116,7 @@ class SummaryWriter(object):
     and adds it to the event file.
 
     You can pass the result of evaluating any summary op, using
-    [`Session.run()`](client.md#Session.run] or
+    [`Session.run()`](client.md#Session.run) or
     [`Tensor.eval()`](framework.md#Tensor.eval), to this
     function. Alternatively, you can pass a `tf.Summary` protocol
     buffer that you populate with your own data. The latter is
diff --git a/tensorflow/python/training/supervisor_test.py b/tensorflow/python/training/supervisor_test.py
index e1b8cb8090..2b4c878adf 100644
--- a/tensorflow/python/training/supervisor_test.py
+++ b/tensorflow/python/training/supervisor_test.py
@@ -23,12 +23,9 @@ import os
 import shutil
 import time
 
+from six.moves import xrange  # pylint: disable=redefined-builtin
 import tensorflow as tf
 
-from tensorflow.core.protobuf import config_pb2
-from tensorflow.core.util.event_pb2 import SessionLog
-from tensorflow.python.platform import gfile
-
 
 def _summary_iterator(test_dir):
   """Reads events from test_dir/events.
@@ -70,7 +67,7 @@ class SupervisorTest(tf.test.TestCase):
         my_op = tf.constant([1.0])
       sv = tf.train.Supervisor(logdir=logdir)
       sess = sv.prepare_or_wait_for_session(
-          "", config=config_pb2.ConfigProto(device_count={"CPU": 2}))
+          "", config=tf.ConfigProto(device_count={"CPU": 2}))
       for _ in xrange(10):
         sess.run(my_op)
       sess.close()
@@ -111,7 +108,7 @@ class SupervisorTest(tf.test.TestCase):
 
     # The next one should be a stop message if we closed cleanly.
     ev = next(rr)
-    self.assertEquals(SessionLog.STOP, ev.session_log.status)
+    self.assertEquals(tf.SessionLog.STOP, ev.session_log.status)
 
     # We should be done.
     self.assertRaises(StopIteration, lambda: next(rr))
@@ -273,7 +270,7 @@ class SupervisorTest(tf.test.TestCase):
     """
     end_time = time.time() + timeout_secs
     while time.time() < end_time:
-      if len(gfile.Glob(pattern)) >= 1:
+      if len(tf.gfile.Glob(pattern)) >= 1:
         return
       time.sleep(0.05)
     self.assertFalse(True, "Glob never matched any file: %s" % pattern)
@@ -306,7 +303,7 @@ class SupervisorTest(tf.test.TestCase):
     ev = next(rr)
     self.assertProtoEquals("value { tag: 'v' simple_value: 1.0 }", ev.summary)
     ev = next(rr)
-    self.assertEquals(SessionLog.STOP, ev.session_log.status)
+    self.assertEquals(tf.SessionLog.STOP, ev.session_log.status)
 
     self.assertRaises(StopIteration, lambda: next(rr))
     # There should be a checkpoint file with the variable "foo"
@@ -344,7 +341,7 @@ class SupervisorTest(tf.test.TestCase):
     # It is actually undeterministic whether SessionLog.START gets written
     # before the summary or the checkpoint, but this works when run 10000 times.
     self.assertEquals(123, ev.step)
-    self.assertEquals(SessionLog.START, ev.session_log.status)
+    self.assertEquals(tf.SessionLog.START, ev.session_log.status)
     first = next(rr)
     second = next(rr)
     # It is undeterministic whether the value gets written before the checkpoint
@@ -354,15 +351,15 @@ class SupervisorTest(tf.test.TestCase):
                                         simple_value: 0.0 }""",
                              first.summary)
       self.assertEquals(123, second.step)
-      self.assertEquals(SessionLog.CHECKPOINT, second.session_log.status)
+      self.assertEquals(tf.SessionLog.CHECKPOINT, second.session_log.status)
     else:
       self.assertEquals(123, first.step)
-      self.assertEquals(SessionLog.CHECKPOINT, first.session_log.status)
+      self.assertEquals(tf.SessionLog.CHECKPOINT, first.session_log.status)
       self.assertProtoEquals("""value { tag: 'global_step/sec'
                                         simple_value: 0.0 }""",
                              second.summary)
     ev = next(rr)
-    self.assertEquals(SessionLog.STOP, ev.session_log.status)
+    self.assertEquals(tf.SessionLog.STOP, ev.session_log.status)
     self.assertRaises(StopIteration, lambda: next(rr))
     # There should be a checkpoint file with the variable "foo"
     with tf.Graph().as_default(), self.test_session() as sess:
diff --git a/tensorflow/python/training/training.py b/tensorflow/python/training/training.py
index 65cefc09da..ab0bbd61ef 100644
--- a/tensorflow/python/training/training.py
+++ b/tensorflow/python/training/training.py
@@ -28,6 +28,7 @@ of the subclasses.
 @@Optimizer
 
 @@GradientDescentOptimizer
+@@AdadeltaOptimizer
 @@AdagradOptimizer
 @@MomentumOptimizer
 @@AdamOptimizer
@@ -134,6 +135,7 @@ from tensorflow.python.ops import gradients
 from tensorflow.python.ops import io_ops
 from tensorflow.python.ops import state_ops
 
+from tensorflow.python.training.adadelta import AdadeltaOptimizer
 from tensorflow.python.training.adagrad import AdagradOptimizer
 from tensorflow.python.training.adam import AdamOptimizer
 from tensorflow.python.training.ftrl import FtrlOptimizer
diff --git a/tensorflow/python/training/training_ops.py b/tensorflow/python/training/training_ops.py
index 3487546872..7a194c6cb0 100644
--- a/tensorflow/python/training/training_ops.py
+++ b/tensorflow/python/training/training_ops.py
@@ -47,6 +47,18 @@ def _AssertInputIsScalar(op, index):
   op.inputs[index].get_shape().assert_is_compatible_with(tensor_shape.scalar())
 
 
+@ops.RegisterShape("ApplyAdadelta")
+def _ApplyAdadeltaShape(op):
+  """Shape function for the ApplyAdadelta op."""
+  var_shape = op.inputs[0].get_shape()
+  accum_shape = op.inputs[1].get_shape().merge_with(var_shape)
+  accum_update_shape = op.inputs[2].get_shape().merge_with(var_shape)
+  _AssertInputIsScalar(op, 3)  # lr
+  _AssertInputIsScalar(op, 4)  # rho
+  _AssertInputIsScalar(op, 5)  # epsilon
+  grad_shape = op.inputs[6].get_shape().merge_with(accum_shape)
+  return [grad_shape]
+
 @ops.RegisterShape("ApplyAdagrad")
 def _ApplyAdagradShape(op):
   """Shape function for the ApplyAdagrad op."""
@@ -120,6 +132,20 @@ def _ApplyGradientDescentShape(op):
   delta_shape = op.inputs[2].get_shape().merge_with(var_shape)
   return [delta_shape]
 
+@ops.RegisterShape("SparseApplyAdadelta")
+def _SparseApplyAdadeltaShape(op):
+  """Shape function for the SparseApplyAdadelta op."""
+  var_shape = op.inputs[0].get_shape()
+  accum_grad_shape = op.inputs[1].get_shape().merge_with(var_shape)
+  accum_update_shape = op.inputs[2].get_shape().merge_with(accum_grad_shape)
+  _AssertInputIsScalar(op, 3)  # lr
+  _AssertInputIsScalar(op, 4)  # rho
+  _AssertInputIsScalar(op, 5)  # epsilon
+  grad_shape = op.inputs[6].get_shape().merge_with(
+      tensor_shape.TensorShape([None]).concatenate(accum_update_shape[1:]))
+  unused_indices_shape = op.inputs[7].get_shape().merge_with(
+      tensor_shape.vector(grad_shape[0]))
+  return [accum_update_shape]
 
 @ops.RegisterShape("SparseApplyAdagrad")
 def _SparseApplyAdagradShape(op):
diff --git a/tensorflow/tensorboard/backend/server_test.py b/tensorflow/tensorboard/backend/server_test.py
index 42c2aafe21..d6cace3e74 100644
--- a/tensorflow/tensorboard/backend/server_test.py
+++ b/tensorflow/tensorboard/backend/server_test.py
@@ -168,9 +168,9 @@ class TensorboardServerTest(tf.test.TestCase):
     self.assertEqual(graph.node[1].name, 'b')
     # Make sure the second node has an attribute that was filtered out because
     # it was too large and was added to the "too large" attributes list.
-    self.assertEqual(graph.node[1].attr.keys(), ['_very_large_attrs'])
+    self.assertEqual(list(graph.node[1].attr.keys()), ['_very_large_attrs'])
     self.assertEqual(graph.node[1].attr['_very_large_attrs'].list.s,
-                     ['very_large_attr'])
+                     [b'very_large_attr'])
 
   def _GenerateTestData(self):
     """Generates the test data directory.
diff --git a/tensorflow/tools/ci_build/Dockerfile.debian.jessie.cpu b/tensorflow/tools/ci_build/Dockerfile.debian.jessie.cpu
index fc37a5bb28..2ed7a30824 100644
--- a/tensorflow/tools/ci_build/Dockerfile.debian.jessie.cpu
+++ b/tensorflow/tools/ci_build/Dockerfile.debian.jessie.cpu
@@ -7,6 +7,7 @@ COPY install/*.sh /install/
 RUN /install/install_bootstrap_deb_packages.sh
 RUN echo "deb http://http.debian.net/debian jessie-backports main" | tee -a /etc/apt/sources.list
 RUN /install/install_deb_packages.sh
+RUN /install/install_pip_packages.sh
 RUN /install/install_bazel.sh
 
 # Set up bazelrc.
diff --git a/tensorflow/tools/ci_build/README.md b/tensorflow/tools/ci_build/README.md
index aca5829b3c..d56712d342 100644
--- a/tensorflow/tools/ci_build/README.md
+++ b/tensorflow/tools/ci_build/README.md
@@ -1,36 +1,8 @@
-# TensorFlow.org Continuous Integration
+# Tensorflow Builds
 
-This directory contains all the files and setup instructions to run
-continuous integration [ci.tensorflow.org](http://ci.tensorflow.org).
-
-
-
-## How it works
-
-We use [jenkins](https://jenkins-ci.org/) as our continuous integration.
-It is running at [ci.tensorflow.org](http://ci.tensorflow.org).
-All the jobs are run within [docker](http://www.docker.com/) containers.
-
-Builds can be triggered by push to master, push a change set or manually.
-The build started in jenkins will first pull the git tree. Then jenkins builds
-a docker container (using one of those Dockerfile.* files in this directory).
-The build itself is run within the container itself.
-
-Source tree lives in jenkins job workspace. Docker container for jenkins
-are transient - deleted after the build. Containers build very fast thanks
-to docker caching. Individual builds are fast thanks to bazel caching.
-
-
-
-## Implementation Details
-
-* The unusual `bazel-ci_build-cache` directory is mapped to docker
-  container performing the build using docker's --volume parameter.
-  This way we cache bazel output between builds.
-
-* The `builds` directory within this folder contains shell scripts to run within
-  the container. They essentially contains workarounds for current limitations
-  of bazel.
+This directory contains all the files and setup instructions to run all
+the important builds and tests. **You can trivially run it yourself!** It is also
+used to run continuous integration at [ci.tensorflow.org](http://ci.tensorflow.org).
 
 
 
@@ -39,6 +11,12 @@ to docker caching. Individual builds are fast thanks to bazel caching.
 1. Install [Docker](http://www.docker.com/). Follow instructions
    [on the Docker site](https://docs.docker.com/installation/).
 
+   You can run all the jobs **without docker** if you are on mac or on linux
+   and you just don't want docker. Just install all the dependencies from
+   [os_setup.md](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/g3doc/get_started/os_setup.md).
+   Then run any of the one-liners below without the
+   `tensorflow/tools/ci_build/ci_build.sh` in them.
+
 2. Clone tensorflow repository.
 
    ```bash
@@ -64,25 +42,70 @@ tensorflow/tools/ci_build/ci_build.sh CPU bazel test //tensorflow/...
 The jobs run by [ci.tensorflow.org](http://ci.tensorflow.org) include following:
 
 ```bash
-# Note: You can run the following one-liners yourself if you have Docker.
+# Note: You can run the following one-liners yourself if you have Docker. Run
+# without `tensorflow/tools/ci_build/ci_build.sh` on mac or linux without Docker.
 
 # build and run cpu tests
 tensorflow/tools/ci_build/ci_build.sh CPU bazel test //tensorflow/...
 
-# build gpu
-tensorflow/tools/ci_build/ci_build.sh GPU bazel build -c opt --config=cuda //tensorflow/...
+# build and run gpu tests (note if you get unstable results you may be running
+# out of gpu memory - if so add "--jobs=1" argument)
+tensorflow/tools/ci_build/ci_build.sh GPU bazel test -c opt --config=cuda //tensorflow/...
 
 # build pip with gpu support
 tensorflow/tools/ci_build/ci_build.sh GPU tensorflow/tools/ci_build/builds/pip.sh GPU
 
+# build and run gpu tests using python 3
+CI_DOCKER_EXTRA_PARAMS="-e CI_BUILD_PYTHON=python3" tensorflow/tools/ci_build/ci_build.sh GPU tensorflow/tools/ci_build/builds/pip.sh GPU
+
 # build android example app
 tensorflow/tools/ci_build/ci_build.sh ANDROID tensorflow/tools/ci_build/builds/android.sh
 
+# cmake cpu build and test
+tensorflow/tools/ci_build/ci_build.sh CPU tensorflow/tools/ci_build/builds/cmake.sh
+
 # run bash inside the container
-CI_DOCKER_EXTRA_PARAMS='-it --rm' CI_COMMAND_PREFIX='' tensorflow/tools/ci_build/ci_build.sh CPU /bin/bash
+CI_DOCKER_EXTRA_PARAMS='-it --rm' tensorflow/tools/ci_build/ci_build.sh CPU /bin/bash
 ```
 
 **Note**: The set of jobs and how they are triggered is still evolving.
 There are builds for master branch on cpu, gpu and android. There is a build
 for incoming gerrit changes. Gpu tests and benchmark are coming soon. Check
 [ci.tensorflow.org](http://ci.tensorflow.org) for current jobs.
+
+
+
+## How Does Tensorflow Continuous Integration Work
+
+We use [jenkins](https://jenkins-ci.org/) as our continuous integration.
+It is running at [ci.tensorflow.org](http://ci.tensorflow.org).
+All the jobs are run within [docker](http://www.docker.com/) containers.
+
+Builds can be triggered by push to master, push a change set or manually.
+The build started in jenkins will first pull the git tree. Then jenkins builds
+a docker container (using one of those Dockerfile.* files in this directory).
+The build itself is run within the container itself.
+
+Source tree lives in jenkins job workspace. Docker containers for jenkins
+are transient - deleted after the build. Containers build very fast thanks
+to docker caching. Individual builds are fast thanks to bazel caching.
+
+
+
+## Implementation Details
+
+* The ci_build.sh script creates and runs a docker container with all
+  dependencies. builds/with_the_same_user together with ci_build.sh creates an
+  environment which is the same inside the container as it is outside: the same
+  user, group and path, so that docker symlinks work inside and outside the
+  container. You can use it for your development: edit files in your git clone
+  directory, then run ci_build.sh; the directory gets mapped inside the
+  container and your tree is built there.
+
+* The unusual `bazel-ci_build-cache` directory is mapped to docker container
+  performing the build using docker's --volume parameter. This way we cache
+  bazel output between builds.
+
+* The `builds` directory within this folder contains shell scripts to run within
+  the container. They essentially contain workarounds for current limitations
+  of bazel.
diff --git a/tensorflow/tools/ci_build/builds/cmake.sh b/tensorflow/tools/ci_build/builds/cmake.sh
new file mode 100755
index 0000000000..00fb2e8139
--- /dev/null
+++ b/tensorflow/tools/ci_build/builds/cmake.sh
@@ -0,0 +1,22 @@
+#!/usr/bin/env bash
+# Copyright 2016 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+set -e
+
+mkdir -p build
+cd build
+cmake -DCMAKE_BUILD_TYPE=Release ../tensorflow/contrib/cmake
+make all test
diff --git a/tensorflow/tools/ci_build/builds/pip.sh b/tensorflow/tools/ci_build/builds/pip.sh
index 7255de0bcc..96b32dd1f4 100755
--- a/tensorflow/tools/ci_build/builds/pip.sh
+++ b/tensorflow/tools/ci_build/builds/pip.sh
@@ -163,10 +163,13 @@ source "${VENV_DIR}/bin/activate" || \
     die "FAILED: Unable to activate virtualenv"
 
 
-# Install the pip file in virtual env
-pip install -v --force-reinstall ${WHL_PATH} \
-&& echo "Successfully installed pip package ${WHL_PATH}" \
-|| die "pip install (without --upgrade) FAILED"
+# Install the pip file in virtual env (plus missing dependencies)
+pip install -v "${WHL_PATH}" || die "pip install (without --upgrade) FAILED"
+# Force tensorflow reinstallation. Otherwise it may not get installed from
+# last build if it had the same version number as previous build.
+pip install -v --upgrade --no-deps --force-reinstall "${WHL_PATH}" || \
+    die "pip install (forcing to reinstall tensorflow) FAILED"
+echo "Successfully installed pip package ${WHL_PATH}"
 
 # Install extra pip packages required by the test-on-install
 for PACKAGE in ${INSTALL_EXTRA_PIP_PACKAGES}; do
diff --git a/tensorflow/tools/ci_build/builds/with_the_same_user b/tensorflow/tools/ci_build/builds/with_the_same_user
index f866edb230..e723974853 100755
--- a/tensorflow/tools/ci_build/builds/with_the_same_user
+++ b/tensorflow/tools/ci_build/builds/with_the_same_user
@@ -31,7 +31,8 @@ getent group "${CI_BUILD_GID}" || addgroup --gid "${CI_BUILD_GID}" "${CI_BUILD_G
 getent passwd "${CI_BUILD_UID}" || adduser --gid "${CI_BUILD_GID}" --uid "${CI_BUILD_UID}" \
     --gecos "${CI_BUILD_USER} (generated by with_the_same_user script)" \
     --disabled-password --home "${CI_BUILD_HOME}" --quiet "${CI_BUILD_USER}"
-sudo usermod -a -G sudo "${CI_BUILD_USER}"
+usermod -a -G sudo "${CI_BUILD_USER}"
+echo "${CI_BUILD_USER} ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/90-nopasswd-sudo
 
 if [ -e /root/.bazelrc]; then
   cp /root/.bazelrc "${CI_BUILD_HOME}/.bazelrc"
diff --git a/tensorflow/tools/ci_build/install/install_deb_packages.sh b/tensorflow/tools/ci_build/install/install_deb_packages.sh
index 1bf77b236c..c14fc51adf 100755
--- a/tensorflow/tools/ci_build/install/install_deb_packages.sh
+++ b/tensorflow/tools/ci_build/install/install_deb_packages.sh
@@ -21,6 +21,7 @@ apt-get update
 apt-get install -y \
     bc \
     build-essential \
+    cmake \
     curl \
     git \
     openjdk-8-jdk \