Diffstat (limited to 'tensorflow/python/ops')
45 files changed, 17423 insertions, 0 deletions
diff --git a/tensorflow/python/ops/__init__.py b/tensorflow/python/ops/__init__.py new file mode 100755 index 0000000000..e69de29bb2 --- /dev/null +++ b/tensorflow/python/ops/__init__.py diff --git a/tensorflow/python/ops/array_grad.py b/tensorflow/python/ops/array_grad.py new file mode 100644 index 0000000000..2a463940d6 --- /dev/null +++ b/tensorflow/python/ops/array_grad.py @@ -0,0 +1,187 @@ +"""Gradients for operators defined in array_ops.py.""" + +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import constant_op +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import gen_array_ops + + +@ops.RegisterGradient("Pack") +def _PackGrad(op, grad): + """Gradient for pack op.""" + return array_ops.unpack(grad, num=op.get_attr('N')) + + +@ops.RegisterGradient("Unpack") +def _UnpackGrad(_, *grads): + """Gradient for unpack op.""" + return array_ops.pack(grads) + + +@ops.RegisterGradient("Concat") +def _ConcatGrad(op, grad): + """Gradient for concat op.""" + assert isinstance(grad, ops.Tensor) + # Degenerate concatenation, just return grad. + if len(op.inputs) == 2: + return [None, grad] + # Get the inputs' tensor shapes + sizes = [array_ops.shape(x) for x in op.inputs[1:]] + concat_dim = op.inputs[0] + # Since shape is 1-D, shape_of_shape = [rank-of-inputs] + shape_of_shape = array_ops.shape(sizes[0]) + # Make a vector of length equal to the input's dimensions, + # with 0's everywhere and 1 in the concat dim position. + # Note: Can't use sparse_to_dense since it isn't GPU-capable (for now) + mask = array_ops.concat(0, + [array_ops.fill( + array_ops.expand_dims(concat_dim, 0), 0), [1], + array_ops.fill(shape_of_shape - concat_dim - 1, 0)]) + out_grads = [] + begin = array_ops.fill(shape_of_shape, 0) + for i in range(len(sizes)): + out_grads.append(array_ops.slice(grad, begin, sizes[i])) + # Lint complains begin = begin + ... + begin = math_ops.add(begin, sizes[i] * mask) + return [None] + out_grads + + +@ops.RegisterGradient("Slice") +def _SliceGrad(op, grad): + """Gradient for Slice op.""" + # Create an Nx2 padding where the first column represents how many + # zeros are to be prepended for each dimension, and the second + # column indicates how many zeros are appended. + # + # The number of zeros to append is the shape of the input + # elementwise-subtracted by both the begin vector and sizes vector. + # + # Some more reshaping is needed to assemble this tensor with the + # right dimensions. + input_vec = op.inputs[0] + begin_vec = op.inputs[1] + input_rank = array_ops.rank(input_vec) + slice_size = array_ops.shape(op.outputs[0]) + + shape = array_ops.pack([input_rank, 1]) + before_pad = array_ops.reshape(begin_vec, shape) + after_pad = array_ops.reshape( + array_ops.shape(input_vec) - slice_size - begin_vec, shape) + paddings = array_ops.concat(1, [before_pad, after_pad]) + return array_ops.pad(grad, paddings), None, None + + +@ops.RegisterGradient("Split") +def _SplitGrad(op, *grads): + return None, array_ops.concat(op.inputs[0], list(grads)) + + +ops.NoGradient("Const") + +# TODO(liqzhang): The gradient for Diag operator would be +# the diagonal of the backprop. Implement if there is a need. +ops.NoGradient("Diag") + +# Edit Distance has no gradient (but can be used to eval seq2seq or CTC). 
+ops.NoGradient("EditDistance") + +ops.NoGradient("Fill") + + +@ops.RegisterGradient("Gather") +def _GatherGrad(op, grad): + return [ + ops.IndexedSlices(grad, op.inputs[1], array_ops.shape(op.inputs[0])), None + ] + + +@ops.RegisterGradient("Identity") +def _IdGrad(_, grad): + return grad + + +@ops.RegisterGradient("RefIdentity") +def _RefIdGrad(_, grad): + return grad + + +ops.NoGradient("StopGradient") + + +@ops.RegisterGradient("Reshape") +def _ReshapeGrad(op, grad): + return [array_ops.reshape(grad, array_ops.shape(op.inputs[0])), None] + + +ops.NoGradient("InvertPermutation") + + +def _ReshapeToInput(op, grad): + """Reshapes the gradient to the shape of the original input.""" + return array_ops.reshape(grad, array_ops.shape(op.inputs[0])) + + +@ops.RegisterGradient("ExpandDims") +def _ExpandDimsGrad(op, grad): + return [_ReshapeToInput(op, grad), None] + + +@ops.RegisterGradient("Squeeze") +def _SqueezeGrad(op, grad): + return _ReshapeToInput(op, grad) + + +@ops.RegisterGradient("Transpose") +def _TransposeGrad(op, grad): + """Returns unshuffle(grad).""" + p = op.inputs[1] + return [array_ops.transpose(grad, array_ops.invert_permutation(p)), None] + + +ops.NoGradient("Shape") + + +ops.NoGradient("Rank") + + +ops.NoGradient("Size") + + +@ops.RegisterGradient("Tile") +def _TileGrad(op, grad): + """Sum reduces grad along the tiled dimensions.""" + assert isinstance(grad, ops.Tensor) + return [gen_array_ops._tile_grad(grad, op.inputs[1]), None] + + +ops.NoGradient("TileGrad") + + +ops.NoGradient("BroadcastGradientArgs") + + +@ops.RegisterGradient("Pad") +def _PadGrad(op, grad): + """Gradient for Pad.""" + # Pad introduces values around the original tensor, so the gradient function + # slices the original shape out of the gradient.""" + x = op.inputs[0] + a = op.inputs[1] # [Rank(x), 2] + # Takes a slice of a. The 1st column. [Rank(x), 1]. + pad_before = array_ops.slice(a, [0, 0], + array_ops.pack([array_ops.rank(x), 1])) + # Make it a 1-D tensor. + begin = array_ops.reshape(pad_before, [-1]) + sizes = array_ops.shape(x) + return array_ops.slice(grad, begin, sizes), None + + +# ReverseSequence is just a permutation. The gradient permutes back. +@ops.RegisterGradient("ReverseSequence") +def _ReverseSequenceGrad(op, grad): + seq_lengths = op.inputs[1] + return [array_ops.reverse_sequence(grad, + seq_dim=op.get_attr("seq_dim"), + seq_lengths=seq_lengths), + None] diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py new file mode 100644 index 0000000000..ed780db625 --- /dev/null +++ b/tensorflow/python/ops/array_ops.py @@ -0,0 +1,1207 @@ +"""## Casting + +TensorFlow provides several operations that you can use to cast tensor data +types in your graph. + +@@string_to_number +@@to_double +@@to_float +@@to_bfloat16 +@@to_int32 +@@to_int64 +@@cast + +## Shapes and Shaping + +TensorFlow provides several operations that you can use to determine the shape +of a tensor and change the shape of a tensor. + +@@shape +@@size +@@rank +@@reshape +@@squeeze +@@expand_dims + +## Slicing and Joining + +TensorFlow provides several operations to slice or extract parts of a tensor, +or join multiple tensors together. 
+ +@@slice +@@split +@@tile +@@pad +@@concat +@@pack +@@unpack +@@reverse_sequence +@@reverse +@@transpose +@@gather +@@dynamic_partition +@@dynamic_stitch +""" +import sys +import tensorflow.python.platform +import numpy as np + +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.framework import tensor_util +from tensorflow.python.framework import types +from tensorflow.python.ops import common_shapes +from tensorflow.python.ops import gen_array_ops +from tensorflow.python.ops import gen_math_ops +# pylint: disable=wildcard-import +# 'Constant' gets imported in the module 'array_ops'. +from tensorflow.python.ops.constant_op import constant +from tensorflow.python.ops.gen_array_ops import * + + +# We override the 'slice' for the "slice" op, so we keep python's +# existing 'slice' for later use in this module. +_baseslice = slice + + +# Aliases for some automatically-generated names. +listdiff = gen_array_ops.list_diff + + +# pylint: disable=undefined-variable,protected-access +def _SliceHelper(tensor, slice_spec): + """Overload for Tensor.__getitem__. + + Currently the size of the slice must be statically known in each dimension, + i.e. the "stop" of the slice must not be omitted. + + TODO(mrry): Support slices where the sizes are not specified. + TODO(mrry): Support negative indices in slices with numpy/Python semantics. + + Args: + tensor: An ops.Tensor object. + slice_spec: The arguments to Tensor.__getitem__. + + Returns: + The appropriate slice of "tensor", based on "slice_spec". + + Raises: + ValueError: If a slice range is negative size. + TypeError: If the slice indices aren't int, slice, or Ellipsis. + """ + if not isinstance(slice_spec, (list, tuple)): + slice_spec = [slice_spec] + indices = [] + sizes = [] + squeeze_dims = [] + for dim, s in enumerate(slice_spec): + if isinstance(s, int): + if s < 0: + raise NotImplementedError("Negative indices are currently unsupported") + indices.append(s) + sizes.append(1) + squeeze_dims.append(dim) + elif isinstance(s, _baseslice): + if s.step not in (None, 1): + raise NotImplementedError( + "Steps other than 1 are not currently supported") + start = s.start if s.start is not None else 0 + if start < 0: + raise NotImplementedError( + "Negative start indices are not currently supported") + indices.append(start) + if s.stop is not None and s.stop < 0: + raise NotImplementedError( + "Negative stop indices are not currently supported") + # NOTE(mrry): If the stop is not specified, Python substitutes + # sys.maxsize, which is typically (2 ** 63) - 1. Since Slice currently + # supports signed DT_INT32 arguments, we use -1 to specify that all + # elements should be captured. + if s.stop is None or s.stop == sys.maxsize: + sizes.append(-1) + else: + if start > s.stop: + raise ValueError("Stop must be at least start") + sizes.append(s.stop - start) + elif s is Ellipsis: + raise NotImplementedError("Ellipsis is not currently supported") + else: + raise TypeError("Bad slice index %s of type %s" % (s, type(s))) + sliced = slice(tensor, indices, sizes) + if squeeze_dims: + return squeeze(sliced, squeeze_dims=squeeze_dims) + else: + return sliced + + +def slice(input_, begin, size, name=None): + """Extracts a slice from a tensor. + + This operation extracts a slice of size `size` from a tensor `input` starting + at the location specified by `begin`. 
The slice `size` is represented as a + tensor shape, where `size[i]` is the number of elements of the 'i'th dimension + of `input` that you want to slice. The starting location (`begin`) for the + slice is represented as an offset in each dimension of `input`. In other + words, `begin[i]` is the offset into the 'i'th dimension of `input` that you + want to slice from. + + `begin` is zero-based; `size` is one-based. If `size[i]` is -1, + all remaining elements in dimension i are included in the + slice. In other words, this is equivalent to setting: + + `size[i] = input.dim_size(i) - begin[i]` + + This operation requires that: + + `0 <= begin[i] <= begin[i] + size[i] <= Di for i in [0, n]` + + For example: + + ``` + # 'input' is [[[1, 1, 1], [2, 2, 2]], + # [[3, 3, 3], [4, 4, 4]], + # [[5, 5, 5], [6, 6, 6]]] + tf.slice(input, [1, 0, 0], [1, 1, 3]) ==> [[[3, 3, 3]]] + tf.slice(input, [1, 0, 0], [1, 2, 3]) ==> [[[3, 3, 3], + [4, 4, 4]]] + tf.slice(input, [1, 0, 0], [2, 1, 3]) ==> [[[3, 3, 3]], + [[5, 5, 5]]] + ``` + + Args: + input_: A `Tensor`. + begin: An `int32` or `int64` `Tensor`. + size: An `int32` or `int64` `Tensor`. + name: A name for the operation (optional). + + Returns: + A `Tensor` the same type as `input`. + """ + return gen_array_ops._slice(input_, begin, size, name=name) + + +ops.Tensor._override_operator("__getitem__", _SliceHelper) + + +def pack(values, name="pack"): + """Packs a list of rank-`R` tensors into one rank-`(R+1)` tensor. + + Packs tensors in `values` into a tensor with rank one higher than each tensor + in `values` and shape `[len(values)] + values[0].shape`. The output satisfies + `output[i, ...] = values[i][...]`. + + This is the opposite of unpack. The numpy equivalent is + + tf.pack([x, y, z]) = np.asarray([x, y, z]) + + Args: + values: A list of `Tensor` objects with the same shape and type. + name: A name for this operation (optional). + + Returns: + output: A packed `Tensor` with the same type as `values`. + """ + return gen_array_ops._pack(values, name=name) + + +def unpack(value, num=None, name="unpack"): + """Unpacks the outer dimension of a rank-`R` tensor into rank-`(R-1)` tensors. + + Unpacks `num` tensors from `value` along the first dimension. + If `num` is not specified (the default), it is inferred from `value`'s shape. + If `value.shape[0]` is not known, `ValueError` is raised. + + The ith tensor in `output` is the slice `value[i, ...]`. Each tensor in + `output` has shape `value.shape[1:]`. + + This is the opposite of pack. The numpy equivalent is + + tf.unpack(x, n) = list(x) + + Args: + value: A rank `R > 0` `Tensor` to be unpacked. + num: An `int`. The first dimension of value. Automatically inferred if + `None` (the default). + name: A name for the operation (optional). + + Returns: + The list of `Tensor` objects unpacked from `value`. + + Raises: + ValueError: If `num` is unspecified and cannot be inferred. + """ + if num is None: + value = ops.convert_to_tensor(value) + shape = value.get_shape() + num = shape[0].value + if num is None: + raise ValueError("Cannot infer num from shape %s" % shape) + return gen_array_ops._unpack(value, num=num, name=name) + + +def concat(concat_dim, values, name="concat"): + """Concatenates tensors along one dimension. + + Concatenates the list of tensors `values` along dimension `concat_dim`. If + `values[i].shape = [D0, D1, ... Dconcat_dim(i), ...Dn]`, the concatenated + result has shape + + [D0, D1, ... 
Rconcat_dim, ...Dn] + + where + + Rconcat_dim = sum(Dconcat_dim(i)) + + That is, the data from the input tensors is joined along the `concat_dim` + dimension. + + The number of dimensions of the input tensors must match, and all dimensions + except `concat_dim` must be equal. + + For example: + + ```python + t1 = [[1, 2, 3], [4, 5, 6]] + t2 = [[7, 8, 9], [10, 11, 12]] + tf.concat(0, [t1, t2]) ==> [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]] + tf.concat(1, [t1, t2]) ==> [[1, 2, 3, 7, 8, 9], [4, 5, 6, 10, 11, 12]] + + # tensor t3 with shape [2, 3] + # tensor t4 with shape [2, 3] + tf.shape(tf.concat(0, [t3, t4])) ==> [4, 3] + tf.shape(tf.concat(1, [t3, t4])) ==> [2, 6] + ``` + + Args: + concat_dim: 0-D `int32` `Tensor`. Dimension along which to concatenate. + values: A list of `Tensor` objects or a single `Tensor`. + name: A name for the operation (optional). + + Returns: + A `Tensor` resulting from concatenation of the input tensors. + """ + if not isinstance(values, (list)): + values = [values] + # TODO(mrry): Change to return values? + if len(values) == 1: # Degenerate case of one tensor. + return identity(values[0], name=name) + return gen_array_ops._concat(concat_dim=concat_dim, + values=values, + name=name) + + +@ops.RegisterShape("Pack") +def _PackShape(op): + input_shape = op.inputs[0].get_shape() + for inp in op.inputs[1:]: + input_shape = input_shape.merge_with(inp.get_shape()) + return [tensor_shape.TensorShape([len(op.inputs)]).concatenate(input_shape)] + + +@ops.RegisterShape("Unpack") +def _UnpackShape(op): + input_shape = op.inputs[0].get_shape() + return [input_shape[1:]] * op.get_attr("num") + + +@ops.RegisterShape("Concat") +def _ConcatShape(op): + concat_dim = tensor_util.ConstantValue(op.inputs[0]) + if concat_dim is None: + # Return an unknown shape with the same rank as the inputs, or an + # unknown rank if no input's rank is known. + rank = None + for value in op.inputs[1:]: + if rank is not None: + value.get_shape().assert_has_rank(rank) + else: + rank = value.get_shape().ndims + return [tensor_shape.unknown_shape(ndims=max(rank, 1))] + + else: + # Merge all the non-concat dims, and sum the concat dim to make an + # output shape. + concat_dim = int(concat_dim) + output_shape = op.inputs[1].get_shape() + # TODO(irving): Remove once !kAllowLegacyScalars. + if output_shape.ndims == 0: + output_shape = tensor_shape.TensorShape([1]) + for value in op.inputs[2:]: + value_shape = value.get_shape() + if value_shape.ndims is not None and concat_dim >= value_shape.ndims: + if value_shape.ndims == 0 and concat_dim == 0: + # Let concat handle scalars + # TODO(irving): Remove once !kAllowLegacyScalars. + value_shape = tensor_shape.TensorShape([1]) + else: + raise ValueError("concat_dim is out of range (values rank = %d)" % + value_shape.ndims) + before = output_shape[:concat_dim].merge_with(value_shape[:concat_dim]) + at = output_shape[concat_dim] + value_shape[concat_dim] + after = output_shape[ + concat_dim + 1:].merge_with(value_shape[concat_dim + 1:]) + output_shape = before.concatenate(at).concatenate(after) + return [output_shape] + + +def sparse_mask(a, mask_indices, name=None): + """Masks elements of `IndexedSlices`. + + Given an `IndexedSlices` instance `a`, returns another `IndexedSlices` that + contains a subset of the slices of `a`. Only the slices at indices specified + in `mask_indices` are returned. + + This is useful when you need to extract a subset of slices in an + `IndexedSlices` object. 
+ + For example: + + ```python + # `a` contains slices at indices [12, 26, 37, 45] from a large tensor + # with shape [1000, 10] + a.indices => [12, 26, 37, 45] + tf.shape(a.values) => [4, 10] + + # `b` will be the subset of `a` slices at its second and third indices, so + # we want to mask of its first and last indices (which are at absolute + # indices 12, 45) + b = tf.sparse_mask(a, [12, 45]) + + b.indices => [26, 37] + tf.shape(b.values) => [2, 10] + + ``` + + Args: + * `a`: An `IndexedSlices` instance. + * `mask_indices`: Indices of elements to mask. + * `name`: A name for the operation (optional). + + Returns: + The masked `IndexedSlices` instance. + """ + with ops.op_scope([a, mask_indices], name, "sparse_mask") as name: + indices = a.indices + out_indices, to_gather = listdiff(indices, mask_indices) + out_values = gather(a.values, to_gather, name=name) + return ops.IndexedSlices(out_values, out_indices, a.dense_shape) + + +def split(split_dim, num_split, value, name="split"): + """Splits a tensor into `num_split` tensors along one dimension. + + Splits `value` along dimension `split_dim` into `num_split` smaller tensors. + Requires that `num_split` evenly divide `value.shape[split_dim]`. + + For example: + + ```python + # 'value' is a tensor with shape [5, 30] + # Split 'value' into 3 tensors along dimension 1 + split0, split1, split2 = tf.split(1, 3, value) + tf.shape(split0) ==> [5, 10] + ``` + + Args: + split_dim: A 0-D `int32` `Tensor`. The dimension along which to split. + Must be in the range `[0, rank(value))`. + num_split: A 0-D `int32` `Tensor`. The number of ways to split. + value: The `Tensor` to split. + name: A name for the operation (optional). + + Returns: + `num_split` `Tensor` objects resulting from splitting `value`. + """ + return gen_array_ops._split(split_dim=split_dim, + num_split=num_split, + value=value, + name=name) + + +@ops.RegisterShape("Reverse") +def _ReverseShape(op): + return [op.inputs[0].get_shape().with_rank_at_most(8)] + + +def transpose(a, perm=None, name="transpose"): + """Transposes `a`. Permutes the dimensions according to `perm`. + + The returned tensor's dimension i will correspond to the input dimension + `perm[i]`. If `perm` is not given, it is set to (n-1...0), where n is + the rank of the input tensor. Hence by default, this operation performs a + regular matrix transpose on 2-D input Tensors. + + For example: + + ```python + # 'x' is [[1 2 3] + # [4 5 6]] + tf.transpose(x) ==> [[1 4] + [2 5] + [3 6]] + + # Equivalently + tf.transpose(x perm=[0, 1]) ==> [[1 4] + [2 5] + [3 6]] + + # 'perm' is more useful for n-dimensional tensors, for n > 2 + # 'x' is [[[1 2 3] + # [4 5 6]] + # [[7 8 9] + # [10 11 12]]] + # Take the transpose of the matrices in dimension-0 + tf.transpose(b, perm=[0, 2, 1]) ==> [[[1 4] + [2 5] + [3 6]] + + [[7 10] + [8 11] + [9 12]]] + ``` + + Args: + a: A `Tensor`. + perm: A permutation of the dimensions of `a`. + name: A name for the operation (optional). + + Returns: + A transposed `Tensor`. + """ + with ops.op_scope([a], name, "transpose") as name: + if perm is None: + dims = gen_math_ops._range(0, gen_array_ops.rank(a), 1) + perm = gen_array_ops.reverse(dims, [True]) + ret = gen_array_ops.transpose(a, perm, name=name) + # NOTE(mrry): Setting the shape explicitly because + # reverse is not handled by the shape function. 
+ input_shape = ret.op.inputs[0].get_shape().dims + if input_shape is not None: + ret.set_shape(input_shape[::-1]) + else: + ret = gen_array_ops.transpose(a, perm, name=name) + return ret + + +def zeros(shape, dtype=types.float32, name=None): + """Creates a tensor with all elements set to zero. + + This operation returns a tensor of type `dtype` with shape `shape` and + all elements set to zero. + + For example: + + ```python + tf.zeros([3, 4], int32) ==> [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]] + ``` + + Args: + shape: Either a list of integers, or a 1-D `Tensor` of type `int32`. + dtype: The type of an element in the resulting `Tensor`. + name: A name for the operation (optional). + + Returns: + A `Tensor` with all elements set to zero. + """ + with ops.op_scope([shape], name, "zeros") as name: + if isinstance(shape, list): + output = constant(0, shape=shape, dtype=dtype, name=name) + else: + shape = ops.convert_to_tensor(shape, name="shape") + output = fill(shape, constant(0, dtype=dtype), name=name) + assert output.dtype.base_dtype == types.as_dtype(dtype).base_dtype + return output + + +def zeros_like(tensor, dtype=None, name=None): + """Creates a tensor with all elements set to zero. + + Given a single tensor (`tensor`), this operation returns a tensor of the + same type and shape as `tensor` with all elements set to zero. Optionally, + you can use `dtype` to specify a new type for the returned tensor. + + For example: + + ```python + # 'tensor' is [[1, 2, 3], [4, 5, 6]] + tf.zeros_like(tensor) ==> [[0, 0, 0], [0, 0, 0]] + ``` + + Args: + tensor: A `Tensor`. + dtype: A type for the returned `Tensor`. Must be `float32`, `float64`, + `int8`, `int16`, `int32`, `int64`, `uint8`, or `complex64`. + name: A name for the operation (optional). + + Returns: + A `Tensor` with all elements set to zero. + """ + with ops.op_scope([tensor], name, "zeros_like") as name: + tensor = ops.convert_to_tensor(tensor, name="tensor") + zeros_shape = shape(tensor) + if dtype is None: + dtype = tensor.dtype + return zeros(zeros_shape, dtype=dtype, name=name) + + +def ones_like(tensor, dtype=None, name=None): + """Creates a tensor with all elements set to 1. + + Given a single tensor (`tensor`), this operation returns a tensor of the same + type and shape as `tensor` with all elements set to 1. Optionally, you can + specify a new type (`dtype`) for the returned tensor. + + For example: + + ```python + # 'tensor' is [[1, 2, 3], [4, 5, 6]] + tf.ones_like(tensor) ==> [[1, 1, 1], [1, 1, 1]] + ``` + + Args: + tensor: A `Tensor`. + dtype: A type for the returned `Tensor`. Must be `float32`, `float64`, + `int8`, `int16`, `int32`, `int64`, `uint8`, or `complex64`. + name: A name for the operation (optional). + + Returns: + A `Tensor` with all elements set to 1. + """ + with ops.op_scope([tensor], name, "ones_like") as name: + tensor = ops.convert_to_tensor(tensor, name="tensor") + ones_shape = shape(tensor) + if dtype is None: + dtype = tensor.dtype + return ones(ones_shape, dtype=dtype, name=name) + + +def zeros_initializer(shape, dtype=types.float32): + """An adaptor for zeros() to match the Initializer spec.""" + return zeros(shape, dtype) + + +def ones(shape, dtype=types.float32, name=None): + """Creates a tensor with all elements set to 1. + + This operation returns a tensor of type `dtype` with shape `shape` and all + elements set to 1. + + For example: + + ```python + tf.ones([2, 3], int32) ==> [[1, 1, 1], [1, 1, 1]] + ``` + + Args: + shape: Either a list of integers, or a 1-D `Tensor` of type `int32`. 
+ dtype: The type of an element in the resulting `Tensor`. + name: A name for the operation (optional). + + Returns: + A `Tensor` with all elements set to 1. + """ + with ops.op_scope([shape], name, "ones") as name: + if isinstance(shape, list): + output = constant(1, shape=shape, dtype=dtype, name=name) + else: + shape = ops.convert_to_tensor(shape, name="shape") + output = fill(shape, constant(1, dtype=dtype), name=name) + assert output.dtype.base_dtype == types.as_dtype(dtype).base_dtype + return output + + +def placeholder(dtype, shape=None, name=None): + """Inserts a placeholder for a tensor that will be always fed. + + **Important**: This tensor will produce an error if evaluated. Its value must + be fed using the `feed_dict` optional argument to `Session.run()`, + `Tensor.eval()`, or `Operation.run()`. + + For example: + + ```python + x = tf.placeholder(float, shape=(1024, 1024)) + y = tf.matmul(x, x) + + with tf.Session() as sess: + print sess.run(y) # ERROR: will fail because x was not fed. + + rand_array = np.random.rand(1024, 1024) + print sess.run(y, feed_dict={x: rand_array}) # Will succeed. + ``` + + Args: + dtype: The type of elements in the tensor to be fed. + shape: The shape of the tensor to be fed (optional). If the shape is not + specified, you can feed a tensor of any shape. + name: A name for the operation (optional). + + Returns: + A `Tensor` that may be used as a handle for feeding a value, but not + evaluated directly. + """ + shape = tensor_shape.as_shape(shape) + if shape.is_fully_defined(): + dim_list = shape.as_list() + else: + dim_list = [] + ret = gen_array_ops._placeholder( + dtype=dtype, + shape=dim_list, + name=name) + ret.set_shape(shape) + return ret + + +@ops.RegisterShape("Placeholder") +def _PlaceholderShape(op): + given_shape = tensor_util.TensorShapeProtoToList(op.get_attr("shape")) + if given_shape: + return [tensor_shape.TensorShape(given_shape)] + else: + return [tensor_shape.unknown_shape()] + + +@ops.RegisterShape("CheckNumerics") +@ops.RegisterShape("Identity") +@ops.RegisterShape("RefIdentity") +@ops.RegisterShape("StopGradient") +def _UnchangedShape(op): + return [op.inputs[0].get_shape()] + + +@ops.RegisterShape("Rank") +@ops.RegisterShape("Size") +def _ScalarShape(unused_op): + return [tensor_shape.scalar()] + + +@ops.RegisterShape("Slice") +def _SliceShape(op): + """Shape function for array_ops.slice.""" + input_shape = op.inputs[0].get_shape() + begin_shape = op.inputs[1].get_shape().with_rank_at_most(1) + sizes_shape = op.inputs[2].get_shape().with_rank_at_most(1) + rank_vector_shape = begin_shape.merge_with(sizes_shape) + ndims = rank_vector_shape.num_elements() + if ndims is not None: + input_shape.assert_has_rank(ndims) + begin_value = tensor_util.ConstantValue(op.inputs[1]) + sizes_value = tensor_util.ConstantValue(op.inputs[2]) + if sizes_value is not None: + returned_dims = [] + for i, slice_size in enumerate(sizes_value.ravel()): + if slice_size != -1: + returned_dims.append(slice_size) + elif begin_value is not None: + returned_dims.append(input_shape[i] - begin_value[i]) + else: + returned_dims.append(None) + return [tensor_shape.TensorShape(returned_dims)] + else: + if input_shape.ndims is not None: + return [tensor_shape.unknown_shape(ndims=input_shape.ndims)] + elif ndims is not None: + return [tensor_shape.unknown_shape(ndims=ndims)] + else: + return [tensor_shape.unknown_shape()] + + +@ops.RegisterShape("Gather") +def _GatherShape(op): + """Shape function for array_ops.gather.""" + params_shape = op.inputs[0].get_shape() + 
indices_shape = op.inputs[1].get_shape() + return [indices_shape.concatenate(params_shape[1:])] + + +@ops.RegisterShape("Unique") +def _UniqueShape(op): + """Shape function for array_ops.Unique.""" + # The output is a vector with data-dependent length. + input_shape = op.inputs[0].get_shape() + input_shape.assert_has_rank(1) + return [tensor_shape.vector(None), input_shape] + + +@ops.RegisterShape("Diag") +def _DiagShape(op): + """Shape function for array_ops.diag. + + This op has one input (of rank k <= 3), and one output (of rank 2k), + where the shape of the output is the concatenation of the input + shape with itself. + + Args: + op: A Diag Operation. + + Returns: + A single-element list containing the shape of the output. + """ + input_shape = op.inputs[0].get_shape().with_rank_at_most(3) + return [input_shape.concatenate(input_shape)] + + +@ops.RegisterShape("ExpandDims") +def _ExpandDimsShape(op): + """Determine shape for expand op's output tensor. + + Args: + op: Operation for which to determine shape. + op.inputs[0] is the input tensor. + op.inputs[1] is the dimension in which to expand. + Returns: + Shape of op's output tensor. + Raises: + ValueError: If dim is outside of [-rank - 1, rank], where rank is the number + of dimensions in the input tensor. + """ + input_shape = op.inputs[0].get_shape() + if input_shape.dims is None: + return [tensor_shape.unknown_shape()] + dim = tensor_util.ConstantValue(op.inputs[1]) + input_ndims = input_shape.ndims + if dim < -input_ndims - 1 or dim > input_ndims: + raise ValueError( + "dim %d not in [%d, %d]." % (dim, -input_ndims, input_ndims)) + if dim < 0: + dim += (input_ndims + 1) + result_shape = list(input_shape.dims) + result_shape.insert(dim, 1) + return [tensor_shape.TensorShape(result_shape)] + + +@ops.RegisterShape("Squeeze") +def _SqueezeShape(op): + """Determine shape for squeeze op's output tensor. + + Args: + op: Operation for which to determine shape. + Returns: + Shape of op's output tensor. + Raises: + ValueError: if squeeze_dims includes a dimension outside of [-rank, rank), + where rank is the number of dimensions in the input tensor. Or, if + squeeze_dims includes a dimension for which input shape has a value + not equal to 1. + """ + input_shape = op.inputs[0].get_shape() + if input_shape.dims is None: + return [tensor_shape.unknown_shape()] + + squeeze_dims = op.get_attr("squeeze_dims") or [] + wrapped_squeeze_dims = [] + input_ndims = input_shape.ndims + for i, squeeze_dim in enumerate(squeeze_dims): + if squeeze_dim < -input_ndims or squeeze_dim >= input_ndims: + raise ValueError( + "squeeze_dims[%d]=%d not in [%d, %d)." % ( + i, squeeze_dim, -input_ndims, input_ndims)) + if squeeze_dim < 0: + squeeze_dim += input_ndims + wrapped_squeeze_dims.append(squeeze_dim) + + result_shape = [] + for i, dim in enumerate([d.value for d in input_shape.dims]): + is_explicit_match = i in wrapped_squeeze_dims + if is_explicit_match or not wrapped_squeeze_dims: + if dim is None: + return [tensor_shape.unknown_shape()] + if dim != 1: + if is_explicit_match: + raise ValueError( + "Can not squeeze dim[%d], expected a dimension of 1, got %d." 
% ( + i, dim)) + result_shape.append(dim) + else: + result_shape.append(dim) + return [tensor_shape.TensorShape(result_shape)] + + +@ops.RegisterShape("Reshape") +def _ReshapeShape(op): + """Shape function for Reshape op.""" + input_shape = op.inputs[0].get_shape() + new_shape_shape = op.inputs[1].get_shape().with_rank_at_most(1) + new_shape = tensor_util.ConstantValue(op.inputs[1]) + if new_shape is None: + # Attempt to infer the rank of the output from the length of + # new_shape. + return [tensor_shape.unknown_shape(ndims=new_shape_shape.num_elements())] + new_shape = np.reshape(new_shape, -1).tolist() + if -1 not in new_shape: + # The new shape is fully defined. + return [tensor_shape.TensorShape(new_shape)] + elif input_shape.is_fully_defined(): + # We know the input shape, so we can calculate the missing + # dimension in the new_shape. + num_elements = 1 + for dim in input_shape.dims: + num_elements *= dim.value + known_elements = 1 + unknown_index = None + for i, dim in enumerate(new_shape): + if dim == -1: + unknown_index = i + else: + known_elements *= dim + if known_elements == 0: + raise ValueError("cannot infer the missing input size for " + "an empty tensor unless all specified " + "input sizes are non-zero") + if num_elements % known_elements != 0: + raise ValueError("input has %s elements, which isn't divisible by %d" % + (num_elements, known_elements)) + new_shape[unknown_index] = num_elements / known_elements + return [tensor_shape.TensorShape(new_shape)] + else: + # We don't know the input shape, but we know n-1 of the dimensions + # in the new shape. + new_shape[new_shape.index(-1)] = None + return [tensor_shape.TensorShape(new_shape)] + + +@ops.RegisterShape("BroadcastGradientArgs") +def _BroadcastGradientArgsShape(op): + """Shape function for the BroadcastGradientArgs op.""" + # TODO(mrry): Implement ConstantValue for BroadcastGradientArgs? + op.inputs[0].get_shape().assert_has_rank(1) + op.inputs[1].get_shape().assert_has_rank(1) + return [tensor_shape.vector(None), tensor_shape.vector(None)] + + +@ops.RegisterShape("Fill") +def _FillShape(op): + """Shape function for the Fill op. + + This op takes a vector of dimensions and a scalar, and produces a + tensor with the given dimensions. + + Args: + op: A Fill Operation. + + Returns: + A single-element list containing the shape of the output. + """ + dimensions_shape = op.inputs[0].get_shape().with_rank_at_most(1) + op.inputs[1].get_shape().assert_is_compatible_with(tensor_shape.scalar()) + fill_dims = tensor_util.ConstantValue(op.inputs[0]) + if fill_dims is None: + # Attempt to infer the rank of the output from the length of + # dimensions. + return [tensor_shape.unknown_shape(ndims=dimensions_shape.num_elements())] + else: + return [tensor_shape.TensorShape(fill_dims.tolist())] + + +@ops.RegisterShape("InvertPermutation") +def _InvertPermutationShape(op): + """Shape function for the InvertPermutation op.""" + return [op.inputs[0].get_shape().with_rank(1)] + + +@ops.RegisterShape("ListDiff") +def _ListDiffShape(op): + """Shape function for the ListDiff op.""" + op.inputs[0].get_shape().assert_has_rank(1) + op.inputs[1].get_shape().assert_has_rank(1) + # TODO(mrry): Indicate that the length falls within an interval? + return [tensor_shape.vector(None)] * 2 + + +@ops.RegisterShape("Pad") +def _PadShape(op): + """Shape function for the Pad op. + + This op has two inputs: + + * input: A rank-N tensor. 
+ * paddings: An N-by-2 matrix, in which the i^th row contains the + number of padding elements to add before and after `input` in the + i^th dimension. + + It has one output, which has the same rank as input, and additional + elements according to the values in paddings. + + Args: + op: A Pad Operation. + + Returns: + A single-element list containing the shape of the output. + + Raises: + ValueError: If the input shapes are incompatible. + """ + paddings_shape = op.inputs[1].get_shape().with_rank(2) + input_shape = op.inputs[0].get_shape() + if input_shape.ndims == 0 and paddings_shape[0].value == 1: + # TODO(irving): Remove once !kAllowLegacyScalars. + input_shape = tensor_shape.TensorShape([1]) + else: + input_shape = input_shape.with_rank(paddings_shape[0].value) + paddings_shape = paddings_shape.merge_with( + tensor_shape.matrix(input_shape.ndims, 2)) + paddings = tensor_util.ConstantValue(op.inputs[1]) + if paddings is None: + return [tensor_shape.unknown_shape(ndims=input_shape.ndims)] + else: + output_dims = [] + for i, dim in enumerate(input_shape.dims): + if paddings[i, 0] < 0 or paddings[i, 1] < 0: + raise ValueError("paddings must be non-negative") + output_dims.append(dim + paddings[i, 0] + paddings[i, 1]) + return [tensor_shape.TensorShape(output_dims)] + + +@ops.RegisterShape("ReverseSequence") +def _ReverseSequenceShape(op): + """Shape function for the ReverseSequence op. + + This op has two inputs: + + * input: A rank-N tensor with size B in the 0th dimension. + * seq_lens: A vector of length B. + + It has one output, with the same size as input. + + Args: + op: A ReverseSequence Operation. + + Returns: + A single-element list containing the shape of the output. + + Raises: + ValueError: If the input shapes are incompatible. + """ + input_shape = op.inputs[0].get_shape() + seq_lens_shape = op.inputs[1].get_shape().with_rank(1) + batch_size = input_shape[0].merge_with(seq_lens_shape[0]) + input_shape = tensor_shape.TensorShape([batch_size]).concatenate( + input_shape[1:]) + seq_dim = op.get_attr("seq_dim") + if seq_dim >= input_shape.ndims: + raise ValueError("seq_dim must be < input.dims() (%d vs %d)" % + (seq_dim, input_shape.ndims)) + return [input_shape] + + +@ops.RegisterShape("Shape") +def _ShapeShape(op): + """Shape function for the Shape op.""" + input_shape = op.inputs[0].get_shape() + return [tensor_shape.vector(input_shape.ndims)] + + +@ops.RegisterShape("Transpose") +def _TransposeShape(op): + """Shape function for the Transpose op. + + This op takes two inputs: + + * input: a rank-N tensor of arbitrary shape. + * shuffle: a length-N vector. + + Its output is the rank-N tensor computed by permuting the dimensions + of input according to shuffle. + + Args: + op: A Transpose op. + + Returns: + A single-element list containing the shape of the output. + + Raises: + ValueError: If the shapes of input and shuffle are incompatible. + IndexError: If shuffle contains an index that is >= the rank of input. 
+ """ + input_shape = op.inputs[0].get_shape() + transpose_shape = op.inputs[1].get_shape().merge_with(tensor_shape.vector( + input_shape.ndims)) + transpose_vec = tensor_util.ConstantValue(op.inputs[1]) + if transpose_vec is None: + return [tensor_shape.unknown_shape(ndims=transpose_shape[0].value)] + else: + return [tensor_shape.TensorShape([input_shape[i] + for i in transpose_vec.tolist()])] + + +@ops.RegisterShape("Split") +def _SplitShape(op): + """Shape function for the Split op.""" + split_dim = tensor_util.ConstantValue(op.inputs[0]) + num_split = len(op.outputs) + input_shape = op.inputs[1].get_shape() + if split_dim is None: + return [tensor_shape.unknown_shape(ndims=input_shape.ndims)] * num_split + else: + split_dim = int(split_dim) + input_shape = input_shape.with_rank_at_least(split_dim + 1) + if not (input_shape[split_dim] % num_split).is_compatible_with(0): + raise ValueError( + "Number of ways to split should evenly divide the split " + "dimension but got split_dim %d (size = %d) and num_split %d" % + (split_dim, input_shape[split_dim].value, num_split)) + prefix = input_shape[:split_dim] + size_in_split_dim = input_shape[split_dim] / num_split + suffix = input_shape[split_dim + 1:] + output_shape = prefix.concatenate(size_in_split_dim).concatenate(suffix) + return [output_shape] * num_split + + +@ops.RegisterShape("Tile") +def _TileShape(op): + """Shape function for the Tile op. + + This op has two inputs: + + * input: A rank-N tensor. + * multiples: A length-N vector, in which the i^th element contains + the factor by which `input` will be tiled in the i^th dimension. + + It has one output, which has the same rank as input, and additional + elements according to the values in multiples + + Args: + op: A Tile Operation. + + Returns: + A single-element list containing the shape of the output. + """ + multiples_shape = op.inputs[1].get_shape().with_rank_at_most(1) + input_shape = op.inputs[0].get_shape().with_rank(multiples_shape.num_elements()) + multiples = tensor_util.ConstantValue(op.inputs[1]) + if multiples is None: + return [tensor_shape.unknown_shape(ndims=input_shape.ndims)] + else: + output_dims = [] + multiples = multiples.ravel() + for i, dim in enumerate(input_shape.dims): + output_dims.append(dim * multiples[i]) + return [tensor_shape.TensorShape(output_dims)] + + +@ops.RegisterShape("TileGrad") +def _TileGradShape(op): + """Shape function for the TileGrad op.""" + multiples_shape = op.inputs[1].get_shape().with_rank_at_most(1) + input_shape = op.inputs[0].get_shape().with_rank(multiples_shape.num_elements()) + multiples = tensor_util.ConstantValue(op.inputs[1]) + if multiples is None: + return [tensor_shape.unknown_shape(ndims=input_shape.ndims)] + else: + output_dims = [] + for i, dim in enumerate(input_shape.dims): + output_dims.append(dim / multiples[i]) + return [tensor_shape.TensorShape(output_dims)] + + +@ops.RegisterShape("Where") +def _WhereShape(op): + """Shape function for the Where op.""" + input_shape = op.inputs[0].get_shape() + return [tensor_shape.matrix(None, input_shape.ndims)] + + +@ops.RegisterShape("ZerosLike") +def _ZerosLikeShape(op): + """Shape function for the ZerosLike op.""" + return [op.inputs[0].get_shape()] + + +def edit_distance(hypothesis, truth, normalize=True, name="edit_distance"): + """Computes the Levenshtein distance between sequences. + + This operation takes variable-length sequences (`hypothesis` and `truth`), + each provided as a `SparseTensor`, and computes the Levenshtein distance. 
+ You can normalize the edit distance by length of `truth` by setting + `normalize` to true. + + For example, given the following input: + + ```python + # 'hypothesis' is a tensor of shape `[2, 1]` with variable-length values: + # (0,0) = ["a"] + # (1,0) = ["b"] + hypothesis = tf.SparseTensor( + [[0, 0, 0], + [1, 0, 0]], + ["a", "b"] + (2, 1, 1)) + + # 'truth' is a tensor of shape `[2, 2]` with variable-length values: + # (0,0) = [] + # (0,1) = ["a"] + # (1,0) = ["b", "c"] + # (1,1) = ["a"] + truth = tf.SparseTensor( + [[0, 1, 0], + [1, 0, 0], + [1, 0, 1], + [1, 1, 0]] + ["a", "b", "c", "a"], + (2, 2, 2)) + + normalize = True + ``` + + This operation would return the following: + + ```python + # 'output' is a tensor of shape `[2, 2]` with edit distances normalized + # by 'truth' lengths. + output ==> [[inf, 1.0], # (0,0): no truth, (0,1): no hypothesis + [0.5, 1.0]] # (1,0): addition, (1,1): no hypothesis + ``` + + Args: + hypothesis: A `SparseTensor` containing hypothesis sequences. + truth: A `SparseTensor` containing truth sequences. + normalize: A `bool`. If `True`, normalizes the Levenshtein distance by + length of `truth.` + name: A name for the operation (optional). + + Returns: + A dense `Tensor` with rank `R - 1`, where R is the rank of the + `SparseTensor` inputs `hypothesis` and `truth`. + + Raises: + TypeError: If either `hypothesis` or `truth` are not a `SparseTensor`. + """ + if not isinstance(hypothesis, ops.SparseTensor): + raise TypeError("Hypothesis must be a SparseTensor") + if not isinstance(truth, ops.SparseTensor): + raise TypeError("Truth must be a SparseTensor") + + return gen_array_ops._edit_distance(hypothesis.indices, + hypothesis.values, + hypothesis.shape, + truth.indices, + truth.values, + truth.shape, + normalize=normalize, + name=name) + + +@ops.RegisterShape("EditDistance") +def _EditDistanceShape(op): + """Shape function for the EditDistance op.""" + hypothesis_shape = tensor_util.ConstantValue(op.inputs[2]) + truth_shape = tensor_util.ConstantValue(op.inputs[5]) + if hypothesis_shape is not None and truth_shape is not None: + if len(hypothesis_shape) != len(truth_shape): + raise ValueError( + "Inconsistent ranks in hypothesis and truth. Saw shapes: %s and %s" % + (str(hypothesis_shape), str(truth_shape))) + return [tensor_shape.TensorShape( + [max(h, t) for h, t in zip(hypothesis_shape[:-1], truth_shape[:-1])])] + + return [tensor_shape.unknown_shape()] + + +# The remaining ops do not change the shape of their inputs. +@ops.RegisterShape("Quantize") +@ops.RegisterShape("Dequantize") +def _QuantizeDequantizeShape(op): + unused_min_range = op.inputs[1].get_shape().merge_with(tensor_shape.scalar()) + unused_max_range = op.inputs[2].get_shape().merge_with(tensor_shape.scalar()) + return common_shapes.unchanged_shape(op) diff --git a/tensorflow/python/ops/attention_ops.py b/tensorflow/python/ops/attention_ops.py new file mode 100644 index 0000000000..4829bcd7cd --- /dev/null +++ b/tensorflow/python/ops/attention_ops.py @@ -0,0 +1,34 @@ +"""Operations for implementing attention. 
+""" +import tensorflow.python.platform + +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.framework import tensor_util +from tensorflow.python.ops import gen_attention_ops +# pylint: disable=wildcard-import +from tensorflow.python.ops.gen_attention_ops import * + + +# TODO(bsteiner): Implement the gradient function for extract_glimpse +ops.NoGradient("ExtractGlimpse") + + +@ops.RegisterShape("ExtractGlimpse") +def _ExtractGlimpseShape(op): + """Shape function for ExtractGlimpse op.""" + input_shape = op.inputs[0].get_shape().with_rank(4) + unused_size_shape = op.inputs[1].get_shape().merge_with( + tensor_shape.vector(2)) + offsets_shape = op.inputs[2].get_shape().merge_with( + input_shape[:1].concatenate([2])) + offsets_shape = offsets_shape + size_value = tensor_util.ConstantValue(op.inputs[1]) + if size_value is not None: + height = size_value[0] + width = size_value[1] + else: + height = None + width = None + return [tensor_shape.TensorShape( + [input_shape[0], height, width, input_shape[3]])] diff --git a/tensorflow/python/ops/candidate_sampling_ops.py b/tensorflow/python/ops/candidate_sampling_ops.py new file mode 100644 index 0000000000..06857c0adc --- /dev/null +++ b/tensorflow/python/ops/candidate_sampling_ops.py @@ -0,0 +1,365 @@ +"""Wrappers for primitive Neural Net (NN) Operations.""" + +from tensorflow.python.framework import ops +from tensorflow.python.framework import random_seed +from tensorflow.python.framework import tensor_shape +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_candidate_sampling_ops +from tensorflow.python.ops import math_ops + + +def uniform_candidate_sampler(true_classes, num_true, num_sampled, unique, + range_max, seed=None, name=None): + """Samples a set of classes using a uniform base distribution. + + This operation randomly samples a tensor of sampled classes + (`sampled_candidates`) from the range of integers `[0, range_max]`. + + The elements of `sampled_candidates` are drawn without replacement + (if `unique=True`) or with replacement (if `unique=False`) from + the base distribution. + + The base distribution for this operation is the uniform distribution + over the range of integers `[0, range_max]`. + + In addition, this operation returns tensors `true_expected_count` + and `sampled_expected_count` representing the number of times each + of the target classes (`true_classes`) and the sampled + classes (`sampled_candidates`) is expected to occur in an average + tensor of sampled classes. These values correspond to `Q(y|x)` + defined in [this + document](http://www.tensorflow.org/extras/candidate_sampling.pdf). + If `unique=True`, then these are post-rejection probabilities and we + compute them approximately. + + Args: + true_classes: A `Tensor` of type `int64` and shape `[batch_size, + num_true]`. The target classes. + num_true: An `int`. The number of target classes per training example. + num_sampled: An `int`. The number of classes to randomly sample per batch. + unique: A `bool`. Determines whether all sampled classes in a batch are + unique. + range_max: An `int`. The number of possible classes. + seed: An `int`. An operation-specific seed. Default is 0. + name: A name for the operation (optional). + + Returns: + sampled_candidates: A tensor of type `int64` and shape `[num_sampled]`. + The sampled classes. + true_expected_count: A tensor of type `float`. Same shape as + `true_classes`. 
The expected counts under the sampling distribution + of each of `true_classes`. + sampled_expected_count: A tensor of type `float`. Same shape as + `sampled_candidates`. The expected counts under the sampling distribution + of each of `sampled_candidates`. + """ + seed1, seed2 = random_seed.get_seed(seed) + return gen_candidate_sampling_ops._uniform_candidate_sampler( + true_classes, num_true, num_sampled, unique, range_max, seed=seed1, + seed2=seed2, name=name) + + +def log_uniform_candidate_sampler(true_classes, num_true, num_sampled, unique, + range_max, seed=None, name=None): + """Samples a set of classes using a log-uniform (Zipfian) base distribution. + + This operation randomly samples a tensor of sampled classes + (`sampled_candidates`) from the range of integers `[0, range_max]`. + + The elements of `sampled_candidates` are drawn without replacement + (if `unique=True`) or with replacement (if `unique=False`) from + the base distribution. + + The base distribution for this operation is an approximately log-uniform + or Zipfian distribution: + + `P(class) = (log(class + 2) - log(class + 1)) / log(range_max + 1)` + + This sampler is useful when the target classes approximately follow such + a distribution - for example, if the classes represent words in a lexicon + sorted in decreasing order of frequency. If your classes are not ordered by + decreasing frequency, do not use this op. + + In addition, this operation returns tensors `true_expected_count` + and `sampled_expected_count` representing the number of times each + of the target classes (`true_classes`) and the sampled + classes (`sampled_candidates`) is expected to occur in an average + tensor of sampled classes. These values correspond to `Q(y|x)` + defined in [this + document](http://www.tensorflow.org/extras/candidate_sampling.pdf). + If `unique=True`, then these are post-rejection probabilities and we + compute them approximately. + + Args: + true_classes: A `Tensor` of type `int64` and shape `[batch_size, + num_true]`. The target classes. + num_true: An `int`. The number of target classes per training example. + num_sampled: An `int`. The number of classes to randomly sample per batch. + unique: A `bool`. Determines whether all sampled classes in a batch are + unique. + range_max: An `int`. The number of possible classes. + seed: An `int`. An operation-specific seed. Default is 0. + name: A name for the operation (optional). + + Returns: + sampled_candidates: A tensor of type `int64` and shape `[num_sampled]`. + The sampled classes. + true_expected_count: A tensor of type `float`. Same shape as + `true_classes`. The expected counts under the sampling distribution + of each of `true_classes`. + sampled_expected_count: A tensor of type `float`. Same shape as + `sampled_candidates`. The expected counts under the sampling distribution + of each of `sampled_candidates`. + """ + seed1, seed2 = random_seed.get_seed(seed) + return gen_candidate_sampling_ops._log_uniform_candidate_sampler( + true_classes, num_true, num_sampled, unique, range_max, seed=seed1, + seed2=seed2, name=name) + + +def learned_unigram_candidate_sampler(true_classes, num_true, num_sampled, + unique, range_max, seed=None, name=None): + """Samples a set of classes from a distribution learned during training. + + This operation randomly samples a tensor of sampled classes + (`sampled_candidates`) from the range of integers `[0, range_max]`. 
+ + The elements of `sampled_candidates` are drawn without replacement + (if `unique=True`) or with replacement (if `unique=False`) from + the base distribution. + + The base distribution for this operation is constructed on the fly + during training. It is a unigram distribution over the target + classes seen so far during training. Every integer in `[0, range_max]` + begins with a weight of 1, and is incremented by 1 each time it is + seen as a target class. The base distribution is not saved to checkpoints, + so it is reset when the model is reloaded. + + In addition, this operation returns tensors `true_expected_count` + and `sampled_expected_count` representing the number of times each + of the target classes (`true_classes`) and the sampled + classes (`sampled_candidates`) is expected to occur in an average + tensor of sampled classes. These values correspond to `Q(y|x)` + defined in [this + document](http://www.tensorflow.org/extras/candidate_sampling.pdf). + If `unique=True`, then these are post-rejection probabilities and we + compute them approximately. + + Args: + true_classes: A `Tensor` of type `int64` and shape `[batch_size, + num_true]`. The target classes. + num_true: An `int`. The number of target classes per training example. + num_sampled: An `int`. The number of classes to randomly sample per batch. + unique: A `bool`. Determines whether all sampled classes in a batch are + unique. + range_max: An `int`. The number of possible classes. + seed: An `int`. An operation-specific seed. Default is 0. + name: A name for the operation (optional). + + Returns: + sampled_candidates: A tensor of type `int64` and shape `[num_sampled]`. + The sampled classes. + true_expected_count: A tensor of type `float`. Same shape as + `true_classes`. The expected counts under the sampling distribution + of each of `true_classes`. + sampled_expected_count: A tensor of type `float`. Same shape as + `sampled_candidates`. The expected counts under the sampling distribution + of each of `sampled_candidates`. + + """ + seed1, seed2 = random_seed.get_seed(seed) + return gen_candidate_sampling_ops._learned_unigram_candidate_sampler( + true_classes, num_true, num_sampled, unique, range_max, seed=seed1, + seed2=seed2, name=name) + + +def fixed_unigram_candidate_sampler(true_classes, num_true, num_sampled, unique, + range_max, vocab_file='', distortion=0.0, + num_reserved_ids=0, num_shards=1, shard=0, + unigrams=[], seed=None, name=None): + """Samples a set of classes using the provided (fixed) base distribution. + + This operation randomly samples a tensor of sampled classes + (`sampled_candidates`) from the range of integers `[0, range_max]`. + + The elements of `sampled_candidates` are drawn without replacement + (if `unique=True`) or with replacement (if `unique=False`) from + the base distribution. + + The base distribution is read from a file or passed in as an + in-memory array. There is also an option to skew the distribution by + applying a distortion power to the weights. + + In addition, this operation returns tensors `true_expected_count` + and `sampled_expected_count` representing the number of times each + of the target classes (`true_classes`) and the sampled + classes (`sampled_candidates`) is expected to occur in an average + tensor of sampled classes. These values correspond to `Q(y|x)` + defined in [this + document](http://www.tensorflow.org/extras/candidate_sampling.pdf). + If `unique=True`, then these are post-rejection probabilities and we + compute them approximately. 
+ + Args: + true_classes: A `Tensor` of type `int64` and shape `[batch_size, + num_true]`. The target classes. + num_true: An `int`. The number of target classes per training example. + num_sampled: An `int`. The number of classes to randomly sample per batch. + unique: A `bool`. Determines whether all sampled classes in a batch are + unique. + range_max: An `int`. The number of possible classes. + vocab_file: Each valid line in this file (which should have a CSV-like + format) corresponds to a valid word ID. IDs are in sequential order, + starting from num_reserved_ids. The last entry in each line is expected + to be a value corresponding to the count or relative probability. Exactly + one of `vocab_file` and `unigrams` needs to be passed to this operation. + distortion: The distortion is used to skew the unigram probability + distribution. Each weight is first raised to the distortion's power + before adding to the internal unigram distribution. As a result, + `distortion = 1.0` gives regular unigram sampling (as defined by the vocab + file), and `distortion = 0.0` gives a uniform distribution. + num_reserved_ids: Optionally some reserved IDs can be added in the range + `[0, num_reserved_ids]` by the users. One use case is that a special + unknown word token is used as ID 0. These IDs will have a sampling + probability of 0. + num_shards: A sampler can be used to sample from a subset of the original + range in order to speed up the whole computation through parallelism. This + parameter (together with `shard`) indicates the number of partitions that + are being used in the overall computation. + shard: A sampler can be used to sample from a subset of the original range + in order to speed up the whole computation through parallelism. This + parameter (together with `num_shards`) indicates the particular partition + number of the operation, when partitioning is being used. + unigrams: A list of unigram counts or probabilities, one per ID in + sequential order. Exactly one of `vocab_file` and `unigrams` should be + passed to this operation. + seed: An `int`. An operation-specific seed. Default is 0. + name: A name for the operation (optional). + + Returns: + sampled_candidates: A tensor of type `int64` and shape `[num_sampled]`. + The sampled classes. + true_expected_count: A tensor of type `float`. Same shape as + `true_classes`. The expected counts under the sampling distribution + of each of `true_classes`. + sampled_expected_count: A tensor of type `float`. Same shape as + `sampled_candidates`. The expected counts under the sampling distribution + of each of `sampled_candidates`. + + """ + seed1, seed2 = random_seed.get_seed(seed) + return gen_candidate_sampling_ops._fixed_unigram_candidate_sampler( + true_classes, num_true, num_sampled, unique, range_max, + vocab_file=vocab_file, distortion=distortion, + num_reserved_ids=num_reserved_ids, num_shards=num_shards, shard=shard, + unigrams=unigrams, seed=seed1, seed2=seed2, name=name) + + +def all_candidate_sampler(true_classes, num_true, num_sampled, unique, + seed=None, name=None): + """Generate the set of all classes. + + Deterministically generates and returns the set of all possible classes. + For testing purposes. There is no need to use this, since you might as + well use full softmax or full logistic regression. + + Args: + true_classes: A `Tensor` of type `int64` and shape `[batch_size, + num_true]`. The target classes. + num_true: An `int`. The number of target classes per training example. + num_sampled: An `int`. 
The number of possible classes.
+    unique: A `bool`. Ignored.
+    seed: An `int`. An operation-specific seed. Default is 0.
+    name: A name for the operation (optional).
+
+  Returns:
+    sampled_candidates: A tensor of type `int64` and shape `[num_sampled]`.
+      This operation deterministically returns the entire range
+      `[0, num_sampled)`.
+    true_expected_count: A tensor of type `float`. Same shape as
+      `true_classes`. The expected counts under the sampling distribution
+      of each of `true_classes`. All returned values are 1.0.
+    sampled_expected_count: A tensor of type `float`. Same shape as
+      `sampled_candidates`. The expected counts under the sampling distribution
+      of each of `sampled_candidates`. All returned values are 1.0.
+  """
+  seed1, seed2 = random_seed.get_seed(seed)
+  return gen_candidate_sampling_ops._all_candidate_sampler(
+      true_classes, num_true, num_sampled, unique, seed=seed1, seed2=seed2,
+      name=name)
+
+
+def compute_accidental_hits(true_classes, sampled_candidates, num_true,
+                            seed=None, name=None):
+  """Compute the ids of positions in sampled_candidates matching true_classes.
+
+  In Candidate Sampling, this operation facilitates virtually removing
+  sampled classes which happen to match target classes. This is done
+  in Sampled Softmax and Sampled Logistic.
+
+  See our [Candidate Sampling Algorithms
+  Reference](http://www.tensorflow.org/extras/candidate_sampling.pdf).
+
+  We presuppose that the `sampled_candidates` are unique.
+
+  We call it an 'accidental hit' when one of the target classes
+  matches one of the sampled classes. This operation reports
+  accidental hits as triples `(index, id, weight)`, where `index`
+  represents the row number in `true_classes`, `id` represents the
+  position in `sampled_candidates`, and `weight` is `-FLOAT_MAX`.
+
+  The result of this op should be passed through a `sparse_to_dense`
+  operation, then added to the logits of the sampled classes. This
+  removes the contradictory effect of accidentally sampling the true
+  target classes as noise classes for the same example.
+
+  Args:
+    true_classes: A `Tensor` of type `int64` and shape `[batch_size,
+      num_true]`. The target classes.
+    sampled_candidates: A tensor of type `int64` and shape `[num_sampled]`.
+      The sampled_candidates output of CandidateSampler.
+    num_true: An `int`. The number of target classes per training example.
+    seed: An `int`. An operation-specific seed. Default is 0.
+    name: A name for the operation (optional).
+
+  Returns:
+    indices: A `Tensor` of type `int32` and shape `[num_accidental_hits]`.
+      Values indicate rows in `true_classes`.
+    ids: A `Tensor` of type `int64` and shape `[num_accidental_hits]`.
+      Values indicate positions in `sampled_candidates`.
+    weights: A `Tensor` of type `float` and shape `[num_accidental_hits]`.
+      Each value is `-FLOAT_MAX`.
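As a sketch of the `sparse_to_dense` pattern just described (hypothetical glue code: `true_classes`, `sampled`, `sampled_logits`, `batch_size`, and `num_sampled` are assumed to exist in the surrounding training code, and the module paths are assumptions):

```python
from tensorflow.python.framework import types
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import candidate_sampling_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import sparse_ops

indices, ids, weights = candidate_sampling_ops.compute_accidental_hits(
    true_classes, sampled, num_true=1)

# [num_accidental_hits, 2] coordinates into the [batch_size, num_sampled]
# matrix of sampled logits.
hit_coords = array_ops.concat(1, [
    array_ops.reshape(indices, [-1, 1]),
    array_ops.reshape(math_ops.cast(ids, types.int32), [-1, 1])])

# Push the logits of accidental hits towards -inf so those "negatives"
# contribute essentially zero probability.
sampled_logits += sparse_ops.sparse_to_dense(
    hit_coords, [batch_size, num_sampled], weights, 0.0)
```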
+ + """ + seed1, seed2 = random_seed.get_seed(seed) + return gen_candidate_sampling_ops._compute_accidental_hits( + true_classes, sampled_candidates, num_true, seed=seed1, seed2=seed2, + name=name) + + +@ops.RegisterShape("AllCandidateSampler") +@ops.RegisterShape("FixedUnigramCandidateSampler") +@ops.RegisterShape("LearnedUnigramCandidateSampler") +@ops.RegisterShape("LogUniformCandidateSampler") +@ops.RegisterShape("ThreadUnsafeUnigramCandidateSampler") +@ops.RegisterShape("UniformCandidateSampler") +def _CandidateSamplerShape(op): + true_classes_shape = op.inputs[0].get_shape().with_rank(2) + batch_size = true_classes_shape[0] + num_sampled = op.get_attr("num_sampled") + num_true = op.get_attr("num_true") + return [tensor_shape.vector(num_sampled), + tensor_shape.matrix(batch_size, num_true), + tensor_shape.vector(num_sampled)] + + +@ops.RegisterShape("ComputeAccidentalHits") +def _ComputeAccidentalHitsShape(op): + num_true = op.get_attr("num_true") + # Validate that the input shape matches the attrs, even though it + # does not influence the shape of the output. + true_candidates_shape = op.inputs[0].get_shape().merge_with( + tensor_shape.matrix(None, num_true)) + output_shape = tensor_shape.vector(None) + return [output_shape] * 3 diff --git a/tensorflow/python/ops/clip_ops.py b/tensorflow/python/ops/clip_ops.py new file mode 100644 index 0000000000..08781932f9 --- /dev/null +++ b/tensorflow/python/ops/clip_ops.py @@ -0,0 +1,234 @@ +"""Operations for clipping (gradient, weight) tensors to min/max values.""" + +import collections + +from tensorflow.python.framework import ops +from tensorflow.python.framework import types +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import constant_op +from tensorflow.python.ops import math_ops + + +def clip_by_value(t, clip_value_min, clip_value_max, + name=None): + """Clips tensor values to a specified min and max. + + Given a tensor `t`, this operation returns a tensor of the same type and + shape as `t` with its values clipped to `clip_value_min` and `clip_value_max`. + Any values less than `clip_value_min` are set to `clip_value_min`. Any values + greater than `clip_value_max` are set to `clip_value_max`. + + Args: + t: A `Tensor`. + clip_value_min: A 0-D (scalar) `Tensor`. The minimum value to clip by. + clip_value_max: A 0-D (scalar) `Tensor`. The maximum value to clip by. + name: A name for the operation (optional). + + Returns: + A clipped `Tensor`. + """ + with ops.op_scope([t, clip_value_min, clip_value_max], name, + "clip_by_value") as name: + t = ops.convert_to_tensor(t, name="t") + + # Go through list of tensors, for each value in each tensor clip + t_min = math_ops.minimum( + t, array_ops.fill(array_ops.shape(t), clip_value_max)) + t_max = math_ops.maximum( + t_min, array_ops.fill(array_ops.shape(t), clip_value_min), + name=name) + + return t_max + + +def clip_by_norm(t, clip_norm, name=None): + """Clips tensor values to a maximum L2-norm. + + Given a tensor `t`, and a maximum clip value `clip_norm`, this operation + normalizes `t` so that its L2-norm is less than or equal to `clip_norm'. + Specifically, if the L2-norm is already less than or equal to `clip_norm`, + then `t` is not modified. If the L2-norm is greater than `clip_norm`, then + this operation returns a tensor of the same type and shape as `t` with its + values set to: + + `t * clip_norm / l2norm(t)` + + In this case, the L2-norm of the output tensor is `clip_norm`. 
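A small usage sketch of the two clipping helpers above (assuming they are re-exported at the top level as `tf.clip_by_value` and `tf.clip_by_norm`; the values are illustrative):

```python
import tensorflow as tf

t = tf.constant([[1.0, -2.0], [3.0, -4.0]])

clipped_elems = tf.clip_by_value(t, -1.5, 1.5)    # elementwise clamp to [-1.5, 1.5]
clipped_norm = tf.clip_by_norm(t, clip_norm=2.0)  # rescaled so that ||t||_2 <= 2

sess = tf.Session()
print sess.run(clipped_norm)
```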
+ + This operation is typically used to clip gradients before applying them with + an optimizer. + + Args: + t: A `Tensor`. + clip_norm: A 0-D (scalar) `Tensor` > 0. A maximum clipping value. + name: A name for the operation (optional). + + Returns: + A clipped `Tensor`. + """ + with ops.op_scope([t, clip_norm], name, "clip_by_norm") as name: + t = ops.convert_to_tensor(t, name="t") + + # Calculate L2-norm, clip elements by ratio of clip_norm to L2-norm + l2norm_inv = math_ops.rsqrt( + math_ops.reduce_sum(t * t, math_ops.range(0, array_ops.rank(t)))) + tclip = array_ops.identity(t * clip_norm * math_ops.minimum( + l2norm_inv, constant_op.constant(1.0 / clip_norm)), name=name) + + return tclip + +def global_norm(t_list, name=None): + """Computes the global norm of multiple tensors. + + Given a tuple or list of tensors `t_list`, this operation returns the + global norm of the elements in all tensors in `t_list`. The global norm is + computed as: + + `global_norm = sqrt(sum([l2norm(t)**2 for t in t_list]))` + + Any entries in `t_list` that are of type None are ignored. + + Args: + t_list: A tuple or list of mixed `Tensors`, `IndexedSlices`, or None. + name: A name for the operation (optional). + + Returns: + A 0-D (scalar) `Tensor` of type `float`. + + Raises: + TypeError: If `t_list` is not a sequence. + """ + if (not isinstance(t_list, collections.Sequence) + or isinstance(t_list, basestring)): + raise TypeError("t_list should be a sequence") + t_list = list(t_list) + with ops.op_scope(t_list, name, "global_norm") as name: + values = [ + ops.convert_to_tensor( + t.values if isinstance(t, ops.IndexedSlices) else t, + name="t_%d" % i) + if t is not None else t + for i, t in enumerate(t_list)] + squared_norms = array_ops.pack( + [math_ops.reduce_sum(v * v) for v in values if v]) + + norm = math_ops.sqrt( + math_ops.reduce_sum(squared_norms), name="global_norm") + + return norm + +def clip_by_global_norm(t_list, clip_norm, use_norm=None, name=None): + """Clips values of multiple tensors by the ratio of the sum of their norms. + + Given a tuple or list of tensors `t_list`, and a clipping ratio `clip_norm`, + this operation returns a list of clipped tensors `list_clipped` + and the global norm (`global_norm`) of all tensors in `t_list`. Optionally, + if you've already computed the global norm for `t_list`, you can specify + the global norm with `use_norm`. + + To perform the clipping, the values t_list[i] are set to: + + `t_list[i] * clip_norm / max(global_norm, clip_norm)` + + where: + + `global_norm = sqrt(sum([l2norm(t)**2 for t in t_list]))` + + If `clip_norm > global_norm` then the entries in `t_list` remain as they are, + otherwise they're all shrunk by the global ratio. + + Any of the entries of `t_list` that are of type None are ignored. + + This is the correct way to perform gradient clipping (for example, see + R. Pascanu, T. Mikolov, and Y. Bengio, "On the difficulty of training + Recurrent Neural Networks". http://arxiv.org/abs/1211.5063) + + However, it is slower than `clip_by_norm()` because all the parameters must be + ready before the clipping operation can be performed. + + Args: + t_list: A tuple or list of mixed `Tensors`, `IndexedSlices`, or None. + clip_norm: A 0-D (scalar) `Tensor` > 0. The clipping ratio. + use_norm: A 0-D (scalar) `Tensor` of type `float` (optional). The global + norm to use. If not provided, `global_norm()` is used to compute the norm. + name: A name for the operation (optional). 
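For example, the usual gradient-clipping pattern might look like the following sketch (`loss` and `opt` are assumed to be defined elsewhere; none of this code appears in the diff):

```python
import tensorflow as tf

params = tf.trainable_variables()
grads = tf.gradients(loss, params)                    # `loss` assumed defined
clipped, grad_norm = tf.clip_by_global_norm(grads, clip_norm=5.0)
train_op = opt.apply_gradients(zip(clipped, params))  # `opt` assumed defined
```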
+ + Returns: + list_clipped: A list of `Tensors` of the same type as `list_t`. + global_norm: A 0-D (scalar) `Tensor` representing the global norm. + + Raises: + TypeError: If `t_list` is not a sequence. + """ + if (not isinstance(t_list, collections.Sequence) + or isinstance(t_list, basestring)): + raise TypeError("t_list should be a sequence") + t_list = list(t_list) + if use_norm is None: + use_norm = global_norm(t_list, name) + + with ops.op_scope(t_list + [clip_norm], name, "clip_by_global_norm") as name: + # Calculate L2-norm, clip elements by ratio of clip_norm to L2-norm + scale = clip_norm * math_ops.minimum( + 1.0 / use_norm, constant_op.constant(1.0 / clip_norm)) + + values = [ + ops.convert_to_tensor( + t.values if isinstance(t, ops.IndexedSlices) else t, + name="t_%d" % i) + if t is not None else t + for i, t in enumerate(t_list)] + + values_clipped = [ + array_ops.identity(v * scale, name="%s_%d" % (name, i)) + if v is not None else None + for i, v in enumerate(values)] + + list_clipped = [ + ops.IndexedSlices(c_v, t.indices) + if isinstance(t, ops.IndexedSlices) + else c_v + for (c_v, t) in zip(values_clipped, t_list)] + + return list_clipped, use_norm + + +def clip_by_average_norm(t, clip_norm, name=None): + """Clips tensor values to a maximum average L2-norm. + + Given a tensor `t`, and a maximum clip value `clip_norm`, this operation + normalizes `t` so that its average L2-norm is less than or equal to + `clip_norm'. Specifically, if the average L2-norm is already less than or + equal to `clip_norm`, then `t` is not modified. If the average L2-norm is + greater than `clip_norm`, then this operation returns a tensor of the same + type and shape as `t` with its values set to: + + `t * clip_norm / l2norm_avg(t)` + + In this case, the average L2-norm of the output tensor is `clip_norm`. + + This operation is typically used to clip gradients before applying them with + an optimizer. + + Args: + t: A `Tensor`. + clip_norm: A 0-D (scalar) `Tensor` > 0. A maximum clipping value. + name: A name for the operation (optional). + + Returns: + A clipped `Tensor`. + """ + with ops.op_scope([t, clip_norm], name, "clip_by_average_norm") as name: + t = ops.convert_to_tensor(t, name="t") + + # Calculate L2-norm per element, clip elements by ratio of clip_norm to + # L2-norm per element + n_element = math_ops.cast(array_ops.size(t), types.float32) + l2norm_inv = math_ops.rsqrt( + math_ops.reduce_sum(t * t, math_ops.range(0, array_ops.rank(t)))) + tclip = array_ops.identity( + t * clip_norm * math_ops.minimum( + l2norm_inv * n_element, constant_op.constant(1.0 / clip_norm)), + name=name) + + return tclip diff --git a/tensorflow/python/ops/common_shapes.py b/tensorflow/python/ops/common_shapes.py new file mode 100644 index 0000000000..c41d1ff71d --- /dev/null +++ b/tensorflow/python/ops/common_shapes.py @@ -0,0 +1,371 @@ +"""A library of common shape functions.""" +import math + +from tensorflow.python.framework import tensor_shape + + +def scalar_shape(unused_op): + """Shape function for ops that output a scalar value.""" + return [tensor_shape.scalar()] + + +def unchanged_shape(op): + """Shape function for ops that output an tensor like their first input.""" + return [op.inputs[0].get_shape()] + + +def unchanged_shape_with_rank(rank): + """Returns a shape function for ops that constrain the rank of their input. + + Args: + rank: The exact rank of the input and output. + + Returns: + A shape function for ops that output a tensor of the same size as their + input, with a particular rank. 
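As a sketch of how these helpers get attached to op types elsewhere in the codebase (the op names below are made up for illustration):

```python
from tensorflow.python.framework import ops
from tensorflow.python.ops import common_shapes

# An elementwise op produces a tensor with the same shape as its input.
ops.RegisterShape("MyElementwiseOp")(common_shapes.unchanged_shape)

# An op that requires, and preserves, a rank-2 input.
ops.RegisterShape("MyMatrixOp")(common_shapes.unchanged_shape_with_rank(2))
```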
+ """ + def _ShapeFunction(op): + return [op.inputs[0].get_shape().with_rank(rank)] + return _ShapeFunction + + +def unchanged_shape_with_rank_at_least(rank): + """Returns a shape function for ops that constrain the rank of their input. + + Args: + rank: A lower bound on the rank of the input and output. + + Returns: + A shape function for ops that output a tensor of the same size as their + input, with a particular rank. + """ + def _ShapeFunction(op): + return [op.inputs[0].get_shape().with_rank_at_least(rank)] + return _ShapeFunction + + +def unchanged_shape_with_rank_at_most(rank): + """Returns a shape function for ops that constrain the rank of their input. + + Args: + rank: An upper bound on the rank of the input and output. + + Returns: + A shape function for ops that output a tensor of the same size as their + input, with a particular rank. + """ + def _ShapeFunction(op): + return [op.inputs[0].get_shape().with_rank_at_most(rank)] + return _ShapeFunction + + +def matmul_shape(op): + """Shape function for a MatMul op.""" + a_shape = op.inputs[0].get_shape().with_rank(2) + transpose_a = op.get_attr("transpose_a") + b_shape = op.inputs[1].get_shape().with_rank(2) + transpose_b = op.get_attr("transpose_b") + output_rows = a_shape[1] if transpose_a else a_shape[0] + output_cols = b_shape[0] if transpose_b else b_shape[1] + inner_a = a_shape[0] if transpose_a else a_shape[1] + inner_b = b_shape[1] if transpose_b else b_shape[0] + inner_a.assert_is_compatible_with(inner_b) + return [tensor_shape.TensorShape([output_rows, output_cols])] + + +def bias_add_shape(op): + """Shape function for a BiasAdd op.""" + input_shape = op.inputs[0].get_shape().with_rank_at_least(2) + bias_shape = op.inputs[1].get_shape().with_rank(1) + if input_shape.ndims is not None: + # Output has the same shape as input, and matches the length of + # bias in its last dimension. + output_shape = input_shape[0:-1].concatenate( + input_shape[-1].merge_with(bias_shape[0])) + else: + output_shape = tensor_shape.unknown_shape() + return [output_shape] + + +def _Get2DOutputSize(input_height, input_width, filter_height, filter_width, + row_stride, col_stride, padding_type): + """Returns the number of rows and columns in a convolution/pooling output.""" + input_height = tensor_shape.as_dimension(input_height) + input_width = tensor_shape.as_dimension(input_width) + filter_height = tensor_shape.as_dimension(filter_height) + filter_width = tensor_shape.as_dimension(filter_width) + row_stride = int(row_stride) + col_stride = int(col_stride) + + if filter_height.value == 1 and filter_width.value == 1 and ( + row_stride == 1 and col_stride == 1): + return input_height, input_width + else: + if filter_height > input_height or filter_width > input_width: + raise ValueError("filter must not be larger than the input: ", + "Filter: [", filter_height, "x", filter_width, "] ", + "Input: [", input_height, "x", input_width, "] ") + if row_stride > filter_height or col_stride > filter_width: + raise ValueError("stride must be less than or equal to filter size", + "stride: [", row_stride, "x", col_stride, "] ", + "filter: [", filter_height, "x", filter_width, "] ") + + # Compute number of rows in the output, based on the padding. 
+ if input_height.value is None or filter_height.value is None: + out_rows = None + elif padding_type == "VALID": + out_rows = int( + math.ceil((input_height.value - filter_height.value + 1.0) + / row_stride)) + elif padding_type == "SAME": + out_rows = int(math.ceil(input_height.value * 1.0 + / row_stride)) + else: + raise ValueError("Invalid value for padding: %r" % padding_type) + + # Compute number of columns in the output, based on the padding. + if input_width.value is None or filter_width.value is None: + out_cols = None + elif padding_type == "VALID": + out_cols = int( + math.ceil((input_width.value - filter_width.value + 1.0) + / col_stride)) + elif padding_type == "SAME": + out_cols = int(math.ceil(input_width.value * 1.0 / col_stride)) + + return out_rows, out_cols + + +def conv2d_shape(op): + """Shape function for a Conv2D op. + + This op has two inputs: + + * input, a 4D tensor with shape = [batch_size, rows, cols, depth_in] + * filter, a 4D tensor with shape = [filter_rows, filter_cols, + depth_in, depth_out] + + The output is a 4D tensor with shape = [batch_size, out_rows, + out_cols, depth_out], where out_rows and out_cols depend on the + value of the op's "padding" and "strides" attrs. + + Args: + op: A Conv2D Operation. + + Returns: + A list containing the Shape of the Conv2D output. + + Raises: + ValueError: If the shapes of the input or filter are incompatible. + """ + input_shape = op.inputs[0].get_shape().with_rank(4) + filter_shape = op.inputs[1].get_shape().with_rank(4) + + batch_size = input_shape[0] + in_rows = input_shape[1] + in_cols = input_shape[2] + + filter_rows = filter_shape[0] + filter_cols = filter_shape[1] + depth_out = filter_shape[3] + # Check that the input depths are compatible. + input_shape[3].assert_is_compatible_with(filter_shape[2]) + + stride_b, stride_r, stride_c, stride_d = op.get_attr("strides") + if stride_b != 1 or stride_d != 1: + raise ValueError("Current implementation does not yet support " + "strides in the batch and depth dimensions.") + if stride_r != stride_c: + # TODO(shlens): Add support for this. + raise ValueError("Current implementation only supports equal length " + "strides in the row and column dimensions.") + + # TODO(mrry,shlens): Raise an error if the stride would cause + # information in the input to be ignored. This will require a change + # in the kernel implementation. + stride = stride_r + padding = op.get_attr("padding") + out_rows, out_cols = _Get2DOutputSize( + in_rows, in_cols, filter_rows, filter_cols, stride, stride, padding) + + return [tensor_shape.TensorShape([batch_size, out_rows, out_cols, depth_out])] + + +def separable_conv2d_shape(op): + """Shape function for a SeparableConv2D op. + + This op has three inputs: + + * input, a 4D tensor with shape = [batch_size, rows, cols, depth_in] + + * depthwise_filter, a 4D tensor with shape = [filter_rows, + filter_cols, depth_in, depth_multiplier] + + * pointwise_filter, a 4D tensor with shape = [1, 1, depth_in * + depth_multiplier, depth_out] + + The output is a 4D tensor with shape = [batch_size, out_rows, + out_cols, depth_out], where out_rows and out_cols depend on the + value of the op's "padding" and "strides" attrs. + + Args: + op: A SeparableConv2D Operation. + + Returns: + A list containing the Shape of the SeparableConv2D output. + + Raises: + ValueError: If the shapes of the input or filter are incompatible. 
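A quick numeric check of the output-size formulas above (plain Python; the numbers are illustrative):

```python
import math

in_rows, filter_rows, stride = 10, 3, 2

# "VALID": only positions where the filter fits entirely inside the input.
valid_rows = int(math.ceil((in_rows - filter_rows + 1.0) / stride))  # => 4

# "SAME": the input is implicitly padded, so the output size depends only on
# the stride.
same_rows = int(math.ceil(in_rows * 1.0 / stride))                   # => 5
```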
+ """ + input_shape = op.inputs[0].get_shape().with_rank(4) + depthwise_filter_shape = op.inputs[1].get_shape().merge_with( + tensor_shape.TensorShape([None, None, input_shape[3], None])) + pointwise_depth_in = depthwise_filter_shape[2] * depthwise_filter_shape[3] + + pointwise_filter_shape = op.inputs[2].get_shape().merge_with( + tensor_shape.TensorShape([1, 1, pointwise_depth_in, None])) + + batch_size = input_shape[0] + in_rows = input_shape[1] + in_cols = input_shape[2] + + filter_rows = depthwise_filter_shape[0] + filter_cols = depthwise_filter_shape[1] + depth_out = pointwise_filter_shape[3] + + stride_b, stride_r, stride_c, stride_d = op.get_attr("strides") + if stride_b != 1 or stride_d != 1: + raise ValueError("Current implementation does not yet support " + "strides in the batch and depth dimensions.") + if stride_r != stride_c: + # TODO(shlens): Add support for this. + raise ValueError("Current implementation only supports equal length " + "strides in the row and column dimensions.") + + # TODO(mrry,shlens): Raise an error if the stride would cause + # information in the input to be ignored. This will require a change + # in the kernel implementation. + stride = stride_r + padding = op.get_attr("padding") + out_rows, out_cols = _Get2DOutputSize( + in_rows, in_cols, filter_rows, filter_cols, stride, stride, padding) + + return [tensor_shape.TensorShape([batch_size, out_rows, out_cols, depth_out])] + + +def avg_pool_shape(op): + """Shape function for an AvgPool op. + + This op has one input: + + * input, a 4D tensor with shape = [batch_size, rows, cols, depth] + + The output is a 4D tensor with shape = [batch_size, out_rows, + out_cols, depth_out], where out_rows and out_cols depend on the + value of the op's "ksize", "strides", and "padding" attrs. + + Args: + op: An AvgPool Operation. + + Returns: + A single-element list containing the Shape of the AvgPool output. + + Raises: + ValueError: If the shape of the input is invalid or incompatible with + the values of the attrs. + """ + input_shape = op.inputs[0].get_shape().with_rank(4) + ksize_b, ksize_r, ksize_c, ksize_d = op.get_attr("ksize") + stride_b, stride_r, stride_c, stride_d = op.get_attr("strides") + + batch_size = input_shape[0] + in_rows = input_shape[1] + in_cols = input_shape[2] + depth = input_shape[3] + + if ksize_b != 1 or ksize_d != 1: + raise ValueError("Current implementation does not support pooling " + "in the batch and depth dimensions.") + if stride_b != 1 or stride_d != 1: + raise ValueError("Current implementation does not support strides " + "in the batch and depth dimensions.") + + # TODO(mrry,shlens): Raise an error if the stride would cause + # information in the input to be ignored. This will require a change + # in the kernel implementation. + padding = op.get_attr("padding") + + out_rows, out_cols = _Get2DOutputSize( + in_rows, in_cols, ksize_r, ksize_c, stride_r, stride_c, padding) + + return [tensor_shape.TensorShape([batch_size, out_rows, out_cols, depth])] + + +def max_pool_shape(op): + """Shape function for a MaxPool op. + + This op has one input: + + * input, a 4D tensor with shape = [batch_size, rows, cols, depth_in] + + The output is a 4D tensor with shape = [batch_size, out_rows, + out_cols, depth_out], where out_rows, out_cols, and depth_out depend + on the value of the op's "ksize", "strides", and "padding" attrs. + + Args: + op: A MaxPool Operation. + + Returns: + A single-element list containing the Shape of the MaxPool output. 
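For instance, the shape inference described here would report the following static shape (a sketch; it assumes the pooling op is exposed as `tf.nn.max_pool`):

```python
import tensorflow as tf

images = tf.placeholder(tf.float32, shape=[32, 28, 28, 3])
pooled = tf.nn.max_pool(images, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                        padding="SAME")
# Inferred static shape: [32, 14, 14, 3] (28 / 2 = 14 under "SAME" padding).
print pooled.get_shape()
```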
+ + Raises: + ValueError: If the shape of the input is invalid or incompatible with + the values of the attrs. + """ + input_shape = op.inputs[0].get_shape().with_rank(4) + ksize_b, ksize_r, ksize_c, ksize_d = op.get_attr("ksize") + stride_b, stride_r, stride_c, stride_d = op.get_attr("strides") + + batch_size = input_shape[0] + in_rows = input_shape[1] + in_cols = input_shape[2] + depth = input_shape[3] + + if ksize_b != 1: + raise ValueError("Current implementation does not support pooling " + "in the batch dimension.") + if stride_b != 1: + raise ValueError("Current implementation does not support strides " + "in the batch dimension.") + + if not ((ksize_r == 1 and ksize_c == 1) or ksize_d == 1): + raise ValueError("MaxPooling supports exactly one of pooling across depth " + "or pooling across width/height.") + + # TODO(mrry,shlens): Raise an error if the stride would cause + # information in the input to be ignored. This will require a change + # in the kernel implementation. + if ksize_d == 1: + padding = op.get_attr("padding") + out_rows, out_cols = _Get2DOutputSize( + in_rows, in_cols, ksize_r, ksize_c, stride_r, stride_c, padding) + return [tensor_shape.TensorShape([batch_size, out_rows, out_cols, depth])] + else: + if depth % ksize_d > 0: + raise ValueError("Depthwise max pooling requires the depth window " + "to evenly divide the input depth.") + if stride_d != ksize_d: + raise ValueError("Depthwise max pooling requires the depth window " + "to equal the depth stride.") + return [tensor_shape.TensorShape( + [batch_size, in_rows, in_cols, depth / ksize_d])] + + +def no_outputs(unused_op): + """Shape function for use with ops that have no outputs.""" + return [] + + +def unknown_shape(op): + """Shape function for use with ops whose output shapes are unknown.""" + return [tensor_shape.unknown_shape() for _ in op.outputs] diff --git a/tensorflow/python/ops/constant_op.py b/tensorflow/python/ops/constant_op.py new file mode 100644 index 0000000000..7d9044b689 --- /dev/null +++ b/tensorflow/python/ops/constant_op.py @@ -0,0 +1,189 @@ +"""## Constant Value Tensors + +TensorFlow provides several operations that you can use to generate constants. + +@@zeros +@@zeros_like + +@@ones +@@ones_like + +@@fill + +@@constant + +## Sequences + +@@linspace + +@@range + +## Random Tensors + +TensorFlow has several ops that create random tensors with different +distributions. The random ops are stateful, and create new random values each +time they are evaluated. + +The `seed` keyword argument in these functions acts in conjunction with +the graph-level random seed. Changing either the graph-level seed using +[`set_random_seed`](constant_op.md#set_random_seed) or the op-level seed +will change the underlying seed of these operations. Setting neither graph-level +nor op-level seed, results in a random seed for all operations. +See [`set_random_seed`](constant_op.md#set_random_seed) for details on the +interaction between operation-level and graph-level random seeds. + +### Examples: + +```python +# Create a tensor of shape [2, 3] consisting of random normal values, with mean +# -1 and standard deviation 4. +norm = tf.random_normal([2, 3], mean=-1, stddev=4) + +# Shuffle the first dimension of a tensor +c = tf.constant([[1, 2], [3, 4], [5, 6]]) +shuff = tf.random_shuffle(c) + +# Each time we run these ops, different results are generated +sess = tf.Session() +print sess.run(norm) +print sess.run(norm) + +# Set an op-level seed to generate repeatable sequences across sessions. 
+c = tf.constant([[1, 2], [3, 4], [5, 6]]) +sess = tf.Session() +norm = tf.random_normal(c, seed=1234) +print sess.run(norm) +print sess.run(norm) +``` + +Another common use of random values is the intialization of variables. Also see +the [Variables How To](../../how_tos/variables/index.md). + +```python +# Use random uniform values in [0, 1) as the initializer for a variable of shape +# [2, 3]. The default type is float32. +var = tf.Variable(tf.random_uniform([2, 3]), name="var") +init = tf.initialize_all_variables() + +sess = tf.Session() +sess.run(init) +print sess.run(var) +``` + +@@random_normal +@@truncated_normal +@@random_uniform +@@random_shuffle +@@set_random_seed + +""" +"""Constant Operation. + +Has to be separate from array_ops to avoid a cyclic dependency. +""" +import tensorflow.python.platform +import numpy as np + +from tensorflow.core.framework import attr_value_pb2 +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.framework import tensor_util +from tensorflow.python.framework import types + + +def constant(value, dtype=None, shape=None, name="Const"): + """Creates a constant tensor. + + The resulting tensor is populated with values of type `dtype`, as + specified by arguments `value` and (optionally) `shape` (see examples + below). + + The argument `value` can be a constant value, or a list of values of type + `dtype`. If `value` is a list, then the length of the list must be less + than or equal to the number of elements implied by the `shape` argument (if + specified). In the case where the list length is less than the number of + elements specified by `shape`, the last element in the list will be used + to fill the remaining entries. + + The argument `shape` is optional. If present, it specifies the dimensions + of the resulting tensor. If not present, then the tensor is a scalar (0-D) + if `value` is a scalar, or 1-D otherwise. + + If the argument `dtype` is not specified, then the type is inferred from + the type of `value`. + + For example: + + ```python + # Constant 1-D Tensor populated with value list. + tensor = tf.constant([1, 2, 3, 4, 5, 6, 7]) => [1 2 3 4 5 6 7] + + # Constant 2-D tensor populated with scalar value -1. + tensor = tf.constant(-1.0, shape=[2, 3]) => [[-1. -1. -1.] + [-1. -1. -1.]] + ``` + + Args: + value: A constant value (or list) of output type `dtype`. + + dtype: The type of the elements of the resulting tensor. + + shape: Optional dimensions of resulting tensor. + + name: Optional name for the tensor. + + Returns: + A Constant Tensor. 
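One more hedged example of the fill behavior described above (`tf` imported as in the examples earlier in this docstring):

```python
# A 4-element list materialized into a [2, 3] tensor: the remaining entries
# are filled with the last element of the list.
t = tf.constant([1, 2, 3, 4], shape=[2, 3])
# => [[1 2 3]
#     [4 4 4]]
```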
+ """ + g = ops.get_default_graph() + tensor_value = attr_value_pb2.AttrValue() + tensor_value.tensor.CopyFrom( + tensor_util.make_tensor_proto(value, dtype=dtype, shape=shape)) + dtype_value = attr_value_pb2.AttrValue(type=tensor_value.tensor.dtype) + const_tensor = g.create_op( + "Const", [], [dtype_value.type], + attrs={"value": tensor_value, "dtype": dtype_value}, name=name).outputs[0] + return const_tensor + + +@ops.RegisterShape("Const") +def _ConstantShape(op): + return [tensor_shape.TensorShape( + [d.size for d in op.get_attr("value").tensor_shape.dim])] + + +ops.register_tensor_conversion_function((list, tuple), constant, 100) +ops.register_tensor_conversion_function(np.ndarray, constant, 100) +ops.register_tensor_conversion_function(np.generic, constant, 100) +ops.register_tensor_conversion_function(object, constant, 200) + +def _tensor_shape_tensor_conversion_function(s, dtype=None, name=None): + if not s.is_fully_defined(): + raise ValueError( + "Cannot convert a partially known TensorShape to a Tensor: %s" % s) + if dtype is not None: + if dtype not in (types.int32, types.int64): + raise TypeError("Cannot convert a TensorShape to dtype: %s" % dtype) + else: + dtype = types.int32 + if name is None: + name = "shape_as_tensor" + return constant(s.as_list(), dtype=dtype, name=name) + +ops.register_tensor_conversion_function( + tensor_shape.TensorShape, _tensor_shape_tensor_conversion_function, 100) + +def _dimension_tensor_conversion_function(d, dtype=None, name=None): + if d.value is None: + raise ValueError("Cannot convert an unknown Dimension to a Tensor: %s" % d) + if dtype is not None: + if dtype not in (types.int32, types.int64): + raise TypeError("Cannot convert a TensorShape to dtype: %s" % dtype) + else: + dtype = types.int32 + if name is None: + name = "shape_as_tensor" + return constant(d.value, dtype=dtype, name=name) + +ops.register_tensor_conversion_function( + tensor_shape.Dimension, _dimension_tensor_conversion_function, 100) diff --git a/tensorflow/python/ops/control_flow_grad.py b/tensorflow/python/ops/control_flow_grad.py new file mode 100644 index 0000000000..3a1a5b91c0 --- /dev/null +++ b/tensorflow/python/ops/control_flow_grad.py @@ -0,0 +1,100 @@ +"""Gradients for operators defined in control_flow_ops.py.""" +from tensorflow.python.framework import ops +from tensorflow.python.ops import math_ops +# pylint: disable=wildcard-import,undefined-variable +from tensorflow.python.ops.control_flow_ops import * +from tensorflow.python.ops.gen_control_flow_ops import * + + +@ops.RegisterGradient("Switch") +def _SwitchGrad(op, *grad): + op = GetRealOp(op) + ctxt = op._get_control_flow_context() # pylint: disable=protected-access + if isinstance(ctxt, WhileContext): + merge_op = ctxt.switch_map.get(op) + if merge_op: + merge_op._update_input(1, grad[1]) + return None, None + else: + merge_op = merge(grad, name="b_switch")[0] + ctxt.switch_map[op] = merge_op.op + return merge_op, None + elif isinstance(ctxt, CondContext): + good_grad = grad[ctxt.branch] + zero_grad = grad[1 - ctxt.branch] + zero_grad = switch(zero_grad, ctxt.pred, name="grad_0")[1 - ctxt.branch] + return merge([good_grad, zero_grad], name="switch_grad")[0], None + else: + false_grad = switch(grad[0], op.inputs[1])[0] + true_grad = switch(grad[1], op.inputs[1])[1] + return merge([false_grad, true_grad])[0], None + + +@ops.RegisterGradient("RefSwitch") +def _RefSwitchGrad(op, *grad): + return _SwitchGrad(op, *grad) + + +@ops.RegisterGradient("Merge") +def _MergeGrad(op, grad, _): + op = GetRealOp(op) + 
input_op = op.inputs[0].op + # pylint: disable=protected-access + ctxt = input_op._get_control_flow_context() + # pylint: enable=protected-access + if isinstance(ctxt, WhileContext): + grad_ctxt = ctxt.grad_context + return switch(grad, grad_ctxt.pivot) + elif isinstance(ctxt, CondContext): + return switch(grad, ctxt.pred, name="merge_grad") + else: + num_inputs = len(op.inputs) + cond = [math_ops.equal(op.outputs[1], i) for i in xrange(num_inputs)] + return [Switch(grad, cond[i])[1] for i in xrange(num_inputs)] + + +@ops.RegisterGradient("Exit") +def _ExitGrad(op, grad): + # pylint: disable=protected-access + forward_ctxt = op._get_control_flow_context() + # pylint: enable=protected-access + if not forward_ctxt.back_prop: + return None + grad_ctxt = forward_ctxt.grad_context + grad_ctxt.AddName(grad.name) + return enter(grad, grad_ctxt.name, is_constant=False, + parallel_iterations=forward_ctxt.parallel_iterations, + name="b_exit") + + +@ops.RegisterGradient("NextIteration") +def _NextIterationGrad(_, grad): + return next_iteration(grad) + + +@ops.RegisterGradient("Enter") +def _EnterGrad(op, grad): + op = GetRealOp(op) + # pylint: disable=protected-access + forward_ctxt = op._get_control_flow_context() + # pylint: enable=protected-access + grad_ctxt = forward_ctxt.grad_context + if grad_ctxt: + if op.get_attr("is_constant"): + # Add a gradient accumulator for every loop invariant. + result = grad_ctxt.AddBackPropAccumulateLoop(grad) + else: + result = exit(grad) + return result + else: + return grad + + +@ops.RegisterGradient("RefEnter") +def _RefEnterGrad(op, grad): + return _EnterGrad(op, grad) + + +@ops.RegisterGradient("LoopCond") +def _LoopCondGrad(_): + return None diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py new file mode 100644 index 0000000000..068e3b5553 --- /dev/null +++ b/tensorflow/python/ops/control_flow_ops.py @@ -0,0 +1,1561 @@ +"""## Control Flow Operations + +TensorFlow provides several operations and classes that you can use to control +the execution of operations and add conditional dependencies to your graph. + +@@identity +@@tuple +@@group +@@no_op +@@count_up_to + +## Logical Operators + +TensorFlow provides several operations that you can use to add logical operators +to your graph. + +@@logical_and +@@logical_not +@@logical_or +@@logical_xor + +## Comparison Operators + +TensorFlow provides several operations that you can use to add comparison +operators to your graph. + +@@equal +@@not_equal +@@less +@@less_equal +@@greater +@@greater_equal +@@select +@@where + +## Debugging Operations + +TensorFlow provides several operations that you can use to validate values and +debug your graph. 
+ +@@is_finite +@@is_inf +@@is_nan +@@verify_tensor_all_finite +@@check_numerics +@@add_check_numerics_ops +@@Assert +@@Print +""" +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.framework import types +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import common_shapes +from tensorflow.python.ops import constant_op +from tensorflow.python.ops import gen_control_flow_ops +from tensorflow.python.ops import gen_array_ops +from tensorflow.python.ops import logging_ops +from tensorflow.python.ops import math_ops +# pylint: disable=wildcard-import,undefined-variable +from tensorflow.python.ops.gen_control_flow_ops import * + + +# We override the 'tuple' for a control flow op, so we keep python's +# existing 'tuple' for later use in this module. +_basetuple = tuple + + +# pylint: disable=protected-access +def _Identity(data, name=None): + """Return a tensor with the same shape and contents as the input tensor. + + Args: + data: A Tensor. + name: A name for this operation (optional). + + Returns: + A Tensor with the same type and value as the input Tensor. + """ + if not data.dtype.is_ref_dtype: + return array_ops.identity(data, name=name) + else: + return gen_array_ops._ref_identity(data, name=name) + + +def _Enter(data, frame_name, is_constant=False, parallel_iterations=10, + name=None): + """Creates or finds a child frame, and makes 'data' available to it. + + The unique `frame_name` is used by the `Executor` to identify frames. If + `is_constant` is true, `output` is a constant in the child frame; otherwise + it may be changed in the child frame. At most `parallel_iterations` iterations + are run in parallel in the child frame. + + Args: + data: The tensor to be made available to the child frame. + frame_name: The name of the child frame. + is_constant: If true, the output is constant within the child frame. + parallel_iterations: The number of iterations allowed to run in parallel. + name: A name for this operation (optional). + + Returns: + The same tensor as 'data'. + """ + if not data.dtype.is_ref_dtype: + return enter(data, frame_name, is_constant, parallel_iterations, + name=name) + else: + return ref_enter(data, frame_name, is_constant, parallel_iterations, + name=name) + + +def exit(data, name=None): + """Exits the current frame to its parent frame. + + Exit makes its input `data` available to the parent frame. + + Args: + data: The tensor to be made available to the parent frame. + name: A name for this operation (optional). + + Returns: + The same tensor as `data`. + """ + return gen_control_flow_ops._exit(data, name) + + +def switch(data, pred, name=None): + """Forwards `data` to an output determined by `pred`. + + If `pred` is true, the `data` input is forwared to the first output. + Otherwise, the data goes to the second output. + + This op handles `Tensor`s and `IndexedSlices`. + + Args: + data: The tensor to be forwarded to the appropriate output. + pred: A scalar that specifies which output port will receive data. + name: A name for this operation (optional). + + Returns: + `(output_true, output_false)`: If `pred` is true, data will be forwarded to + `output_true`, otherwise it goes to `output_false`. 
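A minimal sketch of pairing `switch` with `merge` (`x`, `y`, and `data` are assumed to be existing tensors; `cond()` later in this file builds on this pairing):

```python
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops

pred = math_ops.less(x, y)
out_a, out_b = control_flow_ops.switch(data, pred)
# Only one of out_a / out_b is "alive" for a given value of pred; ops built on
# the dead branch are skipped at runtime, and merge returns the live result.
result, chosen_index = control_flow_ops.merge([out_a * 2.0, out_b * 3.0])
```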
+ """ + with ops.op_scope([data, pred], name, "Switch") as name: + data = ops.convert_to_tensor_or_indexed_slices(data, name="data") + pred = ops.convert_to_tensor(pred, name="pred") + if isinstance(data, ops.Tensor): + return gen_control_flow_ops._switch(data, pred, name=name) + else: + val, ind, dense_shape = data.values, data.indices, data.dense_shape + val_f, val_t = gen_control_flow_ops._switch(val, pred, name=name) + ind_f, ind_t = gen_control_flow_ops._switch(ind, pred, name="indices") + if dense_shape: + dense_shape_f, dense_shape_t = gen_control_flow_ops._switch( + dense_shape, pred, name="dense_shape") + else: + dense_shape_f, dense_shape_t = None, None + return (ops.IndexedSlices(val_f, ind_f, dense_shape_f), + ops.IndexedSlices(val_t, ind_t, dense_shape_t)) + + +def merge(inputs, name=None): + """Returns the value of an available element of `inputs`. + + This op tests each of the tensors in `inputs` in turn to determine if any of + them is available. If it finds an available tensor, it returns it and its + index in `inputs`. + + It is an error if more than one tensor in `inputs` is available. If no tensor + in `inputs` is available, the returned tensor and index are not set. + + This op handles both `Tensor`s and `IndexedSlices`. If inputs has a mix of + `Tensor`s and `IndexedSlices`, all inputs are converted to IndexedSlices + before merging. + + Args: + inputs: The input tensors, at most one of which is available. + name: A name for this operation (optional). + + Returns: + A tuple containing the chosen input tensor and its index in `inputs`. + + Raises: + ValueError: If inputs are IndexedSlices and some but not all have a + dense_shape property. + """ + with ops.op_scope(inputs, name, "Merge") as name: + inputs = [ops.convert_to_tensor_or_indexed_slices(inp) for inp in inputs] + if all([isinstance(inp, ops.Tensor) for inp in inputs]): + return gen_control_flow_ops._merge(inputs, name=name) + else: + inputs = math_ops._as_indexed_slices_list(inputs) + values, _ = gen_control_flow_ops._merge([inp.values for inp in inputs], + name=name) + indices, chosen_index = gen_control_flow_ops._merge( + [inp.indices for inp in inputs], name="indices") + if any(inp.dense_shape for inp in inputs): + if not all(inp.dense_shape for inp in inputs): + raise ValueError("Either all merged IndexedSlices must have a " + "dense_shape, or none must have a dense_shape.") + dense_shape, _ = gen_control_flow_ops._merge( + [inp.dense_shape for inp in inputs], name="dense_shape") + else: + dense_shape = None + return ops.IndexedSlices(values, indices, dense_shape), chosen_index + + +def _SwitchRefOrTensor(data, pred, name="Switch"): + """Forwards `data` to an output determined by `pred`. + + If `pred` is true, the `data` input is forwared to the first output. + Otherwise, the data goes to the second output. + + This op handles `Tensor`s and `IndexedSlices`. + + Args: + data: The tensor to be forwarded to the appropriate output. + pred: A scalar that specifies which output port will receive data. + name: A name for this operation (optional). + + Returns: + `(output_false, output_false)`: If `pred` is true, data will be forwarded to + `output_true`, otherwise it goes to `output_false`. 
+ + Raises: + TypeError: if data is not a Tensor or IndexedSlices + """ + data = ops.convert_to_tensor_or_indexed_slices(data, name="data") + if isinstance(data, ops.Tensor): + if not data.dtype.is_ref_dtype: + return switch(data, pred, name=name) + else: + return ref_switch(data, pred, name=name) + else: + return switch(data, pred, name=name) + + +class ControlFlowOpInputs(object): + """An indirection to capture the input tensors needed in backprop.""" + + def __init__(self, op): + self._op = op + self._inputs = None + + def __len__(self): + return len(self._op._inputs) + + def __getitem__(self, index): + if self._inputs is None: + self._inputs = [None for _ in self._op.inputs] + if isinstance(index, int): + val = self._inputs[index] + if val is None: + f_val = self._op.inputs[index] + val = _GetRealValue(f_val) + self._inputs[index] = val + return val + elif isinstance(index, slice): + start, stop, step = index.indices(len(self)) + vals = [self[i] for i in xrange(start, stop, step)] + return vals + else: + raise TypeError("index must be an integer or slice") + + +class ControlFlowOpOutputs(object): + """An indirection to capture the output tensors needed in backprop.""" + + def __init__(self, op): + self._op = op + self._outputs = None + + def __len__(self): + return len(self._op._outputs) + + def __getitem__(self, index): + if self._outputs is None: + self._outputs = [None for _ in self._op.outputs] + if isinstance(index, int): + val = self._outputs[index] + if val is None: + f_val = self._op.outputs[index] + val = _GetRealValue(f_val) + self._outputs[index] = val + return val + elif isinstance(index, slice): + start, stop, step = index.indices(len(self)) + vals = [self[i] for i in xrange(start, stop, step)] + return vals + else: + raise TypeError("index must be an integer or slice") + + +class ControlFlowOpWrapper(object): + """A wrapper class for Operation.""" + + def __init__(self, op): + self._op = op + self._inputs = None + self._outputs = None + + @property + def inputs(self): + if self._inputs is None: + self._inputs = ControlFlowOpInputs(self._op) + return self._inputs + + @property + def outputs(self): + if self._outputs is None: + self._outputs = ControlFlowOpOutputs(self._op) + return self._outputs + + @property + def op(self): + return self._op + + @property + def name(self): + """Returns the name of this instance of op.""" + return self._op.name + + @property + def _id(self): + """Returns the unique id of this operation.""" + return self._op._id + + @property + def device(self): + """Returns the device of this operation. + + Returns: + a string or None if the device was not set. 
+ """ + return self._op.device + + @property + def output_types(self): + return self._op.output_types + + @property + def input_types(self): + return self._op._input_types + + @property + def type(self): + """Returns the type of the op.""" + return self._op.type + + @property + def graph(self): + """Returns the parent graph.""" + return self._op.graph + + def GetAttr(self, attr_name): + """Returns the value of attribute 'attr_name' of NodeDef.""" + return self._op.get_attr(attr_name) + + def _get_control_flow_context(self): + return self._op._get_control_flow_context() + + +def GetRealOp(op): + while isinstance(op, ControlFlowOpWrapper): + op = op.op + return op + + +def MakeWrapper(op): + """Make a wrapper for op if it is in a WhileContext.""" + forward_ctxt = op._get_control_flow_context() + if forward_ctxt and isinstance(forward_ctxt, WhileContext): + return ControlFlowOpWrapper(op) + return op + + +def EnterGradWhileContext(op): + """Enter the WhileContext for gradient computation.""" + forward_ctxt = op._get_control_flow_context() + if forward_ctxt and isinstance(forward_ctxt, WhileContext): + grad_ctxt = forward_ctxt.CreateGradWhileContext() + grad_ctxt.Enter() + + +def ExitGradWhileContext(op): + """Exit the WhileContext for gradient computation.""" + forward_ctxt = op._get_control_flow_context() + if forward_ctxt and isinstance(forward_ctxt, WhileContext): + assert forward_ctxt.grad_context + forward_ctxt.grad_context.Exit() + + +def _GetRealValue(value): + """Get the real value. + + If backprop "uses" a value produced by forward inference, an + accumulator is added in the forward loop to accumulate its values, + so we use the accumulated value, indexed by the backprop counter. + + Args: + value: A tensor to be captured. + + Returns: + The same tensor value from the saved history. + """ + real_value = value + forward_ctxt = value.op._get_control_flow_context() + real_value = forward_ctxt.history_map.get(value.name) + assert value.op.type != "Variable" + if real_value is None: + if value.op.type == "Enter" and value.op.get_attr("is_constant"): + # Use the input of this Enter node + real_value = GetRealOp(value.op).inputs[0] + else: + # Accumulate the history of this value. + # NOTE(yuanbyu): Don't accumulate for constants. One approach is + # to deepcopy the constants for the grad while context. + history_value = forward_ctxt.AddForwardAccumulateLoop(value) + + # The shapes of the whole history and a single event element. + forward_ctxt.grad_context.Exit() + elem_rank = array_ops.rank(history_value) - 1 + elem_rank_vec = array_ops.expand_dims(elem_rank, 0) + elem_shape = array_ops.slice(array_ops.shape(history_value), [1], + elem_rank_vec) + slice_shape = array_ops.concat(0, [[1], elem_shape]) + forward_ctxt.grad_context.Enter() + + # The begin position of the slice at slice_index. + slice_index = forward_ctxt.grad_context.index + b1 = array_ops.zeros(elem_rank_vec, dtype=types.int32) + b = array_ops.concat(0, [array_ops.expand_dims(slice_index, 0), b1]) + + # The slice at slice_index. 
+ # TODO(irving): Replace with gather once that's GPU accelerated + real_value = array_ops.squeeze( + array_ops.slice(history_value, + b, + slice_shape, + name="real"), + squeeze_dims=[0]) + forward_ctxt.history_map[value.name] = real_value + return real_value + + +def IsLoopSwitch(op): + """Returns true if `op` is the Switch for a While loop.""" + if op.type == "Switch": + ctxt = op._get_control_flow_context() + return ctxt and isinstance(ctxt, WhileContext) + return False + + +class ControlFlowContext(object): + """The base class for control flow context. + + The usage pattern is a sequence of (Enter, Exit) followed by a final + ExitResult. + """ + + def AddName(self, name): + self._values.add(name) + + # pylint: disable=protected-access + def Enter(self): + """Enter the current context.""" + self._outer_context = ops.get_default_graph()._get_control_flow_context() + ops.get_default_graph()._set_control_flow_context(self) + + def Exit(self): + """Exit the current context.""" + ops.get_default_graph()._set_control_flow_context(self._outer_context) + # pylint: enable=protected-access + + def ExitResult(self, result): + """Make a list of tensors available in the outer context.""" + if self._outer_context is not None: + for x in result: + self._outer_context.AddName(x.name) + + def GetWhileContext(self): + """Get the current while context.""" + if self._outer_context is not None: + return self._outer_context.GetWhileContext() + return None + + def AddToWhileContext(self, op): + """Add a control dependency to the containing WhileContext. + + The added control dependency ensures that the outputs of this op + belong to the WhileContext. + + Args: + op: An operation. + """ + while_ctxt = self.GetWhileContext() + if while_ctxt is not None: + # pylint: disable=protected-access + op._add_control_input(while_ctxt.GetControlPivot().op) + # pylint: enable=protected-access + + +class CondContext(ControlFlowContext): + """The context for the conditional construct.""" + + def __init__(self, pred, pivot, branch): + self._pred = pred + self._outer_context = None + self._pivot = pivot + self._branch = branch + self._values = set() + self._values.add(pred.name) + self._values.add(pivot.name) + self._external_values = {} + + @property + def pred(self): + return self._pred + + @property + def pivot(self): + return self._pivot + + @property + def branch(self): + return self._branch + + def AddValue(self, val): + """Add 'val' to the current context and its outer context recursively.""" + result = val + if val.name not in self._values: + self._values.add(val.name) + if self._outer_context is not None: + result = self._outer_context.AddValue(val) + result = with_dependencies([self._pivot], result) + self._external_values[val.name] = result + return result + + def AddOp(self, op): + """Add 'op' to the current context.""" + if not op.inputs: + # Add this op to the enclosing while context + self.AddToWhileContext(op) + # pylint: disable=protected-access + op._add_control_input(self._pivot.op) + # pylint: enable=protected-access + for x in op.outputs: + self._values.add(x.name) + else: + for index in range(len(op.inputs)): + x = op.inputs[index] + if x.name not in self._values: + self._values.add(x.name) + # Add this value to the parent contexts up to the context that + # creates this value. 
+          real_x = x
+          if self._outer_context is not None:
+            real_x = self._outer_context.AddValue(x)
+          real_x = _SwitchRefOrTensor(real_x, self._pred)[self._branch]
+          self._external_values[x.name] = real_x
+        x = self._external_values.get(x.name)
+        if x is not None:
+          op._update_input(index, x)
+      for x in op.outputs:
+        self._values.add(x.name)
+
+  def BuildCondBranch(self, fn):
+    """Add the subgraph defined by fn() to the graph."""
+    r = fn()
+    result = []
+    if r is not None:
+      if not isinstance(r, list) and not isinstance(r, _basetuple):
+        r = [r]
+      for v in r:
+        if isinstance(v, ops.Operation):
+          v = with_dependencies([v], self._pivot)
+        elif v.name not in self._values:
+          self._values.add(v.name)
+          if self._outer_context is not None:
+            v = self._outer_context.AddValue(v)
+          v = _SwitchRefOrTensor(v, self._pred)[self._branch]
+        else:
+          external_v = self._external_values.get(v.name)
+          if external_v is not None:
+            v = external_v
+        result.append(v)
+    return result
+
+
+def cond(pred, fn1, fn2, name=None):
+  """Return either 'fn1()' or 'fn2()' based on the boolean predicate 'pred'.
+
+  `fn1` and `fn2` both return lists of output tensors. `fn1` and `fn2` must have
+  the same number and type of outputs.
+
+  Args:
+    pred: A scalar determining whether to return the result of `fn1` or `fn2`.
+    fn1: The function to be performed if pred is true.
+    fn2: The function to be performed if pred is false.
+    name: Optional name prefix for the returned tensors.
+
+  Returns:
+    Tensors returned by the call to either `fn1` or `fn2`. If the functions
+    return a singleton list, the element is extracted from the list.
+
+  Raises:
+    TypeError: if `fn1` or `fn2` is not callable.
+    ValueError: if `fn1` and `fn2` do not return the same number of tensors, or
+      return tensors of different types.
+
+  Example:
+  ```python
+    x = constant(2)
+    y = constant(5)
+    def f1(): return constant(17)
+    def f2(): return constant(23)
+    r = cond(math_ops.less(x, y), f1, f2)
+    # r is set to f1()
+  ```
+  """
+  with ops.op_scope([pred], name, "Cond") as name:
+    if not callable(fn1):
+      raise TypeError("fn1 must be callable.")
+    if not callable(fn2):
+      raise TypeError("fn2 must be callable.")
+
+    # Add the Switch to the graph.
+    p_2, p_1 = switch(pred, pred)
+    pivot_1 = array_ops.identity(p_1, name="switch_t")
+    pivot_2 = array_ops.identity(p_2, name="switch_f")
+    pred = array_ops.identity(pred, name="pred_id")
+
+    # Build the graph for the true branch in a new context.
+    context_t = CondContext(pred, pivot_1, 1)
+    context_t.Enter()
+    res_t = context_t.BuildCondBranch(fn1)
+    context_t.ExitResult(res_t)
+    context_t.Exit()
+
+    # Build the graph for the false branch in a new context.
+    context_f = CondContext(pred, pivot_2, 0)
+    context_f.Enter()
+    res_f = context_f.BuildCondBranch(fn2)
+    context_f.ExitResult(res_f)
+    context_f.Exit()
+
+    # Add the final merge to the graph.
+ if len(res_t) != len(res_f): + raise ValueError("fn1 and fn2 must return the same number of tensors.") + for x, y in zip(res_f, res_t): + assert ((isinstance(x, ops.IndexedSlices) and + isinstance(y, ops.IndexedSlices)) or + (isinstance(x, ops.Tensor) and isinstance(y, ops.Tensor))) + val_x = x if isinstance(x, ops.Tensor) else x.values + val_y = y if isinstance(y, ops.Tensor) else y.values + if val_x.dtype.base_dtype != val_y.dtype.base_dtype: + raise ValueError("Outputs of fn1 and fn2 must have the same type: " + "%s, %s" % (val_x.dtype.name, val_y.dtype.name)) + merges = [merge([x[0], x[1]])[0] for x in zip(res_f, res_t)] + return merges[0] if len(merges) == 1 else merges + + +# TODO(yuanbyu): We should probably separate the notion of context so it +# could be used not only for conditionals and loops but also subgraphs. +class WhileContext(ControlFlowContext): + """The context for the loop construct.""" + + def __init__(self, parallel_iterations, back_prop, name): + self._name = ops.get_default_graph().unique_name(name) + self._parallel_iterations = parallel_iterations + self._back_prop = back_prop + self._outer_context = None + # We use this node to control constants created by the pred lambda. + self._pivot_for_pred = None + # We use this node to control constants created by the body lambda. + self._pivot_for_body = None + # The boolean tensor for loop termination condition. Used in code + # generation for gradient computation + self._pivot = None + + # The tensors for the counters added by AddForwardCounterLoop or + # AddBackPropCounterLoop + self._index = None + + # Information needed by backprop + self._grad_context = None + self._total_iterations = None + self._history_map = {} + self._switch_map = {} + + # values considered to have been already seen in this context + self._values = set() + + # values referenced by but external to this context + self._external_values = {} + + @property + def name(self): + return self._name + + @property + def parallel_iterations(self): + """The number of iterations allowed to run in parallel.""" + return self._parallel_iterations + + @property + def back_prop(self): + """True iff backprop is enabled for this While loop.""" + return self._back_prop + + @property + def pivot(self): + """The boolean tensor representing the loop termination condition.""" + return self._pivot + + @property + def index(self): + """The loop index representing the current iteration.""" + return self._index + + @property + def grad_context(self): + """The corresponding WhileContext for gradient.""" + return self._grad_context + + @property + def history_map(self): + """The map that records all the tensors needed for backprop.""" + return self._history_map + + @property + def switch_map(self): + """The map that records all the Switch ops in the While loop.""" + return self._switch_map + + @property + def total_iterations(self): + """The total number of iterations of the while loop.""" + return self._total_iterations + + def GetWhileContext(self): + return self + + def GetControlPivot(self): + if self._pivot_for_body: + return self._pivot_for_body + return self._pivot_for_pred + + def AddValue(self, val): + """Add 'val' to the current context and its outer context recursively.""" + result = val + if val.name not in self._values: + self._values.add(val.name) + if self._outer_context is not None: + result = self._outer_context.AddValue(val) + # Create an Enter that makes 'result' known to this context. 
+ enter = _Enter(result, self._name, is_constant=True, + parallel_iterations=self._parallel_iterations) + self._values.add(enter.name) + self._external_values[val.name] = enter + result = enter + else: + actual_val = self._external_values.get(val.name) + if actual_val is not None: + result = actual_val + return result + + def AddOp(self, op): + """Adds 'op' to the current context.""" + if not op.inputs: + if not op.control_inputs: + # Add a control edge from the control pivot to this op. + # pylint: disable=protected-access + op._add_control_input(self.GetControlPivot().op) + # pylint: enable=protected-access + else: + # Control edges must be in the same context. + for x in op.control_inputs: + assert x._get_control_flow_context() == self, ( + "Control inputs must come from Operations in the same while " + "loop context (not an outer context).") + for x in op.outputs: + self._values.add(x.name) + else: + for index in range(len(op.inputs)): + x = op.inputs[index] + self.AddValue(x) + real_x = self._external_values.get(x.name) + if real_x is not None: + op._update_input(index, real_x) + # Add a control dependency to prevent loop invariants from + # enabling ops that should not be executed. + if real_x.op.type == "RefEnter" and real_x.op.get_attr("is_constant"): + # pylint: disable=protected-access + op._add_control_input(self.GetControlPivot().op) + # pylint: enable=protected-access + for x in op.outputs: + self._values.add(x.name) + + def CreateGradWhileContext(self): + """Creates the WhileContext for backprop gradient computation.""" + if self._grad_context is None: + cnt = self.AddForwardCounterLoop() + self._grad_context = WhileContext(self._parallel_iterations, + self._back_prop, self._name) + self._grad_context.AddBackPropCounterLoop(cnt) + return self._grad_context + + def AddForwardCounterLoop(self): + """Adds a loop that counts the number of iterations. + + This is added to the forward loop at the time when we start to + create the loop for backprop gradient computation. + + The pseudocode is: + `n = 0; while (_pivot) { n++; }` + + Returns: + The number of iterations taken by the forward loop. + """ + n = constant_op.constant(0, name="f_count") + self.Enter() + self.AddName(n.name) + enter_n = _Enter(n, self._name, is_constant=False, + parallel_iterations=self._parallel_iterations, + name="f_count") + merge_n = merge([enter_n, enter_n])[0] + switch_n = switch(merge_n, self._pivot) + self._index = switch_n[1] + + add_n = math_ops.add(self._index, 1) + next_n = next_iteration(add_n) + merge_n.op._update_input(1, next_n) + + self._total_iterations = exit(switch_n[0], name="f_count") + self.Exit() + return self._total_iterations + + def AddForwardAccumulateLoop(self, value): + """Add an accumulation loop for each value needed in backprop. + + This is added to the forward loop at the first time when a value + in the forward loop is used by backprop gradient computation loop. + + The pseudocode is: + ``` + acc; + while (_pivot) { + if (index == 0) [value] else Concat(acc, [value]); + } + ``` + + Args: + value: The tensor that is accumulated. + + Returns: + The accumulated history of value. + + Raises: + ValueError: If the shape of "value" is not known statically. + """ + if not value.get_shape().is_fully_defined(): + raise ValueError("Must have known shape: %s" % value) + self._grad_context.Exit() + # TODO(irving): Now that acc starts out empty, most of the + # conditional logic can go away. 
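`AddForwardCounterLoop` above counts the forward iterations so that the backprop counter loop added below can run the gradient loop the same number of times. A plain-Python sketch of that pairing, where `cond_holds` is a hypothetical stand-in for the loop pivot:

```python
def forward_count(cond_holds):
  # Forward counter: n = 0; while (pivot) { n++; }
  n = 0
  while cond_holds(n):
    n += 1
  return n

def backprop_countdown(count):
  # Backprop counter: n = count; while (n >= 1) { n--; }
  iterations = 0
  n = count
  while n >= 1:
    n -= 1
    iterations += 1
  return iterations

# Both loops execute the same number of iterations.
assert backprop_countdown(forward_count(lambda i: i < 7)) == 7
```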
+ acc = constant_op.constant([], + value.dtype, + shape=[0] + value.get_shape().as_list(), + name="f_acc") + self.Enter() + self.AddName(acc.name) + enter_acc = _Enter(acc, self._name, is_constant=False, + parallel_iterations=self._parallel_iterations, + name="f_acc") + merge_acc = merge([enter_acc, enter_acc])[0] + switch_acc = switch(merge_acc, self._pivot) + + # If index = 0 then [value] else Concat(acc, [value]). + cond = math_ops.greater(self._index, 0) + switch_add_acc = switch(switch_acc[1], cond) + expand_value = array_ops.expand_dims(value, 0) + true_branch = array_ops.concat(0, [switch_add_acc[1], expand_value]) + false_branch = array_ops.identity(switch_add_acc[0]) + false_branch = with_dependencies([false_branch], expand_value) + add_acc = merge([false_branch, true_branch])[0] + + next_acc = next_iteration(add_acc) + merge_acc.op._update_input(1, next_acc) + + exit_acc = exit(switch_acc[0], name="f_acc") + self.Exit() + self._grad_context.Enter() + return exit_acc + + def AddForwardAccumulateCondLoop(self, value): + """Add an accumulation loop for each conditional switch. + + This is added to the forward loop at the first time when a conditional + switch in the forward loop is used by backprop gradient computation loop. + + The pseudocode is: + ``` + acc; + while (_pivot) { + Concat(acc, value); + } + ``` + + Args: + value: The boolean tensor that is accumulated. + + Returns: + The accumulated history of value. + """ + self._grad_context.Exit() + acc = constant_op.constant(False, name="f_acc") + self.Enter() + self.AddName(acc.name) + enter_acc = _Enter(acc, self._name, is_constant=False, + parallel_iterations=self._parallel_iterations, + name="f_acc") + merge_acc = merge([enter_acc, enter_acc])[0] + switch_acc = switch(merge_acc, self._pivot) + acc = array_ops.concat(0, [switch_add_acc[1], value]) + next_acc = next_iteration(acc) + merge_acc.op._update_input(1, next_acc) + + exit_acc = exit(switch_acc[0], name="f_acc") + self.Exit() + self._grad_context.Enter() + return exit_acc + + def AddBackPropCounterLoop(self, count): + """Add the backprop loop that controls the iterations. + + This is added to the backprop loop. It is used to control the loop + termination and the slice index. + + The pseudocode is: + `n = count; while (n >= 1) { n--; }` + + Args: + count: The number of iterations for backprop. + + Returns: + always 0. + """ + one = constant_op.constant(1, name="b_count") + self.Enter() + self.AddName(count.name) + enter_count = _Enter(count, self._name, is_constant=False, + parallel_iterations=self._parallel_iterations, + name="b_count") + merge_count = merge([enter_count, enter_count])[0] + self._pivot_for_pred = merge_count + + cond = math_ops.greater_equal(merge_count, one) + self._pivot = loop_cond(cond, name="b_count") + switch_count = switch(merge_count, self._pivot) + + # Add next_iteration right after Switch to match the gradient function. + next_count = next_iteration(switch_count[1]) + self._pivot_for_body = next_count + self._index = math_ops.sub(next_count, one) + merge_count.op._update_input(1, self._index) + + exit_count = exit(switch_count[0], name="b_count") + self.Exit() + return exit_count + + def AddBackPropAccumulateLoop(self, value): + """Add an accumulation loop for every loop invariant. + + This is added to the backprop loop. It is used to accumulate partial + gradients for each loop iteration. Called when in the while context + for gradient. 
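The forward accumulate loops above record a history of per-iteration values so the gradient loop can replay them. A numpy sketch of the tensor the history accumulator ends up holding (it assumes the per-iteration values are already available as a Python list; purely illustrative):

```python
import numpy as np

def accumulate_history(per_iteration_values):
  # Start with an empty accumulator and append each iteration's value along
  # a new leading axis, mirroring the concat-based loop above.
  first = np.asarray(per_iteration_values[0])
  acc = np.zeros((0,) + first.shape, dtype=first.dtype)
  for v in per_iteration_values:
    acc = np.concatenate([acc, np.asarray(v)[np.newaxis, ...]], axis=0)
  return acc

history = accumulate_history([np.ones(3), 2.0 * np.ones(3)])
assert history.shape == (2, 3)
assert history[1].tolist() == [2.0, 2.0, 2.0]
```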
+ + The pseudocode is: + ``` + acc = 0; + while (_pivot) { + acc += value; + } + ``` + + Args: + value: The partial gradient of an iteration for a loop invariant. + + Returns: + The gradient for a loop invariant. + """ + self.Exit() + acc = constant_op.constant(0, value.dtype, name="b_acc") + self.Enter() + self.AddName(acc.name) + enter_acc = _Enter(acc, self._name, is_constant=False, + parallel_iterations=self._parallel_iterations, + name="b_acc") + merge_acc = merge([enter_acc, enter_acc], name="b_acc")[0] + switch_acc = switch(merge_acc, self._pivot) + + next_acc = next_iteration(switch_acc[1]) + add_acc = math_ops.add(next_acc, value) + merge_acc.op._update_input(1, add_acc) + + exit_acc = exit(switch_acc[0], name="b_acc") + return exit_acc + + def BuildLoop(self, pred, body, loop_vars): + """Add the loop termination condition and body to the graph.""" + + loop_vars = ops.convert_n_to_tensor_or_indexed_slices(loop_vars) + # Let the context know the loop variabes so the _Enter nodes below + # would be added into the context correctly. + self._values = set([x.name for x in loop_vars]) + if self._outer_context is not None: + real_vars = [self._outer_context.AddValue(x) for x in loop_vars] + else: + real_vars = loop_vars + enter_vars = [_Enter(x, self._name, is_constant=False, + parallel_iterations=self._parallel_iterations) + for x in real_vars] + self._values = set([x.name for x in enter_vars]) + + merge_vars = [merge([x, x])[0] for x in enter_vars] + self._pivot_for_pred = merge_vars[0] + + # Build the graph for pred. + c = ops.convert_to_tensor(pred(*merge_vars)) + self._pivot = loop_cond(c, name="LoopCond") + switch_vars = [_SwitchRefOrTensor(x, self._pivot) for x in merge_vars] + + # Build the graph for body. + vars_for_body = [_Identity(x[1]) for x in switch_vars] + self._pivot_for_body = vars_for_body[0] + + body_result = body(*vars_for_body) + if not isinstance(body_result, (list, _basetuple)): + body_result = [body_result] + result = ops.convert_n_to_tensor_or_indexed_slices(body_result) + next_vars = [next_iteration(x) for x in result] + + # Add the back edges to complete the loop. + assert len(merge_vars) == len(next_vars) + for x in zip(merge_vars, next_vars): + x[0].op._update_input(1, x[1]) + + # Add the exit ops. + exit_vars = [exit(x[0]) for x in switch_vars] + + for m_var, n_var, e_var in zip(merge_vars, next_vars, exit_vars): + if m_var.get_shape().is_compatible_with(n_var.get_shape()): + e_var.set_shape(m_var.get_shape().merge_with(n_var.get_shape())) + + # Exit the loop. + self.ExitResult(exit_vars) + self.Exit() + return exit_vars[0] if len(exit_vars) == 1 else exit_vars + + +def While(cond, body, loop_vars, parallel_iterations=10, back_prop=True, + name=None): + """Repeat `body` while the condition `cond` is true. + + `cond` is a function taking a list of tensors and returning a boolean scalar + tensor. `body` is a function taking a list of tensors and returning a list of + tensors of the same length and with the same types as the input. `loop_vars` + is a list of tensors that is passed to both `cond` and `body`. + + While `cond` evaluates to true, `body` is executed. + + Args: + cond: The termination condition of the loop. + body: A function that represents the loop body. + loop_vars: The list of variable input tensors. + parallel_iterations: The number of iterations allowed to run in parallel. + back_prop: Whether backprop is enabled for this while loop. + name: Optional name prefix for the returned tensors. 
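`BuildLoop` above wires Enter, Merge, LoopCond, Switch, the body, NextIteration, and Exit into a dataflow loop. As a plain-Python model of what the resulting construct computes (illustrative only; the real op builds a graph rather than executing eagerly):

```python
def while_semantics(cond, body, loop_vars):
  # Plain-Python model of the While construct's result.
  loop_vars = list(loop_vars)
  while cond(*loop_vars):
    result = body(*loop_vars)
    loop_vars = list(result) if isinstance(result, (list, tuple)) else [result]
  # Like While(), a single loop variable is returned unwrapped.
  return loop_vars[0] if len(loop_vars) == 1 else loop_vars

assert while_semantics(lambda i: i < 10, lambda i: i + 1, [0]) == 10
```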
+ + Returns: + The output tensors for the loop variables after the loop. + + Raises: + TypeError: if `cond` or `body` is not callable. + ValueError: if `loop_var` is empty. + + Example: + ```python + i = Constant(0) + c = lambda i: math_ops.less(i, 10) + b = lambda i: math_ops.add(i, 1) + r = While(c, b, [i]) + ``` + """ + with ops.op_scope(loop_vars, name, "While") as name: + if not loop_vars: + raise ValueError("No loop variables provided") + if not callable(cond): + raise TypeError("cond must be callable.") + if not callable(body): + raise TypeError("body must be callable.") + + context = WhileContext(parallel_iterations, back_prop, name) + context.Enter() + return context.BuildLoop(cond, body, loop_vars) + + +def _AsTensorList(x, p): + """Return x as a list of Tensors or IndexedSlices. + + For entries of `x` that are Operations, this returns an Identity of `p` + with a dependency on the operation. + + Args: + x: A Tensor/IndexedSlices/Operation or a list or tuple of them. + p: A Tensor to return for entries in `x` that are Operations. + + Returns: + A list of Tensors or IndexedSlices. + """ + if not isinstance(x, list) and not isinstance(x, _basetuple): + x = [x] + + l = [] + for v in x: + if isinstance(v, ops.Operation): + v = with_dependencies([v], p) + v = ops.convert_to_tensor_or_indexed_slices(v) + if isinstance(v, ops.Tensor): + l.append(array_ops.identity(v)) + else: + l.append(ops.IndexedSlices(array_ops.identity(v.values), + array_ops.identity(v.indices))) + return l + + +def _CheckResults(a, b): + assert len(a) == len(b), ( + "Values returned by a() and b() must have the same length.") + for x, y in zip(a, b): + assert x.dtype == y.dtype, ( + "Values returned by a() [%s] and b() [%s] must have " + "the same type: %s, %s." % + (x.name, y.name, x.dtype.name, y.dtype.name)) + + +def with_dependencies(dependencies, output_tensor, name=None): + """Produces the content of `output_tensor` only after `dependencies`. + + In some cases, a user may want the output of an operation to be + consumed externally only after some other dependencies have run + first. This function ensures returns `output_tensor`, but only after all + operations in `dependencies` have run. Note that this means that there is + no guarantee that `output_tensor` will be evaluated after any `dependencies` + have run. + + See also `tuple` and `group`. + + Args: + dependencies: A list of operations to run before this op finishes. + output_tensor: A `Tensor` or `IndexedSlices` that will be returned. + name: (Optional) A name for this operation. + + Returns: + Same as `output_tensor`. + + Raises: + TypeError: if `output_tensor` is not a `Tensor` or `IndexedSlices`. + """ + with ops.op_scope(dependencies + [output_tensor], name, + "control_dependency") as name: + with ops.device(output_tensor.device + or ops.get_default_graph().get_default_device()): + with ops.control_dependencies(dependencies): + output_tensor = ops.convert_to_tensor_or_indexed_slices(output_tensor) + if isinstance(output_tensor, ops.Tensor): + return _Identity(output_tensor, name=name) + else: + return ops.IndexedSlices(_Identity(output_tensor.values, name=name), + output_tensor.indices, + output_tensor.dense_shape) + + +def _GroupControlDeps(dev, deps, name=None): + with ops.control_dependencies(deps): + if dev is None: + return no_op(name=name) + else: + with ops.device(dev): + return no_op(name=name) + + +# TODO(mdevin): Accept "inputs" as a list. +def group(*inputs, **kwargs): + """Create an op that groups multiple operations. 
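A short usage sketch for `with_dependencies` defined above, mirroring its use in the unit test later in this change; `update` is just a constant standing in for a side-effecting op (for example a variable update) that must run before `value` is consumed:

```python
from tensorflow.python.ops import constant_op
from tensorflow.python.ops import control_flow_ops

# Stand-ins: in real use `update` would be an op with a side effect.
update = constant_op.constant(0.0, name="update")
value = constant_op.constant([1.0, 2.0], name="value")

# `gated_value` has the same value as `value`, but is produced only after
# `update` has been computed.
gated_value = control_flow_ops.with_dependencies([update], value)
```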
+ + When this op finishes, all ops in `input` have finished. This op has no + output. + + See also `tuple` and `with_dependencies`. + + Args: + *inputs: One or more tensors to group. + **kwargs: Optional parameters to pass when constructing the NodeDef. + name: A name for this operation (optional). + + Returns: + An Operation that executes all its inputs. + + Raises: + ValueError: If an unknown keyword argument is provided, or if there are + no inputs. + """ + name = kwargs.pop("name", None) + if kwargs: + raise ValueError("Unknown keyword arguments: " + ", ".join(kwargs.keys())) + if not inputs: + # TODO(mdevin): Would make sense to return a NoOp. + raise ValueError("No inputs provided") + with ops.op_scope(inputs, name, "group_deps") as name: + # Sorts *inputs according to their devices. + ops_on_device = {} # device -> operations specified on the device. + for inp in inputs: + dev = inp.device + if dev in ops_on_device: + ops_on_device[dev].append(inp) + else: + ops_on_device[dev] = [inp] + if len(ops_on_device) == 1: + # 1-level tree. The root node is the returned NoOp node. + dev, deps = ops_on_device.items()[0] + return _GroupControlDeps(dev, deps, name=name) + # 2-level tree. The root node is the returned NoOp node. + # deps contains 1 NoOp node for each device. + deps = [] + for dev in sorted(ops_on_device.iterkeys()): + deps.append(_GroupControlDeps(dev, ops_on_device[dev])) + return _GroupControlDeps(None, deps, name=name) + +def tuple(tensors, name=None, control_inputs=None): + """Group tensors together. + + This creates a tuple of tensors with the same values as the `tensors` + argument, except that the value of each tensor is only returned after the + values of all tensors have been computed. + + `control_inputs` contains additional ops that have to finish before this op + finishes, but whose outputs are not returned. + + This can be used as a "join" mechanism for parallel computations: all the + argument tensors can be computed in parallel, but the values of any tensor + returned by `tuple` are only available after all the parallel computations + are done. + + See also `group` and `with_dependencies`. + + Args: + tensors: A list of `Tensor`s or `IndexedSlices`, some entries can be `None`. + name: (optional) A name to use as a `name_scope` for the operation. + control_inputs: List of additional ops to finish before returning. + + Returns: + Same as `tensors`. + + Raises: + ValueError: If `tensors` does not contain any `Tensor` or `IndexedSlices`. + + """ + with ops.op_scope(tensors, name, "tuple") as name: + gating_ops = [t.op for t in tensors if t] + if control_inputs: + gating_ops += control_inputs + # Note that in order to ensure ordering in the pbtxt, we must take care to + # ensure the order here. + gating_ops = sorted(set(gating_ops), key=lambda op: op._id) # Uniquify ops. + if not gating_ops: + raise ValueError("Must have at least one Tensor: %s" % tensors) + gate = group(*gating_ops) + tpl = [] + for t in tensors: + if t: + tpl.append(with_dependencies([gate], t)) + else: + tpl.append(None) + return tpl + + +# TODO(yuanbyu): It would be nicer if we could have the distributed list +# support that Derek has been proposing. +# TODO(yuanbyu, mrry): Handle stride to support sliding windows. +def fold(fn, elems, elem_shape, name=None): + """The fold operator on slices of a tensor. + + This fold operator applies the function `fn` to slices of `elems` on + dimension 0. The shape of the slices is specified by `elem_shape`. 
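`group` and `tuple` above offer two join styles: `group` returns a single no-output Operation, while `tuple` returns gated copies of the tensors themselves. A hedged sketch with constants standing in for real work:

```python
from tensorflow.python.ops import constant_op
from tensorflow.python.ops import control_flow_ops

a = constant_op.constant(1.0, name="a")
b = constant_op.constant(2.0, name="b")

# One Operation with no outputs; running it runs both inputs.
joined = control_flow_ops.group(a.op, b.op, name="joined")

# Tensors equal to the inputs, each produced only after *all* inputs have
# been computed.
a_gated, b_gated = control_flow_ops.tuple([a, b])
```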
`elems` + must contain at least one slice (`shape(elems)[0] / elem_shape[0] > 0`). + + Args: + fn: The function to be performed on each slice of the tensor. + elems: The tensor to whose slices we want to apply `fn`. + elem_shape: The shape definition for the slices. + name: Optional name prefix for the returned tensors. + + Returns: + A tensor resulting from applying `fn` consecutively on each slice of + `elems`. + + Raises: + TypeError: if `fn` is not callable. + """ + with ops.op_scope([elems], name, "Fold") as name: + if not callable(fn): + raise TypeError("fn must be callable.") + + s0 = array_ops.shape(elems)[0] + d0 = elem_shape[0] + n = math_ops.div(s0, d0) + b1 = array_ops.zeros(array_ops.expand_dims(array_ops.rank(elems) - 1, 0), + dtype=types.int32) + # Initialize the output with slice 0 + b = array_ops.concat(0, [[0], b1]) + o = array_ops.slice(elems, b, elem_shape) + i = ops.convert_to_tensor(d0) + + def Compute(i, o): + b = array_ops.concat(0, [array_ops.expand_dims(i, 0), b1]) + x = array_ops.slice(elems, b, elem_shape) + o = fn(o, x) + i = math_ops.add(i, d0) + return [i, o] + r = While(lambda i, o: math_ops.less(i, n), Compute, [i, o]) + return r[1] + + +def case(pred_fn_pairs, default, exclusive=False, name="Case"): + """Create a Case operation. + + The `pred_fn_pairs` parameter is a dict or list of pairs of size N. + Each pair contains a boolean scalar tensor and a python callable that + creates the tensors to be returned if the boolean evaluates to True. `default` + is a callable generating a list of tensors. All the callables in + `pred_fn_pairs` as well as `default` should return the same number and types + of tensors. + + If `exclusive==True`, all predicates are evaluated, and a logging operation + with an error is returned if more than one of the predicates evaluates to + True. If `exclusive==False`, execution stops are the first predicate which + evaluates to True, and the tensors generated by the corresponding function + are returned immediately. If none of the predicates evaluate to True, this + operation returns the tensors generated by `default`. + + Example 1: + Pseudocode: + ``` + if (x < y) return 17; + else return 23; + ``` + + Expressions: + ``` + f1 = lambda: Constant(17) + f2 = lambda: Constant(23) + r = Case([(math_ops.less(x, y), f1)], default=f2) + ``` + + Example 2: + Pseudocode: + ``` + if (x < y && x > z) raise OpError("Only one predicate may evaluate true"); + if (x < y) return 17; + else if (x > z) return 23; + else return -1; + ``` + + Expressions: + ``` + def f1(): return Constant(17) + def f2(): return Constant(23) + def f3(): return Constant(-1) + r = Case({math_ops.less(x, y): f1, math_ops.greater(x, z): f2}, + default=f3, exclusive=True) + ``` + + Args: + pred_fn_pairs: Dict or list of pairs of a boolean scalar tensor and a + callable which returns a list of tensors. + default: A callable that returns a list of tensors. + exclusive: True iff more than one predicate is allowed to evaluate to True. + name: A name for this operation (optional). + + Returns: + The tensors returned by the first pair whose predicate evaluated to True, or + those returned by `default` if none does. + + Raises: + TypeError: If `pred_fn_pairs` is not a list/dictionary. + TypeError: If `pred_fn_pairs` is a list but does not contain 2-tuples. + TypeError: If `fns[i]` is not callable for any i, or `default` is not + callable. 
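The `fold` operator defined above seeds its result with slice 0 of `elems` and then folds `fn` over the remaining fixed-shape slices along dimension 0. A numpy sketch of the same computation (an illustrative model, not the graph implementation):

```python
import numpy as np

def fold_semantics(fn, elems, elem_shape):
  # Plain numpy model of fold(): seed with slice 0, fold fn over the rest.
  d0 = elem_shape[0]
  out = elems[0:d0]
  for i in range(d0, elems.shape[0], d0):
    out = fn(out, elems[i:i + d0])
  return out

elems = np.arange(6.0).reshape(6, 1)
# Summing 2-row slices: column-wise [0 + 2 + 4, 1 + 3 + 5].
total = fold_semantics(lambda acc, x: acc + x, elems, elem_shape=[2, 1])
assert total.tolist() == [[6.0], [9.0]]
```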
+ """ + pfp = pred_fn_pairs # For readability + if not (isinstance(pfp, list) or isinstance(pfp, _basetuple) + or isinstance(pfp, dict)): + raise TypeError("fns must be a list, tuple, or dict") + if isinstance(pfp, dict): + pfp = pfp.items() + if not exclusive: + logging.warn("%s: Provided dictionary of predicate/fn pairs, but " + "exclusive=False. Order of conditional tests is " + "not guaranteed." % name) + for tup in pfp: + if not isinstance(tup, _basetuple) or len(tup) != 2: + raise TypeError("Each entry in pred_fn_pairs must be a 2-tuple") + pred, fn = tup + if pred.dtype != types.bool: + raise TypeError("pred must be of type bool: %s", pred.name) + if not callable(fn): + raise TypeError("fn for pred %s must be callable." % pred.name) + if not callable(default): + raise TypeError("default must be callable.") + + preds, fns = map(list, zip(*pfp)) + with ops.op_scope([[f() for f in fns] + preds + [default()]], name, "Case"): + if not preds: + return default() + not_preds = [] + for i, p in enumerate(preds): + with ops.name_scope("not_%d" % i): + not_preds.append(math_ops.logical_not(p)) + and_not_preds = [constant_op.constant(True, name="and_not_true")] + for i, notp in enumerate(not_preds[:-1]): + with ops.name_scope("and_not_%d" % i): + and_not_preds.append(math_ops.logical_and(and_not_preds[-1], notp)) + + # preds = [p1, p2, p3] + # fns = [f1, f2, f3] + # not_preds = [~p1, ~p2, ~p3] + # case_preds = [p1 & True, + # p2 & ~p1, + # p3 & ~p1 & ~ p2] + case_preds = [] + for i, (p, and_not_p_prev) in enumerate(zip(preds, and_not_preds)): + with ops.name_scope("case_%d" % i): + case_preds.append(math_ops.logical_and(p, and_not_p_prev)) + + # case_sequence = [Cond(p3 & ..., f3, default), + # Cond(p2 & ..., f2, lambda: case_sequence[0]), + # ... + # Cond(p1 & True, f1, lambda: case_sequence[i-1])] + # and prev_case_seq will loop from case_sequence[0] to case_sequence[-1] + if exclusive: + # TODO(ebrevdo): Add Where() for DT_BOOL, replace with Size(Where(preds)) + preds_c = array_ops.concat(0, preds, name="preds_c") + num_true_conditions = math_ops.reduce_sum( + math_ops.cast(preds_c, types.int32), name="num_true_conds") + at_most_one_true_condition = math_ops.less( + num_true_conditions, constant_op.constant(2, name="two_true_conds")) + + error_msg = [ + ("More than one condition evaluated as True but " + "exclusive=True. 
Conditions: (%s), Values:" + % ", ".join([p.name for p in preds])), + preds_c] + with ops.control_dependencies([ + logging_ops.Assert(condition=at_most_one_true_condition, + data=error_msg, summarize=len(preds))]): + prev_case_seq = default() + for i, (cp, fn) in enumerate(zip(case_preds, fns)[::-1]): + prev_case_seq = cond(cp, fn, lambda: prev_case_seq, name="If_%d" % i) + else: + prev_case_seq = default() + for i, (cp, fn) in enumerate(zip(case_preds, fns)[::-1]): + prev_case_seq = cond(cp, fn, lambda: prev_case_seq, name="If_%d" % i) + + return prev_case_seq + + +ops.RegisterShape("Enter")(common_shapes.unchanged_shape) +ops.RegisterShape("Exit")(common_shapes.unknown_shape) +ops.RegisterShape("NextIteration")(common_shapes.unchanged_shape) +ops.RegisterShape("RefEnter")(common_shapes.unchanged_shape) +ops.RegisterShape("ControlTrigger")(common_shapes.no_outputs) +ops.RegisterShape("NoOp")(common_shapes.no_outputs) + + +@ops.RegisterShape("LoopCond") +def _LoopCondShape(op): + """Shape function for the LoopCond op.""" + return [op.inputs[0].get_shape().merge_with(tensor_shape.scalar())] + + +@ops.RegisterShape("Merge") +def _MergeShape(op): + """Shape function for the Merge op. + + The Merge op takes many inputs of arbitrary shapes, and produces a + first output that is one of those inputs, and a second scalar + output. + + This function conservatively assumes that if any of its inputs is + not fully defined, the output shape is unknown. If all of the inputs + have the exact same known shape, the output must have that shape. + + Args: + op: A Merge Operation. + + Returns: + A single-element list containing the Shape of the Merge op. + + """ + first_input_shape = op.inputs[0].get_shape() + if first_input_shape.is_fully_defined(): + for input_ in op.inputs[1:]: + input_shape = input_.get_shape() + if (not input_shape.is_fully_defined() + or not input_shape.is_compatible_with(first_input_shape)): + return [tensor_shape.unknown_shape(), tensor_shape.scalar()] + return [first_input_shape, tensor_shape.scalar()] + else: + return [tensor_shape.unknown_shape(), tensor_shape.scalar()] + + +@ops.RegisterShape("RefSelect") +def _RefSelectShape(op): + """Shape function for the RefSelect op. + + The RefSelect takes one scalar input and N inputs of arbitrary + shapes, and produces one output, which is one of those N inputs. + + This function conservatively assumes that if any of the N inputs is + not fully defined, the output shape is unknown. If all of the N + inputs have the exact same known shape, the output must have that + shape. + + Args: + op: A RefSelect Operation. + + Returns: + A single-element list containing the Shape of the RefSelect op. 
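The `case` construct defined earlier in this file lowers its predicate/fn pairs into a chain of nested `cond` ops. A plain-Python model of the contract it implements (illustrative only):

```python
def case_semantics(pred_fn_pairs, default, exclusive=False):
  # Return the first true pair's outputs, or default(); with exclusive=True,
  # more than one true predicate is an error.
  true_fns = [fn for pred, fn in pred_fn_pairs if pred]
  if exclusive and len(true_fns) > 1:
    raise ValueError("More than one condition evaluated as True")
  return true_fns[0]() if true_fns else default()

assert case_semantics([(False, lambda: 17), (True, lambda: 23)],
                      default=lambda: -1) == 23
assert case_semantics([], default=lambda: -1) == -1
```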
+ """ + unused_shape = op.inputs[0].get_shape().merge_with(tensor_shape.scalar()) + first_input_shape = op.inputs[1].get_shape() + if first_input_shape.is_fully_defined(): + for input_ in op.inputs[2:]: + input_shape = input_.get_shape() + if (not input_shape.is_fully_defined() + or not input_shape.is_compatible_with(first_input_shape)): + return [tensor_shape.unknown_shape()] + return [first_input_shape] + else: + return [tensor_shape.unknown_shape()] + + +@ops.RegisterShape("RefSwitch") +@ops.RegisterShape("Switch") +def _SwitchShape(op): + input_shape = op.inputs[0].get_shape() + unused_pred_shape = op.inputs[1].get_shape().merge_with(tensor_shape.scalar()) + return [input_shape] * 2 diff --git a/tensorflow/python/ops/control_flow_ops_test.py b/tensorflow/python/ops/control_flow_ops_test.py new file mode 100644 index 0000000000..34b1ab0a25 --- /dev/null +++ b/tensorflow/python/ops/control_flow_ops_test.py @@ -0,0 +1,88 @@ +"""Tests for control_flow_ops.py.""" +import tensorflow.python.platform + +from tensorflow.core.framework import graph_pb2 +from tensorflow.python.framework import ops +from tensorflow.python.framework.test_util import TensorFlowTestCase +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import standard_ops as tf +from tensorflow.python.platform import googletest + + +class GroupTestCase(TensorFlowTestCase): + + def _StripNode(self, nd): + snode = graph_pb2.NodeDef(name=nd.name, op=nd.op, input=nd.input) + if nd.device: + snode.device = nd.device + return snode + + def _StripGraph(self, gd): + """Copy gd keeping only, node.name, node.op, node.input, and node.device.""" + return graph_pb2.GraphDef(node=[self._StripNode(nd) for nd in gd.node]) + + def testGroup_NoDevices(self): + with ops.Graph().as_default() as g: + a = tf.constant(0, name="a") + b = tf.constant(0, name="b") + c = tf.constant(0, name="c") + tf.group(a.op, b.op, c.op, name="root") + gd = g.as_graph_def() + self.assertProtoEquals(""" + node { name: "a" op: "Const"} + node { name: "b" op: "Const"} + node { name: "c" op: "Const"} + node { name: "root" op: "NoOp" input: "^a" input: "^b" input: "^c" } + """, self._StripGraph(gd)) + + def testGroup_OneDevice(self): + with ops.Graph().as_default() as g: + with g.device("/task:0"): + a = tf.constant(0, name="a") + b = tf.constant(0, name="b") + tf.group(a.op, b.op, name="root") + gd = g.as_graph_def() + self.assertProtoEquals(""" + node { name: "a" op: "Const" device: "/task:0" } + node { name: "b" op: "Const" device: "/task:0" } + node { name: "root" op: "NoOp" input: "^a" input: "^b" device: "/task:0" } + """, self._StripGraph(gd)) + + def testGroup_MultiDevice(self): + with ops.Graph().as_default() as g: + with g.device("/task:0"): + a = tf.constant(0, name="a") + b = tf.constant(0, name="b") + with g.device("/task:1"): + c = tf.constant(0, name="c") + d = tf.constant(0, name="d") + with g.device("/task:2"): + tf.group(a.op, b.op, c.op, d.op, name="root") + gd = g.as_graph_def() + self.assertProtoEquals(""" + node { name: "a" op: "Const" device: "/task:0"} + node { name: "b" op: "Const" device: "/task:0"} + node { name: "c" op: "Const" device: "/task:1"} + node { name: "d" op: "Const" device: "/task:1"} + node { name: "root/NoOp" op: "NoOp" input: "^a" input: "^b" + device: "/task:0" } + node { name: "root/NoOp_1" op: "NoOp" input: "^c" input: "^d" + device: "/task:1" } + node { name: "root" op: "NoOp" input: "^root/NoOp" input: "^root/NoOp_1" + device: "/task:2" } + """, self._StripGraph(gd)) + + +class 
ShapeTestCase(TensorFlowTestCase): + + def testShape(self): + with ops.Graph().as_default(): + tensor = tf.constant([1.0, 2.0]) + self.assertEquals([2], tensor.get_shape()) + self.assertEquals([2], + control_flow_ops.with_dependencies( + [tf.constant(1.0)], tensor).get_shape()) + + +if __name__ == "__main__": + googletest.main() diff --git a/tensorflow/python/ops/data_flow_grad.py b/tensorflow/python/ops/data_flow_grad.py new file mode 100644 index 0000000000..d2473490ce --- /dev/null +++ b/tensorflow/python/ops/data_flow_grad.py @@ -0,0 +1,37 @@ +"""Gradients for operators defined in data_flow_ops.py.""" + +from tensorflow.python.framework import ops +from tensorflow.python.framework import types +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import constant_op +from tensorflow.python.ops import data_flow_ops +from tensorflow.python.ops import gen_data_flow_ops +from tensorflow.python.ops import math_ops + + +@ops.RegisterGradient("DynamicStitch") +def _DynamicStitchGrads(op, grad): + """Gradients for DynamicStitch.""" + + num_values = len(op.inputs) / 2 + indices_grad = [None] * num_values + + def AsInt32(x): + return (x if op.inputs[0].dtype == types.int32 else + math_ops.cast(x, types.int32)) + inputs = [AsInt32(op.inputs[i]) for i in range(num_values)] + if isinstance(grad, ops.IndexedSlices): + output_shape = array_ops.shape(op.outputs[0]) + output_rows = output_shape[0] + grad = math_ops.unsorted_segment_sum(grad.values, grad.indices, output_rows) + values_grad = [array_ops.gather(grad, inp) for inp in inputs] + return indices_grad + values_grad + + +ops.NoGradient("Queue") +ops.NoGradient("QueueEnqueue") +ops.NoGradient("QueueEnqueueMany") +ops.NoGradient("QueueDequeue") +ops.NoGradient("QueueDequeueMany") +ops.NoGradient("QueueClose") +ops.NoGradient("QueueSize") diff --git a/tensorflow/python/ops/data_flow_ops.py b/tensorflow/python/ops/data_flow_ops.py new file mode 100644 index 0000000000..5c8ab66297 --- /dev/null +++ b/tensorflow/python/ops/data_flow_ops.py @@ -0,0 +1,680 @@ +"""Data Flow Operations.""" +# pylint: disable=g-bad-name +import re + +from tensorflow.python.framework import ops +from tensorflow.python.framework import random_seed +from tensorflow.python.framework import tensor_shape +from tensorflow.python.framework import tensor_util +from tensorflow.python.framework import types +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import common_shapes +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import gen_data_flow_ops +# pylint: disable=wildcard-import +from tensorflow.python.ops.gen_data_flow_ops import * + + +def _as_type_list(dtypes): + """Convert dtypes to a list of types.""" + assert dtypes is not None + if not (isinstance(dtypes, list) or isinstance(dtypes, tuple)): + # We have a single type. + return [dtypes] + else: + # We have a list or tuple of types. + return list(dtypes) + + +def _as_shape_list(shapes, dtypes): + """Convert shapes to a list of tuples of int (or None).""" + if shapes is None: return None + if isinstance(shapes, tensor_shape.TensorShape): + shapes = [shapes] + if not isinstance(shapes, (tuple, list)): + raise TypeError( + "shapes must be a TensorShape or a list or tuple of TensorShapes.") + if all(isinstance(shape, int) for shape in shapes): + # We have a single shape. 
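`_DynamicStitchGrads` above returns, for each values input, the incoming gradient gathered at that input's indices. A numpy sketch of the forward `dynamic_stitch` makes clear why (illustrative model, not the kernel):

```python
import numpy as np

def dynamic_stitch(indices, values):
  # Scatter each values[i] into the output at positions indices[i]. Every
  # output row comes from exactly one input row, so the gradient w.r.t.
  # values[i] is simply the output gradient gathered at indices[i].
  size = max(int(np.max(idx)) for idx in indices) + 1
  first = np.asarray(values[0])
  out = np.zeros((size,) + first.shape[1:], dtype=first.dtype)
  for idx, val in zip(indices, values):
    out[np.asarray(idx)] = val
  return out

merged = dynamic_stitch([np.array([0, 2]), np.array([1])],
                        [np.array([[1.0], [3.0]]), np.array([[2.0]])])
assert merged.tolist() == [[1.0], [2.0], [3.0]]
```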
+ shapes = [shapes] + shapes = [tensor_shape.as_shape(shape) for shape in shapes] + if any(not shape.is_fully_defined() for shape in shapes): + raise ValueError("All shapes must be fully defined.") + return shapes + + +# pylint: disable=protected-access +class QueueBase(object): + """Base class for queue implementations. + + A queue is a TensorFlow data structure that stores tensors across + multiple steps, and exposes operations that enqueue and dequeue + tensors. + + Each queue element is a tuple of one or more tensors, where each + tuple component has a static dtype, and may have a static shape. The + queue implementations support versions of enqueue and dequeue that + handle single elements, versions that support enqueuing and + dequeuing a batch of elements at once. + + See [`tf.FIFOQueue`](#FIFOQueue) and + [`tf.RandomShuffleQueue`](#RandomShuffleQueue) for concrete + implementations of this class, and instructions on how to create + them. + + @@enqueue + @@enqueue_many + + @@dequeue + @@dequeue_many + + @@size + + @@close + + """ + + def __init__(self, dtypes, shapes, queue_ref): + """Constructs a queue object from a queue reference. + + Args: + dtypes: A list of types. The length of dtypes must equal the number + of tensors in each element. + shapes: Constraints on the shapes of tensors in an element: + A list of shape tuples or None. This list is the same length + as dtypes. If the shape of any tensors in the element are constrained, + all must be; shapes can be None if the shapes should not be constrained. + queue_ref: The queue reference, i.e. the output of the queue op. + """ + self._dtypes = dtypes + if shapes is not None: + self._shapes = [tensor_shape.TensorShape(s) for s in shapes] + else: + self._shapes = [tensor_shape.unknown_shape() for _ in self._dtypes] + self._queue_ref = queue_ref + self._name = self._queue_ref.op.name.split("/")[-1] + + @staticmethod + def from_list(index, queues): + """Create a queue using the queue reference from `queues[index]`. + + Args: + index: An integer scalar tensor that determines the input that gets + selected. + queues: A list of `QueueBase` objects. + + Returns: + A `QueueBase` object. + + Raises: + TypeError: when `queues` is not a list of `QueueBase` objects, + or when the data types of `queues` are not all the same. + """ + if ((not queues) or + (not isinstance(queues, list)) or + (not all([isinstance(x, QueueBase) for x in queues]))): + raise TypeError("A list of queues expected") + + dtypes = queues[0].dtypes + if not all([dtypes == q.dtypes for q in queues[1:]]): + raise TypeError("Queues do not have matching component dtypes.") + + queue_refs = [x.queue_ref for x in queues] + selected_queue = control_flow_ops.ref_select(index, queue_refs) + # TODO(josh11b): Unify the shapes of the queues too? + return QueueBase(dtypes=dtypes, shapes=None, queue_ref=selected_queue) + + @property + def queue_ref(self): + """The underlying queue reference.""" + return self._queue_ref + + @property + def name(self): + """The name of the underlying queue.""" + return self._queue_ref.op.name + + @property + def dtypes(self): + """The list of dtypes for each component of a queue element.""" + return self._dtypes + + def enqueue(self, vals, name=None): + """Enqueues one element to this queue. + + If the queue is full when this operation executes, it will block + until the element has been enqueued. + + Args: + vals: The tuple of `Tensor` objects to be enqueued. + name: A name for the operation (optional). 
+ + Returns: + The operation that enqueues a new tuple of tensors to the queue. + """ + if name is None: + name = "%s_enqueue" % self._name + ret = gen_data_flow_ops._queue_enqueue(self._queue_ref, vals, name=name) + + # NOTE(mrry): Not using a shape function because we need access to + # the Queue object. + for val, shape in zip(ret.inputs[1:], self._shapes): + val.get_shape().assert_is_compatible_with(shape) + + return ret + + def enqueue_many(self, vals, name=None): + """Enqueues zero or elements to this queue. + + This operation slices each component tensor along the 0th dimension to + make multiple queue elements. All of the tensors in `vals` must have the + same size in the 0th dimension. + + If the queue is full when this operation executes, it will block + until all of the elements have been enqueued. + + Args: + vals: The tensor or tuple of tensors from which the queue elements + are taken. + name: A name for the operation (optional). + + Returns: + The operation that enqueues a batch of tuples of tensors to the queue. + """ + if name is None: + name = "%s_EnqueueMany" % self._name + + ret = gen_data_flow_ops._queue_enqueue_many( + self._queue_ref, vals, name=name) + + # NOTE(mrry): Not using a shape function because we need access to + # the `QueueBase` object. + batch_dim = ret.inputs[1].get_shape()[0] + for val, shape in zip(ret.inputs[1:], self._shapes): + batch_dim.merge_with(val.get_shape()[0]) + val.get_shape()[1:].assert_is_compatible_with(shape) + + return ret + + def dequeue(self, name=None): + """Dequeues one element from this queue. + + If the queue is empty when this operation executes, it will block + until there is an element to dequeue. + + Args: + name: A name for the operation (optional). + + Returns: + The tuple of tensors that was dequeued. + """ + if name is None: + name = "%s_Dequeue" % self._name + ret = gen_data_flow_ops._queue_dequeue( + self._queue_ref, self._dtypes, name=name) + + # NOTE(mrry): Not using a shape function because we need access to + # the `QueueBase` object. + op = ret[0].op + for output, shape in zip(op.values(), self._shapes): + output.set_shape(shape) + + return ret if len(ret) != 1 else ret[0] + + def dequeue_many(self, n, name=None): + """Dequeues and concatenates `n` elements from this queue. + + This operation concatenates queue-element component tensors along + the 0th dimension to make a single component tensor. All of the + components in the dequeued tuple will have size `n` in the 0th dimension. + + If the queue contains fewer than `n` elements when this operation + executes, it will block until `n` elements have been dequeued. + + Args: + n: A scalar `Tensor` containing the number of elements to dequeue. + name: A name for the operation (optional). + + Returns: + The tuple of concatenated tensors that was dequeued. + """ + if name is None: + name = "%s_DequeueMany" % self._name + + ret = gen_data_flow_ops._queue_dequeue_many( + self._queue_ref, n, self._dtypes, name=name) + + # NOTE(mrry): Not using a shape function because we need access to + # the Queue object. + op = ret[0].op + batch_dim = tensor_shape.Dimension(tensor_util.ConstantValue(op.inputs[1])) + for output, shape in zip(op.values(), self._shapes): + output.set_shape(tensor_shape.TensorShape([batch_dim]).concatenate(shape)) + + return ret if len(ret) != 1 else ret[0] + + def close(self, cancel_pending_enqueues=False, name=None): + """Closes this queue. + + This operation signals that no more elements will be enqueued in + the given queue. 
Subsequent `enqueue` and `enqueue_many` + operations will fail. Subsequent `dequeue` and `dequeue_many` + operations will continue to succeed if sufficient elements remain + in the queue. Subsequent `dequeue` and `dequeue_many` operations + that would block will fail immediately. + + If `cancel_pending_enqueues` is `True`, all pending requests will also + be cancelled. + + Args: + cancel_pending_enqueues: (Optional.) A boolean, defaulting to + `False` (described above). + name: A name for the operation (optional). + + Returns: + The operation that closes the queue. + """ + if name is None: + name = "%s_Close" % self._name + return gen_data_flow_ops._queue_close( + self._queue_ref, cancel_pending_enqueues=cancel_pending_enqueues, + name=name) + + def size(self, name=None): + """Compute the number of elements in this queue. + + Args: + name: A name for the operation (optional). + + Returns: + A scalar tensor containing the number of elements in this queue. + """ + if name is None: + name = "%s_Size" % self._name + return gen_data_flow_ops._queue_size(self._queue_ref, name=name) + + +class RandomShuffleQueue(QueueBase): + """A queue implementation that dequeues elements in a random order. + + See [`tf.QueueBase`](#QueueBase) for a description of the methods on + this class. + + @@__init__ + """ + + def __init__(self, capacity, min_after_dequeue, dtypes, shapes=None, + seed=None, shared_name=None, name="random_shuffle_queue"): + """Create a queue that dequeues elements in a random order. + + A `RandomShuffleQueue` has bounded capacity; supports multiple + concurrent producers and consumers; and provides exactly-once + delivery. + + A `RandomShuffleQueue` holds a list of up to `capacity` + elements. Each element is a fixed-length tuple of tensors whose + dtypes are described by `dtypes`, and whose shapes are optionally + described by the `shapes` argument. + + If the `shapes` argument is specified, each component of a queue + element must have the respective fixed shape. If it is + unspecified, different queue elements may have different shapes, + but the use of `dequeue_many` is disallowed. + + The `min_after_dequeue` argument allows the caller to specify a + minimum number of elements that will remain in the queue after a + `dequeue` or `dequeue_many` operation completes, to ensure a + minimum level of mixing of elements. This invariant is maintained + by blocking those operations until sufficient elements have been + enqueued. The `min_after_dequeue` argument is ignored after the + queue has been closed. + + Args: + capacity: An integer. The upper bound on the number of elements + that may be stored in this queue. + min_after_dequeue: An integer (described above). + dtypes: A list of `DType` objects. The length of `dtypes` must equal + the number of tensors in each queue element. + shapes: (Optional.) A list of fully-defined `TensorShape` objects, + with the same length as `dtypes` or `None`. + seed: A Python integer. Used to create a random seed. + See [`set_random_seed`](constant_op.md#set_random_seed) for behavior. + shared_name: (Optional.) If non-empty, this queue will be shared under + the given name across multiple sessions. + name: Optional name for the queue operation. 
+ """ + dtypes = _as_type_list(dtypes) + shapes = _as_shape_list(shapes, dtypes) + seed1, seed2 = random_seed.get_seed(seed) + queue_ref = gen_data_flow_ops._random_shuffle_queue( + component_types=dtypes, shapes=shapes, capacity=capacity, + min_after_dequeue=min_after_dequeue, seed=seed1, seed2=seed2, + shared_name=shared_name, name=name) + + super(RandomShuffleQueue, self).__init__(dtypes, shapes, queue_ref) + + +class FIFOQueue(QueueBase): + """A queue implementation that dequeues elements in first-in-first out order. + + See [`tf.QueueBase`](#QueueBase) for a description of the methods on + this class. + + @@__init__ + """ + + def __init__(self, capacity, dtypes, shapes=None, shared_name=None, + name="fifo_queue"): + """Creates a queue that dequeues elements in a first-in first-out order. + + A `FIFOQueue` has bounded capacity; supports multiple concurrent + producers and consumers; and provides exactly-once delivery. + + A `FIFOQueue` holds a list of up to `capacity` elements. Each + element is a fixed-length tuple of tensors whose dtypes are + described by `dtypes`, and whose shapes are optionally described + by the `shapes` argument. + + If the `shapes` argument is specified, each component of a queue + element must have the respective fixed shape. If it is + unspecified, different queue elements may have different shapes, + but the use of `dequeue_many` is disallowed. + + Args: + capacity: An integer. The upper bound on the number of elements + that may be stored in this queue. + dtypes: A list of `DType` objects. The length of `dtypes` must equal + the number of tensors in each queue element. + shapes: (Optional.) A list of fully-defined `TensorShape` objects, + with the same length as `dtypes` or `None`. + shared_name: (Optional.) If non-empty, this queue will be shared under + the given name across multiple sessions. + name: Optional name for the queue operation. + """ + dtypes = _as_type_list(dtypes) + shapes = _as_shape_list(shapes, dtypes) + queue_ref = gen_data_flow_ops._fifo_queue( + component_types=dtypes, shapes=shapes, capacity=capacity, + shared_name=shared_name, name=name) + + super(FIFOQueue, self).__init__(dtypes, shapes, queue_ref) + + +# TODO(josh11b): class BatchQueue(QueueBase): + + +# pylint: disable=protected-access +class LookupTableBase(object): + """Represents a lookup table that persists across different steps.""" + + def __init__(self, key_dtype, value_dtype, default_value, table_ref): + """Construct a table object from a table reference. + + Args: + key_dtype: The key data type of the table. + value_dtype: The kvalue data type of the table. + default_value: The scalar tensor to be used when a key is not present in + the table. + table_ref: The table reference, i.e. the output of the lookup table ops. 
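A hedged usage sketch for the queue classes defined above, using only the constructor and method signatures documented in this file; the session that would run these ops is assumed and not shown:

```python
from tensorflow.python.framework import types
from tensorflow.python.ops import data_flow_ops

# A FIFO queue of scalar float32 elements.
q = data_flow_ops.FIFOQueue(capacity=10, dtypes=[types.float32], shapes=[[]])

enqueue_batch = q.enqueue_many(([1.0, 2.0, 3.0],))
dequeue_one = q.dequeue()
queue_size = q.size()
# In a session (assumed): run `enqueue_batch` once, then running
# `dequeue_one` three times yields 1.0, 2.0, 3.0 in order.
```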
+ """ + self._key_dtype = types.as_dtype(key_dtype) + self._value_dtype = types.as_dtype(value_dtype) + self._shapes = [tensor_shape.TensorShape([1])] + self._table_ref = table_ref + self._name = self._table_ref.op.name.split("/")[-1] + self._default_value = ops.convert_to_tensor(default_value, + dtype=self._value_dtype) + self._default_value.get_shape().merge_with(tensor_shape.scalar()) + + @property + def table_ref(self): + """Get the underlying table reference.""" + return self._table_ref + + @property + def key_dtype(self): + """The key dtype supported by the table.""" + return self._key_dtype + + @property + def value_dtype(self): + """The value dtype supported by the table.""" + return self._value_dtype + + @property + def name(self): + """The name of the table.""" + return self._name + + @property + def default_value(self): + """The default value of the table.""" + return self._default_value + + def size(self, name=None): + """Compute the number of elements in this table. + + Args: + name: A name for the operation (optional). + + Returns: + A scalar tensor containing the number of elements in this table. + """ + if name is None: + name = "%s_Size" % self._name + return gen_data_flow_ops._lookup_table_size(self._table_ref, name=name) + + def lookup(self, keys, name=None): + """Returns the values for the given 'keys' tensor. + + If an element on the key tensor is not found in the table, the default_value + is used. + + Args: + keys: The tensor for the keys. + name: Optional name for the op. + + Returns: + The operation that looks up the keys. + + Raises: + TypeError: when 'keys' or 'default_value' doesn't match the table data + types. + """ + if name is None: + name = "%s_lookup_table_find" % self._name + + if keys.dtype != self._key_dtype: + raise TypeError("Signature mismatch. Keys must be dtype %s, got %s." % ( + self._key_dtype, keys.dtype)) + + return gen_data_flow_ops._lookup_table_find( + self._table_ref, keys, self._default_value, name=name) + + def initialize_from(self, keys, values, name=None): + """Initialize the lookup table with the provided keys and values tensors. + + Construct an initializer object from keys and value tensors. + + Args: + keys: The tensor for the keys. + values: The tensor for the values. + name: Optional name for the op. + + Returns: + The operation that initializes a lookup table. + + Raises: + TypeError: when the 'keys' and 'values' data type do not match the table + key and value data types. + """ + if name is None: + name = "%s_initialize_table" % self.name + with ops.op_scope([keys, values], None, name): + keys = ops.convert_to_tensor(keys, dtype=self.key_dtype, name="keys") + values = ops.convert_to_tensor(values, dtype=self.value_dtype, + name="values") + + init_op = gen_data_flow_ops._initialize_table( + self.table_ref, keys, values, name=name) + ops.add_to_collection(ops.GraphKeys.TABLE_INITIALIZERS, init_op) + return init_op + + def _check_table_dtypes(self, key_dtype, value_dtype): + """Check that the given key_dtype and value_dtype matches the table dtypes'. + + Args: + key_dtype: The key data type to check. + value_dtype: The value data type to check. + + Raises: + TypeError: when 'key_dtype' or 'value_dtype' doesn't match the table data + types. + """ + if key_dtype != self.key_dtype: + raise TypeError("Invalid key dtype, expected %s but got %s." % ( + self.key_dtype, key_dtype)) + if value_dtype != self.value_dtype: + raise TypeError("Invalid value dtype, expected %s but got %s." 
% ( + self.value_dtype, value_dtype)) + + +class HashTable(LookupTableBase): + """A generic hash table implementation.""" + + def __init__(self, key_dtype, value_dtype, default_value, shared_name=None, + name="hash_table"): + """Create a generic hash table. + + A table holds a key-value pairs. The key and value types are + described by key_dtype and value_dtype respectively. + + Args: + key_dtype: The key data type of the table. + value_dtype: The kvalue data type of the table. + default_value: The scalar tensor to be used when a key is not present in + the table. + shared_name: Optional. If non-empty, this table will be shared under + the given name across multiple sessions. + name: Optional name for the hash table op. + + Returns: + A table object that can be used to lookup data. + """ + table_ref = gen_data_flow_ops._hash_table( + shared_name=shared_name, key_dtype=key_dtype, + value_dtype=value_dtype, name=name) + + super(HashTable, self).__init__(key_dtype, value_dtype, default_value, + table_ref) + + +def initialize_all_tables(name="init_all_tables"): + """Returns an Op that initializes all tables of the default graph. + + Returns: + An Op that initializes all tables. Note that if there are + not tables the returned Op is a NoOp. + """ + initializers = ops.get_collection(ops.GraphKeys.TABLE_INITIALIZERS) + if initializers: + return control_flow_ops.group(*initializers, name=name) + return control_flow_ops.no_op(name=name) + + +ops.NoGradient("LookupTableFind") +ops.NoGradient("LookupTableSize") +ops.NoGradient("HashTable") +ops.NoGradient("InitializeTable") + + +ops.RegisterShape("QueueSize")(common_shapes.scalar_shape) +ops.RegisterShape("Queue")(common_shapes.scalar_shape) +ops.RegisterShape("FIFOQueue")(common_shapes.scalar_shape) +ops.RegisterShape("RandomShuffleQueue")(common_shapes.scalar_shape) + + +# NOTE(mrry): The following ops use higher-level information in the +# Queue class to provide shape information. +ops.RegisterShape("QueueDequeue")(common_shapes.unknown_shape) +ops.RegisterShape("QueueDequeueMany")(common_shapes.unknown_shape) +ops.RegisterShape("QueueEnqueue")(common_shapes.unknown_shape) +ops.RegisterShape("QueueEnqueueMany")(common_shapes.unknown_shape) + + +@ops.RegisterShape("QueueClose") +def _ScalarToVoidShape(op): + """Shape function for ops that take a scalar and produce no outputs.""" + unused_input_shape = op.inputs[0].get_shape().merge_with( + tensor_shape.scalar()) + return [] + + +@ops.RegisterShape("DynamicPartition") +def _DynamicPartitionShape(op): + """Shape function for data_flow_ops.dynamic_partition.""" + data_shape = op.inputs[0].get_shape() + partitions_shape = op.inputs[1].get_shape() + # If we don't know the rank of partitions, we don't know anything + mid = partitions_shape.ndims + if mid is None: + result_shape = tensor_shape.unknown_shape() + else: + # data_shape must start with partitions_shape + partitions_shape.assert_is_compatible_with(data_shape[:mid]) + # The partition shape is dynamic in the 0th dimension, and matches + # data_shape in the remaining dimensions. 
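A hedged usage sketch for `HashTable` and `initialize_all_tables` defined above; the key/value data and the session run are illustrative assumptions:

```python
from tensorflow.python.framework import types
from tensorflow.python.ops import constant_op
from tensorflow.python.ops import data_flow_ops

# A string -> int64 table that returns -1 for keys that were never inserted.
table = data_flow_ops.HashTable(types.string, types.int64, default_value=-1)
init = table.initialize_from(["emerson", "lake", "palmer"], [0, 1, 2])
values = table.lookup(constant_op.constant(["lake", "unknown"]))
# After running `init` (or data_flow_ops.initialize_all_tables()) in a
# session, `values` evaluates to [1, -1].
```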
+ result_shape = tensor_shape.TensorShape([None]).concatenate( + data_shape[mid:]) + return [result_shape] * op.get_attr("num_partitions") + + +@ops.RegisterShape("DynamicStitch") +def _DynamicStitchShape(op): + """Shape function for data_flow_ops.dynamic_stitch.""" + num_partitions = op.get_attr("N") + indices_shapes = [t.get_shape() for t in op.inputs[0:num_partitions]] + data_shapes = [t.get_shape() for t in op.inputs[num_partitions:]] + output_shape = tensor_shape.unknown_shape() + extra_shape = tensor_shape.TensorShape(None) + for indices_shape, data_shape in zip(indices_shapes, data_shapes): + indices_ndims = indices_shape.ndims + if indices_ndims is not None: + # Assert that data_shape starts with indices_shape + indices_shape.merge_with(data_shape[:indices_ndims]) + # The rest belongs to output + extra_shape = extra_shape.merge_with(data_shape[indices_ndims:]) + return [tensor_shape.TensorShape([None]).concatenate(extra_shape)] + + +@ops.RegisterShape("LookupTableFind") +def _LookupTableFindShape(op): + """Shape function for data_flow_ops._lookup_table_find.""" + unused_table_shape = op.inputs[0].get_shape().merge_with( + tensor_shape.scalar()) + shape_in = op.inputs[1].get_shape() + return [shape_in] + + +@ops.RegisterShape("LookupTableSize") +def _LookupTableSizeShape(op): + """Shape function for data_flow_ops._lookup_table_find.""" + unused_table_shape = op.inputs[0].get_shape().merge_with( + tensor_shape.scalar()) + return [tensor_shape.scalar()] + + +@ops.RegisterShape("HashTable") +def _HashTableShape(unused_op): + """Shape function for data_flow_ops._hash_table.""" + return [tensor_shape.scalar()] + + +@ops.RegisterShape("InitializeTable") +def _InitializeLookupTableShape(op): + """Shape function for data_flow_ops._initialize_table.""" + unused_table_shape = op.inputs[0].get_shape().merge_with( + tensor_shape.scalar()) + keys_shape = op.inputs[1].get_shape().with_rank(1) + unused_values_shape = op.inputs[2].get_shape().merge_with(keys_shape) + return [] diff --git a/tensorflow/python/ops/embedding_ops.py b/tensorflow/python/ops/embedding_ops.py new file mode 100644 index 0000000000..bc64593d23 --- /dev/null +++ b/tensorflow/python/ops/embedding_ops.py @@ -0,0 +1,197 @@ +"""Operations for embeddings.""" + +from tensorflow.python.framework import ops +from tensorflow.python.framework import types +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import data_flow_ops +from tensorflow.python.ops import math_ops + + +def embedding_lookup(params, ids, name=None): + """Return a tensor of embedding values by looking up "ids" in "params". + + Args: + params: List of tensors of the same shape. A single tensor is + treated as a singleton list. + ids: Tensor of integers containing the ids to be looked up in + 'params'. Let P be len(params). If P > 1, then the ids are + partitioned by id % P, and we do separate lookups in params[p] + for 0 <= p < P, and then stitch the results back together into + a single result tensor. + name: Optional name for the op. + + Returns: + A tensor of shape ids.shape + params[0].shape[1:] containing the + values params[i % P][i] for each i in ids. + + Raises: + ValueError: if some parameters are invalid. 
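The docstring above describes sharding the embedding matrix into `P` pieces by `id % P`; the implementation that follows looks up row `id // P` within shard `id % P`. A numpy sketch of that indexing (an illustrative model of the result only):

```python
import numpy as np

def sharded_lookup(params, ids):
  # Row i of the result comes from shard i % P at offset i // P, matching the
  # per-shard division used in the implementation.
  p = len(params)
  return np.stack([params[i % p][i // p] for i in ids])

shards = [np.array([[0.0], [2.0]]), np.array([[1.0], [3.0]])]  # P = 2
# id 3 -> shard 3 % 2 = 1, row 3 // 2 = 1 -> [3.0]
assert sharded_lookup(shards, [0, 3]).tolist() == [[0.0], [3.0]]
```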
+ """ + if not isinstance(params, list): + params = [params] + with ops.op_scope(params + [ids], name, "embedding_lookup") as name: + if not params: + raise ValueError("Need at least one param") + np = len(params) # Number of partitions + params = ops.convert_n_to_tensor_or_indexed_slices(params, name="params") + if np == 1: + with ops.device(params[0].device): + return array_ops.gather(params[0], ids, name=name) + else: + ids = ops.convert_to_tensor(ids, name="ids") + flat_ids = array_ops.reshape(ids, [-1]) + original_indices = math_ops.range(0, array_ops.size(flat_ids)) + # Compute flat_ids % partitions for each id + ids_mod_p = flat_ids % np + if ids_mod_p.dtype != types.int32: + ids_mod_p = math_ops.cast(ids_mod_p, types.int32) + # Partition single list of ids based on ids % np into np separate lists + plist = data_flow_ops.dynamic_partition(flat_ids, ids_mod_p, np) + # Similarly, partition the original indices. + pindices = data_flow_ops.dynamic_partition(original_indices, ids_mod_p, + np) + # Do np separate lookups, finding embeddings for plist[p] in params[p] + partitioned_result = [] + for p in range(np): + # TODO(agarwal): handle device allocations here and later in the + # colocate code. + gather_ids = plist[p] / np + with ops.device(params[p].device): + partitioned_result.append(array_ops.gather(params[p], gather_ids)) + # Stitch these back together + ret = data_flow_ops.dynamic_stitch(pindices, partitioned_result, + name=name) + # Reshape to reverse the flattening of ids. + # It's important that we compute params[0].shape on the right device + # to avoid data motion. + with ops.device(params[0].device): + params_shape = array_ops.shape(params[0]) + ret = array_ops.reshape(ret, array_ops.concat(0, [ + array_ops.shape(ids), array_ops.slice(params_shape, [1], [-1])])) + # output shape = ids.shape + params[*].shape[1:] + # Normally the reshape is sufficient, but setting shape explicitly + # teaches shape inference that params[1:].get_shape() matters. + element_shape = params[0].get_shape()[1:] + for p in params[1:]: + element_shape = element_shape.merge_with(p.get_shape()[1:]) + ret.set_shape(ids.get_shape().concatenate(element_shape)) + return ret + + +# TODO(lif): Add support for higher-rank SparseTensors +def embedding_lookup_sparse(params, sp_ids, sp_weights, + name=None, + combiner="mean"): + """Computes embeddings for the given ids and weights. + + This op assumes that there is at least one id for each row in the dense tensor + represented by sp_ids (i.e. there are no rows with empty features), and that + all the indices of sp_ids are in canonical row-major order. + + It also assumes that all id values lie in the range [0, p0), where p0 + is the sum of the size of params along dimension 0. + + Args: + params: A single tensor representing the complete embedding tensor, + or a list of P tensors all of same shape except for the first dimension, + representing sharded embedding tensors. In the latter case, the ids are + partitioned by id % P, and we do separate lookups in params[p] for + 0 <= p < P, and then stitch the results back together into a single + result tensor. The first dimension is allowed to vary as the vocab + size is not necessarily a multiple of P. + sp_ids: N x M SparseTensor of int64 ids (typically from FeatureValueToId), + where N is typically batch size and M is arbitrary. + sp_weights: either a SparseTensor of float / double weights, or None to + indicate all weights should be taken to be 1. 
If specified, sp_weights + must have exactly the same shape and indices as sp_ids. + name: Optional name for the op. + combiner: A string specifying the reduction op. Currently "mean" and "sum" + are supported. + "sum" computes the weighted sum of the embedding results for each row. + "mean" is the weighted sum divided by the total weight. + + Returns: + A dense tensor representing the combined embeddings for the + sparse ids. For each row in the dense tensor represented by sp_ids, the op + looks up the embeddings for all ids in that row, multiplies them by the + corresponding weight, and combines these embeddings as specified. + + In other words, if + shape(combined params) = [p0, p1, ..., pm] + and + shape(sp_ids) = shape(sp_weights) = [d0, d1, ..., dn] + then + shape(output) = [d0, d1, ..., dn-1, p1, ..., pm]. + + For instance, if params is a 10x20 matrix, and sp_ids / sp_weights are + + [0, 0]: id 1, weight 2.0 + [0, 1]: id 3, weight 0.5 + [1, 0]: id 0, weight 1.0 + [2, 3]: id 1, weight 3.0 + + with combiner="mean", then the output will be a 3x20 matrix where + output[0, :] = (params[1, :] * 2.0 + params[3, :] * 0.5) / (2.0 + 0.5) + output[1, :] = params[0, :] * 1.0 + output[2, :] = params[1, :] * 3.0 + + Raises: + TypeError: If sp_ids is not a SparseTensor, or if sp_weights is neither + None nor SparseTensor. + ValueError: If combiner is not one of {"mean", "sum"}. + """ + if combiner not in ("mean", "sum"): + raise ValueError("combiner must be one of 'mean' or 'sum'") + if not isinstance(params, list): + params = [params] + if not isinstance(sp_ids, ops.SparseTensor): + raise TypeError("sp_ids must be SparseTensor") + ignore_weights = sp_weights is None + if not ignore_weights and not isinstance(sp_weights, ops.SparseTensor): + raise TypeError("sp_weights must be either None or SparseTensor") + + with ops.op_scope(params + [sp_ids], name, "embedding_lookup_sparse") as name: + segment_ids = sp_ids.indices[:, 0] + if segment_ids.dtype != types.int32: + segment_ids = math_ops.cast(segment_ids, types.int32) + + ids = sp_ids.values + if ignore_weights: + ids, idx = array_ops.unique(ids) + else: + idx = None + + embeddings = embedding_lookup(params, ids) + if not ignore_weights: + weights = sp_weights.values + if weights.dtype != embeddings.dtype: + weights = math_ops.cast(weights, embeddings.dtype) + + # Reshape weights to allow broadcast + ones = array_ops.fill( + array_ops.expand_dims(array_ops.rank(embeddings) - 1, 0), 1) + bcast_weights_shape = array_ops.concat(0, [ + array_ops.shape(weights), ones]) + weights = array_ops.reshape(weights, bcast_weights_shape) + embeddings *= weights + + if combiner == "sum": + embeddings = math_ops.segment_sum(embeddings, segment_ids, name=name) + elif combiner == "mean": + embeddings = math_ops.segment_sum(embeddings, segment_ids) + weight_sum = math_ops.segment_sum(weights, segment_ids) + embeddings = math_ops.div(embeddings, weight_sum, name=name) + else: + assert False, "Unrecognized combiner" + else: + assert idx is not None + if combiner == "sum": + embeddings = math_ops.sparse_segment_sum(embeddings, idx, segment_ids, + name=name) + elif combiner == "mean": + embeddings = math_ops.sparse_segment_mean(embeddings, idx, segment_ids, + name=name) + else: + assert False, "Unrecognized combiner" + + return embeddings diff --git a/tensorflow/python/ops/gradients.py b/tensorflow/python/ops/gradients.py new file mode 100644 index 0000000000..ffa7828c04 --- /dev/null +++ b/tensorflow/python/ops/gradients.py @@ -0,0 +1,661 @@ +"""Implements the graph 
generation for computation of gradients.""" + +import collections +import warnings + +import tensorflow.python.platform + +import numpy as np + +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.framework import tensor_util +from tensorflow.python.framework import types +# pylint: disable=unused-import +from tensorflow.python.ops import array_grad +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import constant_op +from tensorflow.python.ops import control_flow_grad +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import logging_ops +from tensorflow.python.ops import linalg_grad +from tensorflow.python.ops import math_grad +# pylint: enable=unused-import +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import linalg_ops +from tensorflow.python.platform import logging + + +# Warn the user if we convert a sparse representation to dense with at +# least this number of elements. +_LARGE_SPARSE_NUM_ELEMENTS = 100000000 + + +def _IndexedSlicesToTensor(value, dtype=None, name=None): + """Converts an IndexedSlices object `value` to a Tensor. + + NOTE(mrry): This function is potentially expensive. + + Args: + value: An ops.IndexedSlices object. + dtype: The dtype of the Tensor to be returned. + name: Optional name to use for the returned Tensor. + + Returns: + A dense Tensor representing the values in the given IndexedSlices. + + Raises: + ValueError: If the IndexedSlices does not have the same dtype. + """ + if dtype and not dtype.is_compatible_with(value.dtype): + raise ValueError( + "Tensor conversion requested dtype %s for IndexedSlices with dtype %s" + % (dtype.name, value.dtype.name)) + if value.dense_shape is None: + raise ValueError( + "Tensor conversion requested for IndexedSlices without dense_shape: %s" + % str(value)) + # TODO(mrry): Consider adding static shape information to + # IndexedSlices, to avoid using numpy here. + dense_shape_value = tensor_util.ConstantValue(value.dense_shape) + if dense_shape_value is not None: + num_elements = np.prod(dense_shape_value) + if num_elements >= _LARGE_SPARSE_NUM_ELEMENTS: + warnings.warn( + "Converting sparse IndexedSlices to a dense Tensor with %d elements. " + "This may consume a large amount of memory." % num_elements) + else: + warnings.warn( + "Converting sparse IndexedSlices to a dense Tensor of unknown shape. " + "This may consume a large amount of memory.") + return math_ops.unsorted_segment_sum( + value.values, value.indices, value.dense_shape[0], name=name) + + +ops.register_tensor_conversion_function(ops.IndexedSlices, _IndexedSlicesToTensor) + + +def _MarkReachedOps(from_ops, reached_ops): + """Mark all ops reached from "from_ops". + + Args: + from_ops: list of Operations. + reached_ops: list of booleans, indexed by operation id. + """ + queue = collections.deque() + queue.extend(from_ops) + while queue: + op = queue.popleft() + if not reached_ops[op._id]: + reached_ops[op._id] = True + for output in op.outputs: + queue.extend(output.consumers()) + + +def _GatherInputs(to_ops, reached_ops): + """List all inputs of to_ops that are in reached_ops. + + Args: + to_ops: list of Operations. + reached_ops: list of booleans, indexed by operation id. + + Returns: + The list of all inputs of to_ops that are in reached_ops. + That list includes all elements of to_ops. 
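  A minimal sketch of the same mark-then-gather traversal on a toy graph,
  using plain dicts rather than TensorFlow ops (names and edges are
  hypothetical):

  ```python
  from collections import deque

  # consumers[a] lists ops fed by a's output; inputs is the reverse view.
  consumers = {"x": ["mul"], "w": ["mul"], "mul": ["add"], "b": ["add"],
               "add": []}
  inputs = {"x": [], "w": [], "mul": ["x", "w"], "b": [], "add": ["mul", "b"]}

  # Forward pass (_MarkReachedOps): mark everything reachable from "x".
  reached, queue = set(), deque(["x"])
  while queue:
    op = queue.popleft()
    if op not in reached:
      reached.add(op)
      queue.extend(consumers[op])

  # Backward pass (_GatherInputs): walk back from "add", keeping reached ops.
  between, queue = [], deque(["add"])
  while queue:
    op = queue.popleft()
    if op in reached:
      between.append(op)
      reached.discard(op)   # clear the mark so the op is not added twice
      queue.extend(inputs[op])
  # between == ["add", "mul", "x"]; "w" and "b" are not reachable from "x",
  # so they are excluded.
  ```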
+ """ + inputs = [] + queue = collections.deque() + queue.extend(to_ops) + while queue: + op = queue.popleft() + # We are interested in this op. + if reached_ops[op._id]: + inputs.append(op) + # Clear the boolean so we won't add the inputs again. + reached_ops[op._id] = False + for inp in op.inputs: + queue.append(inp.op) + return inputs + + +def _GetGradsDevice(op, colocate_gradients_with_ops): + """Gets the device to which to assign gradients of "op". + + Args: + op: an Operation. + colocate_gradients_with_ops: If True, try colocating gradients with the + corresponding op. + + Returns: + A device string. + """ + if colocate_gradients_with_ops and op.device: + return op.device + else: + return op.graph.get_default_device() + + +def _PendingCount(graph, to_ops, from_ops): + """Initialize the pending count for ops between two lists of Operations. + + 'pending_count[op._id]' indicates the number of backprop inputs + to this operation. + + Args: + graph: a Graph. + to_ops: list of Operations. + from_ops: list of Operations. + + Returns: + A tuple containing: (1) a list of integers indexed by operation id, + indicating the number of backprop inputs to this operation, and (2) + a boolean which is True if any of the ops in between from_ops and to_ops + contain control flow loops. + """ + # Mark reachable ops from from_ops. + reached_ops = [False] * (graph._last_id + 1) + for op in to_ops: + reached_ops[op._id] = True + _MarkReachedOps(from_ops, reached_ops) + + # Mark between ops. + between_ops = [False] * (graph._last_id + 1) + between_op_list = [] + queue = collections.deque() + queue.extend(to_ops) + while queue: + op = queue.popleft() + # We are interested in this op. + if reached_ops[op._id]: + between_ops[op._id] = True + between_op_list.append(op) + # Clear the boolean so we won't add the inputs again. + reached_ops[op._id] = False + for inp in op.inputs: + queue.append(inp.op) + + # Initialize pending count for between ops. + pending_count = [0] * (graph._last_id + 1) + has_control_flow = False + for op in between_op_list: + for x in op.inputs: + if between_ops[x.op._id]: + pending_count[x.op._id] += 1 + for x in op.control_inputs: + if between_ops[x._id]: + pending_count[x._id] += 1 + if op.type == "Exit": + has_control_flow = True + return pending_count, has_control_flow + + +def _AsList(x): + return x if isinstance(x, (list, tuple)) else [x] + + +def _DefaultGradYs(grad_ys, ys, colocate_gradients_with_ops): + """Fill in default values for grad_ys. + + Args: + grad_ys: List of gradients, can contain None. + ys: List of tensors. + colocate_gradients_with_ops: If True, try colocating gradients with + the corresponding op. + + Returns: + A list of gradients to use, without None. + + Raises: + ValueError: If one of the grad_ys is invalid. + """ + if len(grad_ys) != len(ys): + raise ValueError("Passed %d grad_ys for %d ys" % (len(grad_ys), len(ys))) + grad_ys = ops.convert_n_to_tensor_or_indexed_slices(grad_ys, name="grad_y") + for i in xrange(len(grad_ys)): + grad_y = grad_ys[i] + y = ys[i] + if grad_y is None: + with ops.device(_GetGradsDevice(y.op, colocate_gradients_with_ops)): + grad_ys[i] = array_ops.fill(array_ops.shape(y), + constant_op.constant(1, dtype=y.dtype)) + else: + if grad_y.dtype != y.dtype: + raise ValueError("Y and ys_grad must be of the same type, " + "not y: %s, ys_grad: %s " % + (types.as_dtype(y.dtype).name, + types.as_dtype(grad_y.dtype).name)) + return grad_ys + + +def _VerifyGeneratedGradients(grads, op): + """Verify that gradients are valid in number and type. 
+ + Args: + grads: List of generated gradients. + op: Operation for which the gradients where generated. + + Raises: + ValueError: if the gradients are invalid. + """ + if len(grads) != len(op.inputs): + raise ValueError("Num gradients %d generated for op %s do not match num " + "inputs %d" % (len(grads), op.node_def, len(op.inputs))) + for i in xrange(len(grads)): + grad = grads[i] + inp = op.inputs[i] + if grad is not None: + if not grad.dtype.is_compatible_with(inp.dtype): + raise ValueError( + "Gradient type %s generated for op %s does " + "not match input type %s" % + (types.as_dtype(grad.dtype).name, op.node_def, + types.as_dtype(inp.dtype).name)) + + +def _StopOps(from_ops, pending_count): + """The set of ops that terminate the gradient computation. + + This computes the frontier of the forward graph *before* which backprop + should stop. Operations in the returned set will not be differentiated. + This set is defined as the subset of `from_ops` containing ops that have + no predecessor in `from_ops`. `pending_count` is the result of + `_PendingCount(g, xs, from_ops)`. An 'op' has predecessors in `from_ops` + iff pending_count[op._id] > 0. + + Args: + from_ops: list of Operations. + pending_count: List of integers, indexed by operation id. + + Returns: + The set of operations. + """ + stop_ops = set() + for op in from_ops: + is_stop_op = True + for inp in op.inputs: + if pending_count[inp.op._id] > 0: + is_stop_op = False + break + if is_stop_op: + stop_ops.add(op._id) + return stop_ops + + +def gradients(ys, xs, grad_ys=None, name="gradients", + colocate_gradients_with_ops=False, + gate_gradients=False, + aggregation_method=None): + """Constructs symbolic partial derivatives of `ys` w.r.t. x in `xs`. + + `ys` and `xs` are each a `Tensor` or a list of tensors. `grad_ys` + is a list of `Tensor`, holding the gradients received by the + `ys`. The list must be the same length as `ys`. + + `gradients()` adds ops to the graph to output the partial + derivatives of `ys` with respect to `xs`. It returns a list of + `Tensor` of length `len(xs)` where each tensor is the `sum(dy/dx)` + for y in `ys`. + + `grad_ys` is a list of tensors of the same length as `ys` that holds + the initial gradients for each y in `ys`. When `grad_ys` is None, + we fill in a tensor of '1's of the shape of y for each y in `ys`. A + user can provide their own initial 'grad_ys` to compute the + derivatives using a different initial gradient for each y (e.g., if + one wanted to weight the gradient differently for each value in + each y). + + Args: + ys: A `Tensor` or list of tensors to be differentiated. + xs: A `Tensor` or list of tensors to be used for differentiation. + grad_ys: Optional. A `Tensor` or list of tensors the same size as + `ys` and holding the gradients computed for each y in `ys`. + name: Optional name to use for grouping all the gradient ops together. + defaults to 'gradients'. + colocate_gradients_with_ops: If True, try colocating gradients with + the corresponding op. + gate_gradients: If True, add a tuple around the gradients returned + for an operations. This avoids some race conditions. + aggregation_method: Specifies the method used to combine gradient terms. + Accepted values are constants defined in the class `AggregationMethod`. + + Returns: + A list of `sum(dy/dx)` for each x in `xs`. + + Raises: + LookupError: if one of the operations between `x` and `y` does not + have a registered gradient function. + ValueError: if the arguments are invalid. 
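  A minimal usage sketch, mirroring the style of gradients_test.py (the
  values are hypothetical):

  ```python
  from tensorflow.python.framework import ops
  from tensorflow.python.ops import gradients
  from tensorflow.python.ops.constant_op import constant

  with ops.Graph().as_default():
    x = constant(3.0)
    y = x * x                                # y = x**2
    dy_dx = gradients.gradients(y, [x])[0]   # symbolic 2*x; evaluates to 6.0
  ```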
+ + """ + ys = _AsList(ys) + xs = _AsList(xs) + if grad_ys is None: + grad_ys = [None] * len(ys) + else: + grad_ys = _AsList(grad_ys) + with ops.op_scope(ys + xs + grad_ys, name, "gradients"): + ys = ops.convert_n_to_tensor_or_indexed_slices(ys, name="y") + xs = ops.convert_n_to_tensor_or_indexed_slices(xs, name="x") + grad_ys = _DefaultGradYs(grad_ys, ys, colocate_gradients_with_ops) + + # The approach we take here is as follows: Create a list of all ops in the + # subgraph between the ys and xs. Visit these ops in reverse order of ids + # to ensure that when we visit an op the gradients w.r.t its outputs have + # been collected. Then aggregate these gradients if needed, call the op's + # gradient function, and add the generated gradients to the gradients for + # its input. + + # Initialize the pending count for ops in the connected subgraph from ys + # to the xs. + to_ops = [t.op for t in ys] + from_ops = [t.op for t in xs] + pending_count, has_control_flow = _PendingCount( + ops.get_default_graph(), to_ops, from_ops) + + # Iterate over the collected ops. + # + # grads: op => list of gradients received on each output endpoint of the + # op. The gradients for each endpoint are initially collected as a list. + # When it is time to call the op's gradient function, for each endpoint we + # aggregate the list of received gradients into a Add() Operation if there + # is more than one. + grads = {} + + # Add the initial gradients for the ys. + for y, grad_y in zip(ys, grad_ys): + _SetGrad(grads, y, grad_y) + + # Initialize queue with to_ops. + queue = collections.deque() + # Add the ops in 'to_ops' into the queue. + to_ops_set = set() + for op in to_ops: + if op._id not in to_ops_set: + to_ops_set.add(op._id) + queue.append(op) + # The set of 'from_ops'. + stop_ops = _StopOps(from_ops, pending_count) + while queue: + # generate gradient subgraph for op. + op = queue.popleft() + with ops.device(_GetGradsDevice(op, colocate_gradients_with_ops)): + if has_control_flow: + control_flow_ops.EnterGradWhileContext(op) + out_grads = _AggregatedGrads(grads, op, has_control_flow, + aggregation_method) + grad_fn = None + if any(out_grads) and op._id not in stop_ops: + # A grad_fn must be defined, either as a function or as None + # for ops that do not have gradients. + try: + grad_fn = ops.get_gradient_function(op) + except LookupError: + raise LookupError( + "No gradient defined for operation '%s' (op type: %s)" % + (op.name, op.type)) + if grad_fn and any(out_grads): + # NOTE: If _AggregatedGrads didn't compute a value for the i'th + # output, it means that the cost does not depend on output[i], + # therefore dC/doutput[i] is 0. + for i, out_grad in enumerate(out_grads): + if (not out_grad + and types.as_dtype(op.outputs[i].dtype).base_dtype in ( + types.float32, types.float64)): + # Only floating-point outputs get a zero gradient. Gradient + # functions should ignore the gradient for other outputs. 
+ out_grads[i] = array_ops.zeros_like(op.outputs[i]) + with ops.name_scope(op.name + "_grad"): + # pylint: disable=protected-access + with ops.get_default_graph()._original_op(op): + # pylint: enable=protected-access + op_wrapper = op + if has_control_flow: + op_wrapper = control_flow_ops.MakeWrapper(op) + in_grads = _AsList(grad_fn(op_wrapper, *out_grads)) + _VerifyGeneratedGradients(in_grads, op) + if gate_gradients and len(in_grads) > 1: + in_grads = control_flow_ops.tuple(in_grads) + logging.vlog(1, "Gradient for '" + op.name + "'") + logging.vlog(1, " in --> %s", + ", ".join([x.name for x in out_grads if x])) + logging.vlog(1, " out --> %s", + ", ".join([x.name for x in in_grads if x])) + else: + # If no grad_fn is defined or none of out_grads is available, + # just propagates a list of None backwards. + in_grads = [None] * len(op.inputs) + for t_in, in_grad in zip(op.inputs, in_grads): + if in_grad: + _SetGrad(grads, t_in, in_grad) + if has_control_flow: + control_flow_ops.ExitGradWhileContext(op) + + # update pending count for the inputs of op. + for x in op.inputs: + pending_count[x.op._id] -= 1 + ready = (pending_count[x.op._id] == 0) + if has_control_flow and not ready: + ready = (pending_count[x.op._id] > 0 and + control_flow_ops.IsLoopSwitch(x.op)) + if ready: + queue.append(x.op) + for x in op.control_inputs: + pending_count[x._id] -= 1 + if pending_count[x._id] is 0: + queue.append(x) + return [_GetGrad(grads, x) for x in xs] + + +def _SetGrad(grads, t, grad): + """Sets gradient "grad" in "grads" for tensor "t".""" + op = t.op + op_grads = grads.get(op) + if not op_grads: + op_grads = [[] for _ in xrange(len(op.outputs))] + grads[op] = op_grads + t_grads = op_grads[t.value_index] + if isinstance(t_grads, list): + t_grads.append(grad) + else: + assert op.type == "Switch" + op_grads[t.value_index] = grad + + +def _GetGrad(grads, t): + """Gets gradient for tensor "t".""" + op = t.op + op_grads = grads.get(op) + if not op_grads: return None + t_grad = op_grads[t.value_index] + assert not isinstance(t_grad, list), ( + "gradients list should have been aggregated by now.") + return t_grad + + +def _GetGrads(grads, op): + """Gets all gradients for op.""" + if op in grads: + return grads[op] + else: + return [[] for _ in xrange(len(op.outputs))] + + +def _HandleNestedIndexedSlices(grad): + assert isinstance(grad, ops.IndexedSlices) + if isinstance(grad.values, ops.Tensor): + return grad + else: + assert isinstance(grad.values, ops.IndexedSlices) + g = _HandleNestedIndexedSlices(grad.values) + return ops.IndexedSlices( + g.values, array_ops.gather(grad.indices, g.indices), g.dense_shape) + + +def _AccumulatorShape(inputs): + shape = tensor_shape.unknown_shape() + for i in inputs: + if isinstance(i, ops.Tensor): + shape = shape.merge_with(i.get_shape()) + return shape + + +class AggregationMethod(object): + """A class listing aggregation methods used to combine gradients. + + Computing partial derivatives can require aggregating gradient + contributions. This class lists the various methods that can + be used to combine gradients in the graph: + + * `ADD_N`: All of the gradient terms are summed as part of one + operation using the "AddN" op. It has the property that all + gradients must be ready before any aggregation is performed. + * `DEFAULT`: The system-chosen default aggregation method. + """ + ADD_N = 0 + DEFAULT = ADD_N + # The following are experimental and may not be supported in future releases. 
+ EXPERIMENTAL_TREE = 1 + EXPERIMENTAL_ACCUMULATE_N = 2 + + +def _AggregatedGrads(grads, op, has_control_flow, aggregation_method=None): + """Get the aggregated gradients for op. + + Args: + grads: The map of memoized gradients. + op: The op to get gradients for. + has_control_flow: True iff the graph contains control flow ops. + aggregation_method: Specifies the method used to combine gradient terms. + Accepted values are constants defined in the class `AggregationMethod`. + + Returns: + A list of gradients, one per each output of `op`. If the gradients + for a particular output is a list, this function aggregates it + before returning. + + Raises: + TypeError: if the incoming grads are not Tensors or IndexedSlices. + ValueError: if the arguments are invalid. + + """ + if aggregation_method is None: + aggregation_method = AggregationMethod.DEFAULT + if aggregation_method not in [AggregationMethod.ADD_N, + AggregationMethod.EXPERIMENTAL_TREE, + AggregationMethod.EXPERIMENTAL_ACCUMULATE_N]: + raise ValueError("Invalid aggregation_method specified.") + out_grads = _GetGrads(grads, op) + for i, out_grad in enumerate(out_grads): + if has_control_flow: + if isinstance(out_grad, (ops.Tensor, ops.IndexedSlices)): + assert op.type == "Switch" + continue + # Grads have to be Tensors or IndexedSlices + if not all([isinstance(g, (ops.Tensor, ops.IndexedSlices)) + for g in out_grad if g]): + raise TypeError("gradients have to be either all Tensors " + "or all IndexedSlices") + # Aggregate multiple gradients, and convert [] to None. + if out_grad: + if all([isinstance(g, ops.Tensor) for g in out_grad if g]): + tensor_shape = _AccumulatorShape(out_grad) + if len(out_grad) < 2: + used = "nop" + out_grads[i] = out_grad[0] + elif (aggregation_method == AggregationMethod.EXPERIMENTAL_ACCUMULATE_N + and len(out_grad) > 2 and tensor_shape.is_fully_defined()): + # The benefit of using AccumulateN is that its inputs can be combined + # in any order and this can allow the expression to be evaluated with + # a smaller memory footprint. When used with gpu_allocator_retry, + # it is possible to compute a sum of terms which are much larger than + # total GPU memory. + # AccumulateN can currently only be used if we know the shape for + # an accumulator variable. If this is not known, or if we only have + # 2 grads then we fall through to the "tree" case below. + used = "accumulate_n" + out_grads[i] = math_ops.accumulate_n(out_grad) + elif aggregation_method in [AggregationMethod.EXPERIMENTAL_TREE, + AggregationMethod.EXPERIMENTAL_ACCUMULATE_N + ]: + # Aggregate all gradients by doing pairwise sums: this may + # reduce performance, but it can improve memory because the + # gradients can be released earlier. + # + # TODO(vrv): Consider replacing this with a version of + # tf.AddN() that eagerly frees its inputs as soon as they are + # ready, so the order of this tree does not become a problem. + used = "tree" + with ops.name_scope(op.name + "_gradient_sum"): + running_sum = out_grad[0] + for grad in out_grad[1:]: + running_sum = math_ops.add_n([running_sum, grad]) + out_grads[i] = running_sum + else: + used = "add_n" + out_grads[i] = math_ops.add_n(out_grad) + logging.vlog(2, " _AggregatedGrads %d x %s using %s", len(out_grad), + tensor_shape, used) + else: + out_grad = math_ops._as_indexed_slices_list([g for g in out_grad if g]) + out_grad = [_HandleNestedIndexedSlices(x) for x in out_grad] + # Form IndexedSlices out of the concatenated values and + # indices. 
+ out_grads[i] = ops.IndexedSlices( + array_ops.concat(0, [x.values for x in out_grad]), + array_ops.concat(0, [x.indices for x in out_grad]), + out_grad[0].dense_shape) + else: + out_grads[i] = [] + return out_grads + + +# TODO(vrv): Make this available when we want to make it public. +def _hessian_vector_product(ys, xs, v): + """Multiply the Hessian of `ys` wrt `xs` by `v`. + + This is an efficient construction that uses a backprop-like approach + to compute the product between the Hessian and another vector. The + Hessian is usually too large to be explicitly computed or even + represented, but this method allows us to at least multiply by it + for the same big-O cost as backprop. + + Implicit Hessian-vector products are the main practical, scalable way + of using second derivatives with neural networks. They allow us to + do things like construct Krylov subspaces and approximate conjugate + gradient descent. + + Example: if `y` = 1/2 `x`^T A `x`, then `hessian_vector_product(y, + x, v)` will return an expression that evaluates to the same values + as (A + A.T) `v`. + + Args: + ys: A scalar value, or a tensor or list of tensors to be summed to + yield a scalar. + xs: A list of tensors that we should construct the Hessian over. + v: A list of tensors, with the same shapes as xs, that we want to + multiply by the Hessian. + + Returns: + A list of tensors (or if the list would be length 1, a single tensor) + containing the product between the Hessian and `v`. + + Raises: + ValueError: `xs` and `v` have different length. + + """ + + # Validate the input + length = len(xs) + if len(v) != length: + raise ValueError("xs and v must have the same length.") + + # First backprop + grads = gradients(ys, xs) + + assert len(grads) == length + elemwise_products = [math_ops.mul(grad_elem, array_ops.stop_gradient(v_elem)) + for grad_elem, v_elem in zip(grads, v) + if grad_elem is not None] + + # Second backprop + return gradients(elemwise_products, xs) diff --git a/tensorflow/python/ops/gradients_test.py b/tensorflow/python/ops/gradients_test.py new file mode 100644 index 0000000000..dac0ebbb60 --- /dev/null +++ b/tensorflow/python/ops/gradients_test.py @@ -0,0 +1,337 @@ +"""Tests for tensorflow.ops.gradients.""" +import warnings + +import tensorflow.python.platform + +import numpy as np + +from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util +from tensorflow.python.framework import types +# pylint: disable=unused-import +from tensorflow.python.ops import array_grad +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import constant_op +from tensorflow.python.ops import data_flow_grad +from tensorflow.python.ops import data_flow_ops +from tensorflow.python.ops import gradients +from tensorflow.python.ops import math_grad +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn_grad +from tensorflow.python.ops import state_grad +# pylint: enable=unused-import +from tensorflow.python.ops.constant_op import constant +from tensorflow.python.ops.nn_ops import bias_add +from tensorflow.python.platform import googletest + + +def _OpsBetween(graph, to_ops, from_ops): + """Build the list of operations between two lists of Operations. + + Args: + graph: a Graph. + to_ops: list of Operations. + from_ops: list of Operations. + + Returns: + The list of operations between "from_ops" and "to_ops", sorted by + decreasing operation id. This list contains all elements of to_ops. 
+ + TODO(mdevin): Think about returning an empty list if from_ops are not + reachable from to_ops. Presently it returns to_ops in that case. + """ + # List of booleans, indexed by operation id, indicating if + # an op is reached from the output of "input_ops". + reached_ops = [False] * (graph._last_id + 1) + # We only care to reach up to "output_ops" so we mark the + # output ops as reached to avoid recursing past them. + for op in to_ops: + reached_ops[op._id] = True + gradients._MarkReachedOps(from_ops, reached_ops) + between_ops = gradients._GatherInputs(to_ops, reached_ops) + between_ops.sort(lambda x, y: y._id - x._id) + return between_ops + + +class GradientsTest(test_util.TensorFlowTestCase): + + def _OpNames(self, op_list): + return ["%s/%d" % (str(op.name), op._id) for op in op_list] + + def _assertOpListEqual(self, ops1, ops2): + self.assertEquals(self._OpNames(ops1), self._OpNames(ops2)) + + def testOpsBetweenSimple(self): + with ops.Graph().as_default() as g: + t1 = constant(1.0) + t2 = constant(2.0) + t3 = array_ops.pack([t1, t2]) + # Full graph + self._assertOpListEqual([t3.op, t2.op, t1.op], + _OpsBetween(g, [t3.op], [t1.op, t2.op])) + # Only t1, t3. + self._assertOpListEqual([t3.op, t1.op], + _OpsBetween(g, [t3.op], [t1.op])) + + def testOpsBetweenUnreachable(self): + with ops.Graph().as_default() as g: + t1 = constant(1.0) + t2 = constant(2.0) + _ = array_ops.pack([t1, t2]) + t4 = constant(1.0) + t5 = constant(2.0) + t6 = array_ops.pack([t4, t5]) + # Elements of to_ops are always listed. + self._assertOpListEqual([t6.op], _OpsBetween(g, [t6.op], [t1.op])) + + def testOpsBetweenCut(self): + with ops.Graph().as_default() as g: + t1 = constant(1.0) + t2 = constant(2.0) + t3 = array_ops.pack([t1, t2]) + t4 = constant([1.0]) + t5 = array_ops.concat(0, [t4, t3]) + t6 = constant([2.0]) + t7 = array_ops.concat(0, [t5, t6]) + self._assertOpListEqual([t7.op, t5.op, t4.op], + _OpsBetween(g, [t7.op], [t4.op])) + + def testOpsBetweenCycle(self): + with ops.Graph().as_default() as g: + t1 = constant(1.0) + t2 = constant(2.0) + t3 = array_ops.pack([t1, t2]) + t4 = array_ops.concat(0, [t3, t3, t3]) + t5 = constant([1.0]) + t6 = array_ops.concat(0, [t4, t5]) + t7 = array_ops.concat(0, [t6, t3]) + self._assertOpListEqual([t6.op, t4.op, t3.op], + _OpsBetween(g, [t6.op], [t3.op])) + self._assertOpListEqual([t7.op, t6.op, t5.op, t4.op, t3.op, t1.op], + _OpsBetween(g, [t7.op], [t1.op, t5.op])) + self._assertOpListEqual([t6.op, t5.op, t4.op, t3.op, t2.op], + _OpsBetween(g, [t6.op], [t2.op, t5.op])) + + def testGradients(self): + with ops.Graph().as_default(): + inp = constant(1.0, shape=[32, 100], name="in") + w = constant(1.0, shape=[100, 10], name="w") + b = constant(1.0, shape=[10], name="b") + xw = math_ops.matmul(inp, w, name="xw") + h = bias_add(xw, b, name="h") + w_grad = gradients.gradients(h, w)[0] + self.assertEquals("MatMul", w_grad.op.type) + self.assertEquals(w_grad.op._original_op, xw.op) + self.assertTrue(w_grad.op.get_attr("transpose_a")) + self.assertFalse(w_grad.op.get_attr("transpose_b")) + + def testUnusedOutput(self): + with ops.Graph().as_default(): + w = constant(1.0, shape=[2, 2]) + x = constant(1.0, shape=[2, 2]) + wx = math_ops.matmul(w, x) + split_wx = array_ops.split(0, 2, wx) + c = math_ops.reduce_sum(split_wx[1]) + gw = gradients.gradients(c, [w])[0] + self.assertEquals("MatMul", gw.op.type) + + def testColocateGradients(self): + with ops.Graph().as_default() as g: + w = constant(1.0, shape=[1, 1]) + x = constant(1.0, shape=[1, 2]) + with g.device("/gpu:0"): + wx = 
math_ops.matmul(w, x) + gw = gradients.gradients(wx, [w], colocate_gradients_with_ops=True)[0] + self.assertEquals("/gpu:0", gw.device) + + def testColocateGradientsWithAggregation(self): + with ops.Graph().as_default() as g: + with g.device("/gpu:1"): + w = constant(1.0, shape=[1, 1]) + x = constant(1.0, shape=[1, 2]) + y = constant(1.0, shape=[1, 2]) + wx = math_ops.matmul(w, x) + wy = math_ops.matmul(w, y) + with g.device("/gpu:0"): + z = wx + wy + gw1 = gradients.gradients(z, [w], colocate_gradients_with_ops=True)[0] + self.assertEquals("/gpu:1", gw1.device) + gw2 = gradients.gradients(z, [w], colocate_gradients_with_ops=False)[0] + self.assertEquals(None, gw2.device) + + def testBoundaryStop(self): + # Test that we don't differentiate 'x'. The gradient function for 'x' is + # set explicitly to None so we will get an exception if the gradient code + # tries to differentiate 'x'. + with ops.Graph().as_default() as g: + c = constant(1.0) + x = array_ops.identity(c) + y = x + 1.0 + z = y + 1 + grads = gradients.gradients(z, [x]) + self.assertTrue(all([x for x in grads])) + + def testBoundaryContinue(self): + # Test that we differentiate both 'x' and 'y' correctly when x is a + # predecessor of y. + with self.test_session(): + x = constant(1.0) + y = x * 2.0 + z = y * 3.0 + grads = gradients.gradients(z, [x, y]) + self.assertTrue(all([x for x in grads])) + self.assertEqual(6.0, grads[0].eval()) + + def testAggregationMethodAccumulateN(self): + with self.test_session(): + x = constant(1.0) + y = x * 2.0 + z = y + y + y + y + y + y + y + y + y + y + grads = gradients.gradients( + z, + [x, y], + aggregation_method= + gradients.AggregationMethod.EXPERIMENTAL_ACCUMULATE_N) + self.assertTrue(all([x for x in grads])) + self.assertEqual(20.0, grads[0].eval()) + self.assertEqual(10.0, grads[1].eval()) + + def testAggregationMethodAddN(self): + with self.test_session(): + x = constant(1.0) + y = x * 2.0 + z = y + y + y + y + y + y + y + y + y + y + grads = gradients.gradients( + z, + [x, y], + aggregation_method=gradients.AggregationMethod.ADD_N) + self.assertTrue(all([x for x in grads])) + self.assertEqual(20.0, grads[0].eval()) + self.assertEqual(10.0, grads[1].eval()) + + def testAggregationMethodTree(self): + with self.test_session(): + x = constant(1.0) + y = x * 2.0 + z = y + y + y + y + y + y + y + y + y + y + grads = gradients.gradients( + z, + [x, y], + aggregation_method=gradients.AggregationMethod.EXPERIMENTAL_TREE) + self.assertTrue(all([x for x in grads])) + self.assertEqual(20.0, grads[0].eval()) + self.assertEqual(10.0, grads[1].eval()) + + def testNoGradientForStringOutputs(self): + with ops.Graph().as_default() as g: + @ops.RegisterGradient("TestOp") + def _TestOpGrad(op, float_grad, string_grad): + """Gradient function for TestOp.""" + self.assertEquals(float_grad.dtype, types.float32) + self.assertFalse(string_grad) + return float_grad + ops.RegisterShape("TestOp")(None) + + c = constant(1.0) + x, y = g.create_op("TestOp", [c], [types.float32, types.string]).outputs + z = x * 2.0 + w = z * 3.0 + grads = gradients.gradients(z, [c]) + self.assertTrue(isinstance(grads[0], ops.Tensor)) + + +class StopGradientTest(test_util.TensorFlowTestCase): + + def testStopGradient(self): + with ops.Graph().as_default(): + inp = constant(1.0, shape=[100, 32], name="in") + out = array_ops.stop_gradient(inp) + igrad = gradients.gradients(out, inp)[0] + assert igrad is None + + +class HessianVectorProductTest(test_util.TensorFlowTestCase): + + def testHessianVectorProduct(self): + # Manually compute 
the Hessian explicitly for a low-dimensional problem + # and check that HessianVectorProduct matches multiplication by the + # explicit Hessian. + # Specifically, the Hessian of f(x) = x^T A x is + # H = A + A^T. + # We expect HessianVectorProduct(f(x), x, v) to be H v. + m = 4 + rng = np.random.RandomState([1, 2, 3]) + mat_value = rng.randn(m, m).astype("float32") + v_value = rng.randn(m, 1).astype("float32") + x_value = rng.randn(m, 1).astype("float32") + hess_value = mat_value + mat_value.T + hess_v_value = np.dot(hess_value, v_value) + for use_gpu in [False, True]: + with self.test_session(use_gpu=use_gpu): + mat = constant_op.constant(mat_value) + v = constant_op.constant(v_value) + x = constant_op.constant(x_value) + mat_x = math_ops.matmul(mat, x, name="Ax") + x_mat_x = math_ops.matmul(array_ops.transpose(x), mat_x, name="xAx") + hess_v = gradients._hessian_vector_product(x_mat_x, [x], [v])[0] + hess_v_actual = hess_v.eval() + self.assertAllClose(hess_v_value, hess_v_actual) + + +class IndexedSlicesToTensorTest(test_util.TensorFlowTestCase): + + def testIndexedSlicesToTensor(self): + with self.test_session(): + np_val = np.random.rand(4, 4, 4, 4).astype(np.float32) + c = constant_op.constant(np_val) + c_sparse = math_ops._as_indexed_slices(c) + self.assertAllEqual(np_val.shape, c_sparse.dense_shape.eval()) + c_dense = math_ops.mul(c_sparse, 1.0) + self.assertAllClose(np_val, c_dense.eval()) + + def testInt64Indices(self): + with self.test_session(): + np_val = np.random.rand(4, 4, 4, 4).astype(np.float32) + c = constant_op.constant(np_val) + c_sparse = math_ops._as_indexed_slices(c) + c_sparse = ops.IndexedSlices( + c_sparse.values, math_ops.cast(c_sparse.indices, types.int64), + c_sparse.dense_shape) + self.assertAllEqual(np_val.shape, c_sparse.dense_shape.eval()) + c_dense = math_ops.mul(c_sparse, 1.0) + self.assertAllClose(np_val, c_dense.eval()) + + def testWarnings(self): + # Smaller than the threshold: no warning. + c_sparse = ops.IndexedSlices(array_ops.placeholder(types.float32), + array_ops.placeholder(types.int32), + constant([4, 4, 4, 4])) + with warnings.catch_warnings(record=True) as w: + math_ops.mul(c_sparse, 1.0) + self.assertEqual(0, len(w)) + + # Greater than or equal to the threshold: warning. + c_sparse = ops.IndexedSlices(array_ops.placeholder(types.float32), + array_ops.placeholder(types.int32), + constant([100, 100, 100, 100])) + with warnings.catch_warnings(record=True) as w: + math_ops.mul(c_sparse, 1.0) + self.assertEqual(1, len(w)) + self.assertTrue( + "with 100000000 elements. This may consume a large amount of memory." + in str(w[0].message)) + + # Unknown dense shape: warning. + c_sparse = ops.IndexedSlices(array_ops.placeholder(types.float32), + array_ops.placeholder(types.int32), + array_ops.placeholder(types.int32)) + with warnings.catch_warnings(record=True) as w: + math_ops.mul(c_sparse, 1.0) + self.assertEqual(1, len(w)) + self.assertTrue( + "of unknown shape. This may consume a large amount of memory." + in str(w[0].message)) + + +if __name__ == "__main__": + googletest.main() diff --git a/tensorflow/python/ops/image_ops.py b/tensorflow/python/ops/image_ops.py new file mode 100644 index 0000000000..1b4f4aef22 --- /dev/null +++ b/tensorflow/python/ops/image_ops.py @@ -0,0 +1,786 @@ +"""## Encoding and Decoding. + +TensorFlow provides Ops to decode and encode JPEG and PNG formats. Encoded +images are represented by scalar string Tensors, decoded images by 3-D uint8 +tensors of shape `[height, width, channels]`. 
+ +The encode and decode Ops apply to one image at a time. Their input and output +are all of variable size. If you need fixed size images, pass the output of +the decode Ops to one of the cropping and resizing Ops. + +Note: The PNG encode and decode Ops support RGBA, but the conversions Ops +presently only support RGB, HSV, and GrayScale. + +@@decode_jpeg +@@encode_jpeg + +@@decode_png +@@encode_png + +## Resizing. + +The resizing Ops accept input images as tensors of several types. They always +output resized images as float32 tensors. + +The convenience function [resize_images()](#resize_images) supports both 4-D +and 3-D tensors as input and output. 4-D tensors are for batches of images, +3-D tensors for individual images. + +Other resizing Ops only support 3-D individual images as input: +[resize_area](#resize_area), [resize_bicubic](#resize_bicubic), +[resize_bilinear](#resize_bilinear), +[resize_nearest_neighbor](#resize_nearest_neighbor). + +Example: + +```python +# Decode a JPG image and resize it to 299 by 299. +image = tf.image.decode_jpeg(...) +resized_image = tf.image.resize_bilinear(image, [299, 299]) +``` + +<i>Maybe refer to the Queue examples that show how to add images to a Queue +after resizing them to a fixed size, and how to dequeue batches of resized +images from the Queue.</i> + +@@resize_images + +@@resize_area +@@resize_bicubic +@@resize_bilinear +@@resize_nearest_neighbor + + +## Cropping. + +@@resize_image_with_crop_or_pad + +@@pad_to_bounding_box +@@crop_to_bounding_box +@@random_crop +@@extract_glimpse + +## Flipping and Transposing. + +@@flip_up_down +@@random_flip_up_down + +@@flip_left_right +@@random_flip_left_right + +@@transpose_image + +## Image Adjustments. + +TensorFlow provides functions to adjust images in various ways: brightness, +contrast, hue, and saturation. Each adjustment can be done with predefined +parameters or with random parameters picked from predefined intervals. Random +adjustments are often useful to expand a training set and reduce overfitting. + +@@adjust_brightness +@@random_brightness + +@@adjust_contrast +@@random_contrast + +@@per_image_whitening +""" +import math + +import tensorflow.python.platform + +from tensorflow.python.framework import ops +from tensorflow.python.framework import random_seed +from tensorflow.python.framework import tensor_shape +from tensorflow.python.framework import tensor_util +from tensorflow.python.framework import types +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import clip_ops +from tensorflow.python.ops import common_shapes +from tensorflow.python.ops import constant_op +from tensorflow.python.ops import gen_image_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import random_ops + + +# pylint: disable=wildcard-import +from tensorflow.python.ops.gen_image_ops import * +from tensorflow.python.ops.gen_attention_ops import * +# pylint: enable=wildcard-import + +ops.NoGradient('ResizeBilinear') +ops.NoGradient('RandomCrop') + + +def _ImageDimensions(images): + """Returns the dimensions of an image tensor. + + Args: + images: 4-D Tensor of shape [batch, height, width, channels] + + Returns: + list of integers [batch, height, width, channels] + """ + # A simple abstraction to provide names for each dimension. This abstraction + # should make it simpler to switch dimensions in the future (e.g. if we ever + # want to switch height and width.) 
+ return images.get_shape().as_list() + + +def _Check3DImage(image): + """Assert that we are working with properly shaped image. + + Args: + image: 3-D Tensor of shape [height, width, channels] + + Raises: + ValueError: if image.shape is not a [3] vector. + """ + if not image.get_shape().is_fully_defined(): + raise ValueError('\'image\' must be fully defined.') + if image.get_shape().ndims != 3: + raise ValueError('\'image\' must be three-dimensional.') + if not all(x > 0 for x in image.get_shape()): + raise ValueError('all dims of \'image.shape\' must be > 0: %s' % + image.get_shape()) + + +def _CheckAtLeast3DImage(image): + """Assert that we are working with properly shaped image. + + Args: + image: >= 3-D Tensor of size [*, height, width, depth] + + Raises: + ValueError: if image.shape is not a [>= 3] vector. + """ + if not image.get_shape().is_fully_defined(): + raise ValueError('\'image\' must be fully defined.') + if image.get_shape().ndims < 3: + raise ValueError('\'image\' must be at least three-dimensional.') + if not all(x > 0 for x in image.get_shape()): + raise ValueError('all dims of \'image.shape\' must be > 0: %s' % + image.get_shape()) + + +def random_flip_up_down(image, seed=None): + """Randomly flips an image vertically (upside down). + + With a 1 in 2 chance, outputs the contents of `image` flipped along the first + dimension, which is `height`. Otherwise output the image as-is. + + Args: + image: A 3-D tensor of shape `[height, width, channels].` + seed: A Python integer. Used to create a random seed. + See [`set_random_seed`](constant_op.md#set_random_seed) for behavior. + + Returns: + A 3-D tensor of the same type and shape as `image`. + + Raises: + ValueError: if the shape of `image` not supported. + """ + _Check3DImage(image) + uniform_random = random_ops.random_uniform([], 0, 1.0, seed=seed) + mirror = math_ops.less(array_ops.pack([uniform_random, 1.0, 1.0]), 0.5) + return array_ops.reverse(image, mirror) + + +def random_flip_left_right(image, seed=None): + """Randomly flip an image horizontally (left to right). + + With a 1 in 2 chance, outputs the contents of `image` flipped along the + second dimension, which is `width`. Otherwise output the image as-is. + + Args: + image: A 3-D tensor of shape `[height, width, channels].` + seed: A Python integer. Used to create a random seed. + See [`set_random_seed`](constant_op.md#set_random_seed) for behavior. + + Returns: + A 3-D tensor of the same type and shape as `image`. + + Raises: + ValueError: if the shape of `image` not supported. + """ + _Check3DImage(image) + uniform_random = random_ops.random_uniform([], 0, 1.0, seed=seed) + mirror = math_ops.less(array_ops.pack([1.0, uniform_random, 1.0]), 0.5) + return array_ops.reverse(image, mirror) + + +def flip_left_right(image): + """Flip an image horizontally (left to right). + + Outputs the contents of `image` flipped along the second dimension, which is + `width`. + + See also `reverse()`. + + Args: + image: A 3-D tensor of shape `[height, width, channels].` + + Returns: + A 3-D tensor of the same type and shape as `image`. + + Raises: + ValueError: if the shape of `image` not supported. + """ + _Check3DImage(image) + return array_ops.reverse(image, [False, True, False]) + + +def flip_up_down(image): + """Flip an image horizontally (upside down). + + Outputs the contents of `image` flipped along the first dimension, which is + `height`. + + See also `reverse()`. 
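  For intuition, the same vertical flip on a tiny NumPy image (hypothetical
  values; the op reverses the first, i.e. height, dimension):

  ```python
  import numpy as np

  img = np.array([[[1], [2]],
                  [[3], [4]]])   # shape [height=2, width=2, channels=1]
  flipped = img[::-1, :, :]      # rows reversed, like
                                 # reverse(image, [True, False, False])
  # flipped == [[[3], [4]],
  #             [[1], [2]]]
  ```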
+ + Args: + image: A 3-D tensor of shape `[height, width, channels].` + + Returns: + A 3-D tensor of the same type and shape as `image`. + + Raises: + ValueError: if the shape of `image` not supported. + """ + _Check3DImage(image) + return array_ops.reverse(image, [True, False, False]) + + +def transpose_image(image): + """Transpose an image by swapping the first and second dimension. + + See also `transpose()`. + + Args: + image: 3-D tensor of shape `[height, width, channels]` + + Returns: + A 3-D tensor of shape `[width, height, channels]` + + Raises: + ValueError: if the shape of `image` not supported. + """ + _Check3DImage(image) + return array_ops.transpose(image, [1, 0, 2], name='transpose_image') + + +def pad_to_bounding_box(image, offset_height, offset_width, target_height, + target_width): + """Pad `image` with zeros to the specified `height` and `width`. + + Adds `offset_height` rows of zeros on top, `offset_width` columns of + zeros on the left, and then pads the image on the bottom and right + with zeros until it has dimensions `target_height`, `target_width`. + + This op does nothing if `offset_*` is zero and the image already has size + `target_height` by `target_width`. + + Args: + image: 3-D tensor with shape `[height, width, channels]` + offset_height: Number of rows of zeros to add on top. + offset_width: Number of columns of zeros to add on the left. + target_height: Height of output image. + target_width: Width of output image. + + Returns: + 3-D tensor of shape `[target_height, target_width, channels]` + + Raises: + ValueError: If the shape of `image` is incompatible with the `offset_*` or + `target_*` arguments + """ + _Check3DImage(image) + height, width, depth = _ImageDimensions(image) + + if target_width < width: + raise ValueError('target_width must be >= width') + if target_height < height: + raise ValueError('target_height must be >= height') + + after_padding_width = target_width - offset_width - width + after_padding_height = target_height - offset_height - height + + if after_padding_width < 0: + raise ValueError('target_width not possible given ' + 'offset_width and image width') + if after_padding_height < 0: + raise ValueError('target_height not possible given ' + 'offset_height and image height') + + # Do not pad on the depth dimensions. + if (offset_width or offset_height or after_padding_width or + after_padding_height): + paddings = [[offset_height, after_padding_height], + [offset_width, after_padding_width], [0, 0]] + padded = array_ops.pad(image, paddings) + padded.set_shape([target_height, target_width, depth]) + else: + padded = image + + return padded + + +def crop_to_bounding_box(image, offset_height, offset_width, target_height, + target_width): + """Crops an image to a specified bounding box. + + This op cuts a rectangular part out of `image`. The top-left corner of the + returned image is at `offset_height, offset_width` in `image`, and its + lower-right corner is at + `offset_height + target_height, offset_width + target_width'. + + Args: + image: 3-D tensor with shape `[height, width, channels]` + offset_height: Vertical coordinate of the top-left corner of the result in + the input. + offset_width: Horizontal coordinate of the top-left corner of the result in + the input. + target_height: Height of the result. + target_width: Width of the result. 
+ + Returns: + 3-D tensor of image with shape `[target_height, target_width, channels]` + + Raises: + ValueError: If the shape of `image` is incompatible with the `offset_*` or + `target_*` arguments + """ + _Check3DImage(image) + height, width, _ = _ImageDimensions(image) + + if offset_width < 0: + raise ValueError('offset_width must be >= 0.') + if offset_height < 0: + raise ValueError('offset_height must be >= 0.') + + if width < (target_width + offset_width): + raise ValueError('width must be >= target + offset.') + if height < (target_height + offset_height): + raise ValueError('height must be >= target + offset.') + + cropped = array_ops.slice(image, [offset_height, offset_width, 0], + [target_height, target_width, -1]) + + return cropped + + +def resize_image_with_crop_or_pad(image, target_height, target_width): + """Crops and/or pads an image to a target width and height. + + Resizes an image to a target width and height by either centrally + cropping the image or padding it evenly with zeros. + + If `width` or `height` is greater than the specified `target_width` or + `target_height` respectively, this op centrally crops along that dimension. + If `width` or `height` is smaller than the specified `target_width` or + `target_height` respectively, this op centrally pads with 0 along that + dimension. + + Args: + image: 3-D tensor of shape [height, width, channels] + target_height: Target height. + target_width: Target width. + + Raises: + ValueError: if `target_height` or `target_width` are zero or negative. + + Returns: + Cropped and/or padded image of shape + `[target_height, target_width, channels]` + """ + _Check3DImage(image) + original_height, original_width, _ = _ImageDimensions(image) + + if target_width <= 0: + raise ValueError('target_width must be > 0.') + if target_height <= 0: + raise ValueError('target_height must be > 0.') + + offset_crop_width = 0 + offset_pad_width = 0 + if target_width < original_width: + offset_crop_width = int((original_width - target_width) / 2) + elif target_width > original_width: + offset_pad_width = int((target_width - original_width) / 2) + + offset_crop_height = 0 + offset_pad_height = 0 + if target_height < original_height: + offset_crop_height = int((original_height - target_height) / 2) + elif target_height > original_height: + offset_pad_height = int((target_height - original_height) / 2) + + # Maybe crop if needed. + cropped = crop_to_bounding_box(image, offset_crop_height, offset_crop_width, + min(target_height, original_height), + min(target_width, original_width)) + + # Maybe pad if needed. + resized = pad_to_bounding_box(cropped, offset_pad_height, offset_pad_width, + target_height, target_width) + + if resized.get_shape().ndims is None: + raise ValueError('resized contains no shape.') + if not resized.get_shape()[0].is_compatible_with(target_height): + raise ValueError('resized height is not correct.') + if not resized.get_shape()[1].is_compatible_with(target_width): + raise ValueError('resized width is not correct.') + return resized + + +class ResizeMethod(object): + BILINEAR = 0 + NEAREST_NEIGHBOR = 1 + BICUBIC = 2 + AREA = 3 + + +def resize_images(images, new_height, new_width, method=ResizeMethod.BILINEAR): + """Resize `images` to `new_width`, `new_height` using the specified `method`. + + Resized images will be distorted if their original aspect ratio is not + the same as `new_width`, `new_height`. To avoid distortions see + [resize_image_with_crop_or_pad](#resize_image_with_crop_or_pad). 
+ + `method` can be one of: + + * <b>ResizeMethod.BILINEAR</b>: [Bilinear interpolation.] + (https://en.wikipedia.org/wiki/Bilinear_interpolation) + * <b>ResizeMethod.NEAREST_NEIGHBOR</b>: [Nearest neighbor interpolation.] + (https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation) + * <b>ResizeMethod.BICUBIC</b>: [Bicubic interpolation.] + (https://en.wikipedia.org/wiki/Bicubic_interpolation) + * <b>ResizeMethod.AREA</b>: Area interpolation. + + Args: + images: 4-D Tensor of shape `[batch, height, width, channels]` or + 3-D Tensor of shape `[height, width, channels]`. + new_height: integer. + new_width: integer. + method: ResizeMethod. Defaults to `ResizeMethod.BILINEAR`. + + Raises: + ValueError: if the shape of `images` is incompatible with the + shape arguments to this function + ValueError: if an unsupported resize method is specified. + + Returns: + If `images` was 4-D, a 4-D float Tensor of shape + `[batch, new_height, new_width, channels]`. + If `images` was 3-D, a 3-D float Tensor of shape + `[new_height, new_width, channels]`. + """ + if images.get_shape().ndims is None: + raise ValueError('\'images\' contains no shape.') + # TODO(shlens): Migrate this functionality to the underlying Op's. + is_batch = True + if len(images.get_shape()) == 3: + is_batch = False + images = array_ops.expand_dims(images, 0) + + _, height, width, depth = _ImageDimensions(images) + + if width == new_width and height == new_height: + return images + + if method == ResizeMethod.BILINEAR: + images = gen_image_ops.resize_bilinear(images, [new_height, new_width]) + elif method == ResizeMethod.NEAREST_NEIGHBOR: + images = gen_image_ops.resize_nearest_neighbor(images, [new_height, + new_width]) + elif method == ResizeMethod.BICUBIC: + images = gen_image_ops.resize_bicubic(images, [new_height, new_width]) + elif method == ResizeMethod.AREA: + images = gen_image_ops.resize_area(images, [new_height, new_width]) + else: + raise ValueError('Resize method is not implemented.') + + if not is_batch: + images = array_ops.reshape(images, [new_height, new_width, depth]) + return images + + +def per_image_whitening(image): + """Linearly scales `image` to have zero mean and unit norm. + + This op computes `(x - mean) / adjusted_stddev`, where `mean` is the average + of all values in image, and + `adjusted_stddev = max(stddev, 1.0/srqt(image.NumElements()))`. + + `stddev` is the standard deviation of all values in `image`. It is capped + away from zero to protect against division by 0 when handling uniform images. + + Note that this implementation is limited: + * It only whitens based on the statistics of an individual image. + * It does not take into account the covariance structure. + + Args: + image: 3-D tensor of shape `[height, width, channels]`. + + Returns: + The whitened image with same shape as `image`. + + Raises: + ValueError: if the shape of 'image' is incompatible with this function. + """ + _Check3DImage(image) + height, width, depth = _ImageDimensions(image) + num_pixels = height * width * depth + + image = math_ops.cast(image, dtype=types.float32) + image_mean = math_ops.reduce_mean(image) + + variance = (math_ops.reduce_mean(math_ops.square(image)) - + math_ops.square(image_mean)) + stddev = math_ops.sqrt(variance) + + # Apply a minimum normalization that protects us against uniform images. 
+ min_stddev = constant_op.constant(1.0 / math.sqrt(num_pixels)) + pixel_value_scale = math_ops.maximum(stddev, min_stddev) + pixel_value_offset = image_mean + + image = math_ops.sub(image, pixel_value_offset) + image = math_ops.div(image, pixel_value_scale) + return image + + +def random_brightness(image, max_delta, seed=None): + """Adjust the brightness of images by a random factor. + + Equivalent to `adjust_brightness()` using a `delta` randomly picked in the + interval `[-max_delta, max_delta)`. + + Note that `delta` is picked as a float. Because for integer type images, + the brightness adjusted result is rounded before casting, integer images may + have modifications in the range `[-max_delta,max_delta]`. + + Args: + image: 3-D tensor of shape `[height, width, channels]`. + max_delta: float, must be non-negative. + seed: A Python integer. Used to create a random seed. + See [`set_random_seed`](constant_op.md#set_random_seed) for behavior. + + Returns: + 3-D tensor of images of shape `[height, width, channels]` + + Raises: + ValueError: if max_delta is negative. + """ + _Check3DImage(image) + + if max_delta < 0: + raise ValueError('max_delta must be non-negative.') + + delta = random_ops.random_uniform([], -max_delta, max_delta, seed=seed) + return adjust_brightness(image, delta) + + +def random_contrast(image, lower, upper, seed=None): + """Adjust the contrase of an image by a random factor. + + Equivalent to `adjust_constrast()` but uses a `contrast_factor` randomly + picked in the interval `[lower, upper]`. + + Args: + image: 3-D tensor of shape `[height, width, channels]`. + lower: float. Lower bound for the random contrast factor. + upper: float. Upper bound for the random contrast factor. + seed: A Python integer. Used to create a random seed. + See [`set_random_seed`](constant_op.md#set_random_seed) for behavior. + + Returns: + 3-D tensor of shape `[height, width, channels]`. + + Raises: + ValueError: if `upper <= lower` or if `lower < 0`. + """ + _Check3DImage(image) + + if upper <= lower: + raise ValueError('upper must be > lower.') + + if lower < 0: + raise ValueError('lower must be non-negative.') + + # Generate an a float in [lower, upper] + contrast_factor = random_ops.random_uniform([], lower, upper, seed=seed) + return adjust_contrast(image, contrast_factor) + + +def adjust_brightness(image, delta, min_value=None, max_value=None): + """Adjust the brightness of RGB or Grayscale images. + + The value `delta` is added to all components of the tensor `image`. `image` + and `delta` are cast to `float` before adding, and the resulting values are + clamped to `[min_value, max_value]`. Finally, the result is cast back to + `images.dtype`. + + If `min_value` or `max_value` are not given, they are set to the minimum and + maximum allowed values for `image.dtype` respectively. + + Args: + image: A tensor. + delta: A scalar. Amount to add to the pixel values. + min_value: Minimum value for output. + max_value: Maximum value for output. + + Returns: + A tensor of the same shape and type as `image`. 
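  A sketch of the clamping behaviour on scalar values (plain Python,
  hypothetical numbers; the op performs the equivalent computation on
  tensors):

  ```python
  def adjusted(x, delta, min_value=0, max_value=255):
    # Add in float, round (for integer images), clip to [min_value, max_value].
    return int(round(min(max(float(x) + delta, min_value), max_value)))

  adjusted(250, 10)   # -> 255, clipped at the uint8 maximum
  adjusted(100, 10)   # -> 110
  ```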
+ """ + if min_value is None: + min_value = image.dtype.min + if max_value is None: + max_value = image.dtype.max + + with ops.op_scope([image, delta, min_value, max_value], None, + 'adjust_brightness') as name: + adjusted = math_ops.add( + math_ops.cast(image, types.float32), + math_ops.cast(delta, types.float32), + name=name) + if image.dtype.is_integer: + rounded = math_ops.round(adjusted) + else: + rounded = adjusted + clipped = clip_ops.clip_by_value(rounded, float(min_value), + float(max_value)) + output = math_ops.cast(clipped, image.dtype) + return output + + +def adjust_contrast(images, contrast_factor, min_value=None, max_value=None): + """Adjust contrast of RGB or grayscale images. + + `images` is a tensor of at least 3 dimensions. The last 3 dimensions are + interpreted as `[height, width, channels]`. The other dimensions only + represent a collection of images, such as `[batch, height, width, channels].` + + Contrast is adjusted independently for each channel of each image. + + For each channel, this Op first computes the mean of the image pixels in the + channel and then adjusts each component `x` of each pixel to + `(x - mean) * contrast_factor + mean`. + + The adjusted values are then clipped to fit in the `[min_value, max_value]` + interval. If `min_value` or `max_value` is not given, it is replaced with the + minimum and maximum values for the data type of `images` respectively. + + The contrast-adjusted image is always computed as `float`, and it is + cast back to its original type after clipping. + + Args: + images: Images to adjust. At least 3-D. + contrast_factor: A float multiplier for adjusting contrast. + min_value: Minimum value for clipping the adjusted pixels. + max_value: Maximum value for clipping the adjusted pixels. + + Returns: + The constrast-adjusted image or images. + + Raises: + ValueError: if the arguments are invalid. + """ + _CheckAtLeast3DImage(images) + + # If these are None, the min/max should be a nop, but still prevent overflows + # from the cast back to images.dtype at the end of adjust_contrast. 
+ if min_value is None: + min_value = images.dtype.min + if max_value is None: + max_value = images.dtype.max + + with ops.op_scope( + [images, contrast_factor, min_value, + max_value], None, 'adjust_contrast') as name: + adjusted = gen_image_ops.adjust_contrast(images, + contrast_factor=contrast_factor, + min_value=min_value, + max_value=max_value, + name=name) + if images.dtype.is_integer: + return math_ops.cast(math_ops.round(adjusted), images.dtype) + else: + return math_ops.cast(adjusted, images.dtype) + + +ops.RegisterShape('AdjustContrast')( + common_shapes.unchanged_shape_with_rank_at_least(3)) + + +@ops.RegisterShape('ResizeBilinear') +@ops.RegisterShape('ResizeNearestNeighbor') +@ops.RegisterShape('ResizeBicubic') +@ops.RegisterShape('ResizeArea') +def _ResizeShape(op): + """Shape function for the resize_bilinear and resize_nearest_neighbor ops.""" + input_shape = op.inputs[0].get_shape().with_rank(4) + size = tensor_util.ConstantValue(op.inputs[1]) + if size is not None: + height = size[0] + width = size[1] + else: + height = None + width = None + return [tensor_shape.TensorShape( + [input_shape[0], height, width, input_shape[3]])] + + +@ops.RegisterShape('DecodeJpeg') +@ops.RegisterShape('DecodePng') +def _ImageDecodeShape(op): + """Shape function for image decoding ops.""" + unused_input_shape = op.inputs[0].get_shape().merge_with( + tensor_shape.scalar()) + channels = op.get_attr('channels') or None + return [tensor_shape.TensorShape([None, None, channels])] + + +@ops.RegisterShape('EncodeJpeg') +@ops.RegisterShape('EncodePng') +def _ImageEncodeShape(op): + """Shape function for image encoding ops.""" + unused_input_shape = op.inputs[0].get_shape().with_rank(3) + return [tensor_shape.scalar()] + + +@ops.RegisterShape('RandomCrop') +def _random_cropShape(op): + """Shape function for the random_crop op.""" + input_shape = op.inputs[0].get_shape().with_rank(3) + unused_size_shape = op.inputs[1].get_shape().merge_with( + tensor_shape.vector(2)) + size = tensor_util.ConstantValue(op.inputs[1]) + if size is not None: + height = size[0] + width = size[1] + else: + height = None + width = None + channels = input_shape[2] + return [tensor_shape.TensorShape([height, width, channels])] + + +def random_crop(image, size, seed=None, name=None): + """Randomly crops `image` to size `[target_height, target_width]`. + + The offset of the output within `image` is uniformly random. `image` always + fully contains the result. + + Args: + image: 3-D tensor of shape `[height, width, channels]` + size: 1-D tensor with two elements, specifying target `[height, width]` + seed: A Python integer. Used to create a random seed. + See [`set_random_seed`](constant_op.md#set_random_seed) for behavior. + name: A name for this operation (optional). + + Returns: + A cropped 3-D tensor of shape `[target_height, target_width, channels]`. 
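+
+  For example, a minimal usage sketch (assuming `image` is a decoded 3-D image
+  tensor at least 227 pixels along each of its first two dimensions; the crop
+  size is illustrative):
+
+    crop = random_crop(image, size=[227, 227])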
+ """ + seed1, seed2 = random_seed.get_seed(seed) + return gen_image_ops.random_crop(image, size, seed=seed1, seed2=seed2, + name=name) diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py new file mode 100644 index 0000000000..2c51299198 --- /dev/null +++ b/tensorflow/python/ops/image_ops_test.py @@ -0,0 +1,771 @@ +"""Tests for tensorflow.ops.image_ops.""" +import math + +import tensorflow.python.platform + +import numpy as np + +from tensorflow.python.framework import test_util +from tensorflow.python.ops import constant_op +from tensorflow.python.ops import image_ops +from tensorflow.python.ops import io_ops +from tensorflow.python.platform import googletest + + +class FlipTest(test_util.TensorFlowTestCase): + + def testIdempotentLeftRight(self): + x_np = np.array([[1, 2, 3], [1, 2, 3]], dtype=np.uint8).reshape([2, 3, 1]) + for use_gpu in [False, True]: + with self.test_session(use_gpu=use_gpu): + x_tf = constant_op.constant(x_np, shape=x_np.shape) + y = image_ops.flip_left_right(image_ops.flip_left_right(x_tf)) + y_tf = y.eval() + self.assertAllEqual(y_tf, x_np) + + def testLeftRight(self): + x_np = np.array([[1, 2, 3], [1, 2, 3]], dtype=np.uint8).reshape([2, 3, 1]) + y_np = np.array([[3, 2, 1], [3, 2, 1]], dtype=np.uint8).reshape([2, 3, 1]) + + for use_gpu in [False, True]: + with self.test_session(use_gpu=use_gpu): + x_tf = constant_op.constant(x_np, shape=x_np.shape) + y = image_ops.flip_left_right(x_tf) + y_tf = y.eval() + self.assertAllEqual(y_tf, y_np) + + def testIdempotentUpDown(self): + x_np = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.uint8).reshape([2, 3, 1]) + + for use_gpu in [False, True]: + with self.test_session(use_gpu=use_gpu): + x_tf = constant_op.constant(x_np, shape=x_np.shape) + y = image_ops.flip_up_down(image_ops.flip_up_down(x_tf)) + y_tf = y.eval() + self.assertAllEqual(y_tf, x_np) + + def testUpDown(self): + x_np = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.uint8).reshape([2, 3, 1]) + y_np = np.array([[4, 5, 6], [1, 2, 3]], dtype=np.uint8).reshape([2, 3, 1]) + + for use_gpu in [False, True]: + with self.test_session(use_gpu=use_gpu): + x_tf = constant_op.constant(x_np, shape=x_np.shape) + y = image_ops.flip_up_down(x_tf) + y_tf = y.eval() + self.assertAllEqual(y_tf, y_np) + + def testIdempotentTranspose(self): + x_np = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.uint8).reshape([2, 3, 1]) + + for use_gpu in [False, True]: + with self.test_session(use_gpu=use_gpu): + x_tf = constant_op.constant(x_np, shape=x_np.shape) + y = image_ops.transpose_image(image_ops.transpose_image(x_tf)) + y_tf = y.eval() + self.assertAllEqual(y_tf, x_np) + + def testTranspose(self): + x_np = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.uint8).reshape([2, 3, 1]) + y_np = np.array([[1, 4], [2, 5], [3, 6]], dtype=np.uint8).reshape([3, 2, 1]) + + for use_gpu in [False, True]: + with self.test_session(use_gpu=use_gpu): + x_tf = constant_op.constant(x_np, shape=x_np.shape) + y = image_ops.transpose_image(x_tf) + y_tf = y.eval() + self.assertAllEqual(y_tf, y_np) + + +class RandomFlipTest(test_util.TensorFlowTestCase): + + def testRandomLeftRight(self): + x_np = np.array([0, 1], dtype=np.uint8).reshape([1, 2, 1]) + num_iterations = 500 + + hist = [0, 0] + with self.test_session(): + x_tf = constant_op.constant(x_np, shape=x_np.shape) + y = image_ops.random_flip_left_right(x_tf) + for _ in xrange(num_iterations): + y_np = y.eval().flatten()[0] + hist[y_np] += 1 + + # Ensure that each entry is observed within 4 standard deviations. 
+ four_stddev = 4.0 * np.sqrt(num_iterations / 2.0) + self.assertAllClose(hist, [num_iterations / 2.0] * 2, atol=four_stddev) + + def testRandomUpDown(self): + x_np = np.array([0, 1], dtype=np.uint8).reshape([2, 1, 1]) + num_iterations = 500 + + hist = [0, 0] + with self.test_session(): + x_tf = constant_op.constant(x_np, shape=x_np.shape) + y = image_ops.random_flip_up_down(x_tf) + for _ in xrange(num_iterations): + y_np = y.eval().flatten()[0] + hist[y_np] += 1 + + # Ensure that each entry is observed within 4 standard deviations. + four_stddev = 4.0 * np.sqrt(num_iterations / 2.0) + self.assertAllClose(hist, [num_iterations / 2.0] * 2, atol=four_stddev) + + +class AdjustContrastTest(test_util.TensorFlowTestCase): + + def _testContrast(self, x_np, y_np, contrast_factor, min_value, max_value): + for use_gpu in [True, False]: + with self.test_session(use_gpu=use_gpu): + x = constant_op.constant(x_np, shape=x_np.shape) + y = image_ops.adjust_contrast(x, + contrast_factor, + min_value=min_value, + max_value=max_value) + y_tf = y.eval() + self.assertAllEqual(y_tf, y_np) + + def testDoubleContrastUint8(self): + x_shape = [1, 2, 2, 3] + x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1] + x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape) + + y_data = [0, 0, 0, 63, 169, 255, 29, 0, 255, 135, 255, 0] + y_np = np.array(y_data, dtype=np.uint8).reshape(x_shape) + + self._testContrast(x_np, + y_np, + contrast_factor=2.0, + min_value=None, + max_value=None) + + def testDoubleContrastFloat(self): + x_shape = [1, 2, 2, 3] + x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1] + x_np = np.array(x_data, dtype=np.float).reshape(x_shape) + + y_data = [0, 0, 0, 62.75, 169.25, 255, 28.75, 0, 255, 134.75, 255, 0] + y_np = np.array(y_data, dtype=np.float).reshape(x_shape) + + self._testContrast(x_np, + y_np, + contrast_factor=2.0, + min_value=0, + max_value=255) + + def testHalfContrastUint8(self): + x_shape = [1, 2, 2, 3] + x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1] + x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape) + + y_data = [23, 53, 66, 50, 118, 172, 41, 54, 176, 68, 178, 60] + y_np = np.array(y_data, dtype=np.uint8).reshape(x_shape) + + self._testContrast(x_np, + y_np, + contrast_factor=0.5, + min_value=None, + max_value=None) + + def testBatchDoubleContrast(self): + x_shape = [2, 1, 2, 3] + x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1] + x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape) + + y_data = [0, 0, 0, 81, 200, 255, 11, 0, 255, 117, 255, 0] + y_np = np.array(y_data, dtype=np.uint8).reshape(x_shape) + + self._testContrast(x_np, + y_np, + contrast_factor=2.0, + min_value=None, + max_value=None) + + +class AdjustBrightnessTest(test_util.TensorFlowTestCase): + + def _testBrightness(self, x_np, y_np, delta, min_value, max_value): + with self.test_session(): + x = constant_op.constant(x_np, shape=x_np.shape) + y = image_ops.adjust_brightness(x, + delta, + min_value=min_value, + max_value=max_value) + y_tf = y.eval() + self.assertAllEqual(y_tf, y_np) + + def testPositiveDeltaUint8(self): + x_shape = [2, 2, 3] + x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1] + x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape) + + y_data = [10, 15, 23, 64, 145, 236, 47, 18, 244, 100, 255, 11] + y_np = np.array(y_data, dtype=np.uint8).reshape(x_shape) + + self._testBrightness(x_np, y_np, delta=10.0, min_value=None, max_value=None) + + def testPositiveDeltaFloat(self): + x_shape = [2, 2, 3] + x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 
1] + x_np = np.array(x_data, dtype=np.float32).reshape(x_shape) + + y_data = [10, 15, 23, 64, 145, 236, 47, 18, 244, 100, 265, 11] + y_np = np.array(y_data, dtype=np.float32).reshape(x_shape) + + self._testBrightness(x_np, y_np, delta=10.0, min_value=None, max_value=None) + + def testNegativeDelta(self): + x_shape = [2, 2, 3] + x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1] + x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape) + + y_data = [5, 5, 5, 44, 125, 216, 27, 5, 224, 80, 245, 5] + y_np = np.array(y_data, dtype=np.uint8).reshape(x_shape) + + self._testBrightness(x_np, y_np, delta=-10.0, min_value=5, max_value=None) + + +class RandomCropTest(test_util.TensorFlowTestCase): + + def testNoOp(self): + # No random cropping is performed since the target width and height + # are match the image dimensions. + height = 4 + width = 5 + x_shape = [height, width, 3] + x_np = np.arange(0, np.prod(x_shape), dtype=np.int32).reshape(x_shape) + target_shape_np = np.array([height, width], dtype=np.int64) + + with self.test_session(): + x = constant_op.constant(x_np, shape=x_shape) + target_shape = constant_op.constant(target_shape_np, shape=[2]) + y = image_ops.random_crop(x, target_shape) + y_tf = y.eval() + self.assertAllEqual(y_tf, x_np) + + def testRandomization(self): + # Run 1x1 crop num_samples times in an image and ensure that one finds each + # pixel 1/num_pixels of the time. + num_samples = 1000 + height = 5 + width = 4 + + num_pixels = height * width + data = np.arange(num_pixels).reshape([height, width, 1]) + x_np = np.array(data).astype(np.int32) + + target_shape_np = np.array([1, 1], dtype=np.int64) + + y = [] + with self.test_session(): + x = constant_op.constant(x_np, shape=x_np.shape) + target_shape = constant_op.constant(target_shape_np, shape=[2]) + y_tf = image_ops.random_crop(x, target_shape) + for _ in xrange(num_samples): + y_np = y_tf.eval() + self.assertAllEqual(y_np.shape, [1, 1, 1]) + y.extend(y_np.flatten()) + + # Calculate the mean and 4 * standard deviation. + mean = [num_samples / num_pixels] * num_pixels + four_stddev = 4.0 * np.sqrt(mean) + + # Ensure that each entry is observed in 1/num_pixels of the samples + # within 4 standard deviations. 
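+    # With num_samples = 1000 spread over num_pixels = 20, the expected count
+    # per pixel is 50 and the tolerance below is 4 * sqrt(50) ~= 28.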
+ counts = np.bincount(y) + self.assertAllClose(counts, mean, atol=four_stddev) + + +class PerImageWhiteningTest(test_util.TensorFlowTestCase): + + def _NumpyPerImageWhitening(self, x): + num_pixels = np.prod(x.shape) + x2 = np.square(x).astype(np.float32) + mn = np.mean(x) + vr = np.mean(x2) - (mn * mn) + stddev = max(math.sqrt(vr), 1.0 / math.sqrt(num_pixels)) + + y = x.astype(np.float32) + y -= mn + y /= stddev + return y + + def testBasic(self): + x_shape = [13, 9, 3] + x_np = np.arange(0, np.prod(x_shape), dtype=np.int32).reshape(x_shape) + y_np = self._NumpyPerImageWhitening(x_np) + + with self.test_session(): + x = constant_op.constant(x_np, shape=x_shape) + y = image_ops.per_image_whitening(x) + y_tf = y.eval() + self.assertAllClose(y_tf, y_np, atol=1e-4) + + +class CropToBoundingBoxTest(test_util.TensorFlowTestCase): + + def testNoOp(self): + x_shape = [13, 9, 3] + x_np = np.ones(x_shape, dtype=np.float32) + + with self.test_session(): + x = constant_op.constant(x_np, shape=x_shape) + target_height = x_shape[0] + target_width = x_shape[1] + y = image_ops.crop_to_bounding_box(x, 0, 0, target_height, target_width) + y_tf = y.eval() + self.assertAllEqual(y_tf, x_np) + + def testCropping(self): + x_np = np.arange(0, 30, dtype=np.int32).reshape([6, 5, 1]) + + offset_height = 1 + after_height = 2 + + offset_width = 0 + after_width = 3 + + target_height = x_np.shape[0] - offset_height - after_height + target_width = x_np.shape[1] - offset_width - after_width + + y_np = x_np[offset_height:offset_height + target_height, + offset_width:offset_width + target_width, :] + + with self.test_session(): + x = constant_op.constant(x_np, shape=x_np.shape) + y = image_ops.crop_to_bounding_box(x, offset_height, offset_width, + target_height, target_width) + y_tf = y.eval() + self.assertAllEqual(y_tf.flatten(), y_np.flatten()) + + +class PadToBoundingBoxTest(test_util.TensorFlowTestCase): + + def testNoOp(self): + x_shape = [13, 9, 3] + x_np = np.ones(x_shape, dtype=np.float32) + + target_height = x_shape[0] + target_width = x_shape[1] + + with self.test_session(): + x = constant_op.constant(x_np, shape=x_shape) + y = image_ops.pad_to_bounding_box(x, 0, 0, target_height, target_width) + y_tf = y.eval() + self.assertAllEqual(y_tf, x_np) + + def testPadding(self): + x_shape = [3, 4, 1] + x_np = np.ones(x_shape, dtype=np.float32) + + offset_height = 2 + after_height = 3 + + offset_width = 1 + after_width = 4 + + target_height = x_shape[0] + offset_height + after_height + target_width = x_shape[1] + offset_width + after_width + + # Note the padding are along batch, height, width and depth. 
+ paddings = ((offset_height, after_height), + (offset_width, after_width), + (0, 0)) + + y_np = np.pad(x_np, paddings, 'constant') + + with self.test_session(): + x = constant_op.constant(x_np, shape=x_shape) + y = image_ops.pad_to_bounding_box(x, offset_height, offset_width, + target_height, target_width) + y_tf = y.eval() + self.assertAllEqual(y_tf, y_np) + + +class ResizeImagesTest(test_util.TensorFlowTestCase): + + OPTIONS = [image_ops.ResizeMethod.BILINEAR, + image_ops.ResizeMethod.NEAREST_NEIGHBOR, + image_ops.ResizeMethod.BICUBIC, + image_ops.ResizeMethod.AREA] + + def testNoOp(self): + img_shape = [1, 6, 4, 1] + data = [128, 128, 64, 64, + 128, 128, 64, 64, + 64, 64, 128, 128, + 64, 64, 128, 128, + 50, 50, 100, 100, + 50, 50, 100, 100] + img_np = np.array(data, dtype=np.uint8).reshape(img_shape) + + target_height = 6 + target_width = 4 + + for opt in self.OPTIONS: + with self.test_session(): + image = constant_op.constant(img_np, shape=img_shape) + y = image_ops.resize_images(image, target_height, target_width, opt) + resized = y.eval() + self.assertAllClose(resized, img_np, atol=1e-5) + + def testResizeDown(self): + + data = [128, 128, 64, 64, + 128, 128, 64, 64, + 64, 64, 128, 128, + 64, 64, 128, 128, + 50, 50, 100, 100, + 50, 50, 100, 100] + expected_data = [128, 64, + 64, 128, + 50, 100] + target_height = 3 + target_width = 2 + + # Test out 3-D and 4-D image shapes. + img_shapes = [[1, 6, 4, 1], [6, 4, 1]] + target_shapes = [[1, target_height, target_width, 1], + [target_height, target_width, 1]] + + for target_shape, img_shape in zip(target_shapes, img_shapes): + img_np = np.array(data, dtype=np.uint8).reshape(img_shape) + + for opt in self.OPTIONS: + with self.test_session(): + image = constant_op.constant(img_np, shape=img_shape) + y = image_ops.resize_images(image, target_height, target_width, opt) + expected = np.array(expected_data).reshape(target_shape) + resized = y.eval() + self.assertAllClose(resized, expected, atol=1e-5) + + def testResizeUp(self): + img_shape = [1, 3, 2, 1] + data = [128, 64, + 64, 128, + 50, 100] + img_np = np.array(data, dtype=np.uint8).reshape(img_shape) + + target_height = 6 + target_width = 4 + expected_data = {} + expected_data[image_ops.ResizeMethod.BILINEAR] = [ + 128.0, 96.0, 64.0, 64.0, + 96.0, 96.0, 96.0, 96.0, + 64.0, 96.0, 128.0, 128.0, + 57.0, 85.5, 114.0, 114.0, + 50.0, 75.0, 100.0, 100.0, + 50.0, 75.0, 100.0, 100.0] + expected_data[image_ops.ResizeMethod.NEAREST_NEIGHBOR] = [ + 128.0, 128.0, 64.0, 64.0, + 128.0, 128.0, 64.0, 64.0, + 64.0, 64.0, 128.0, 128.0, + 64.0, 64.0, 128.0, 128.0, + 50.0, 50.0, 100.0, 100.0, + 50.0, 50.0, 100.0, 100.0] + expected_data[image_ops.ResizeMethod.AREA] = [ + 128.0, 128.0, 64.0, 64.0, + 128.0, 128.0, 64.0, 64.0, + 64.0, 64.0, 128.0, 128.0, + 64.0, 64.0, 128.0, 128.0, + 50.0, 50.0, 100.0, 100.0, + 50.0, 50.0, 100.0, 100.0] + + for opt in [ + image_ops.ResizeMethod.BILINEAR, + image_ops.ResizeMethod.NEAREST_NEIGHBOR, + image_ops.ResizeMethod.AREA]: + with self.test_session(): + image = constant_op.constant(img_np, shape=img_shape) + y = image_ops.resize_images(image, target_height, target_width, opt) + resized = y.eval() + expected = np.array(expected_data[opt]).reshape( + [1, target_height, target_width, 1]) + self.assertAllClose(resized, expected, atol=1e-05) + + def testResizeUpBicubic(self): + img_shape = [1, 6, 6, 1] + data = [128, 128, 64, 64, 128, 128, 64, 64, + 64, 64, 128, 128, 64, 64, 128, 128, + 50, 50, 100, 100, 50, 50, 100, 100, + 50, 50, 100, 100, 50, 50, 100, 100, + 50, 50, 100, 100] + 
img_np = np.array(data, dtype=np.uint8).reshape(img_shape) + + target_height = 8 + target_width = 8 + expected_data = [128, 135, 96, 55, 64, 114, 134, 128, + 78, 81, 68, 52, 57, 118, 144, 136, + 55, 49, 79, 109, 103, 89, 83, 84, + 74, 70, 95, 122, 115, 69, 49, 55, + 100, 105, 75, 43, 50, 89, 105, 100, + 57, 54, 74, 96, 91, 65, 55, 58, + 70, 69, 75, 81, 80, 72, 69, 70, + 105, 112, 75, 36, 45, 92, 111, 105] + + with self.test_session(): + image = constant_op.constant(img_np, shape=img_shape) + y = image_ops.resize_images(image, target_height, target_width, + image_ops.ResizeMethod.BICUBIC) + resized = y.eval() + expected = np.array(expected_data).reshape( + [1, target_height, target_width, 1]) + self.assertAllClose(resized, expected, atol=1) + + def testResizeDownArea(self): + img_shape = [1, 6, 6, 1] + data = [128, 64, 32, 16, 8, 4, + 4, 8, 16, 32, 64, 128, + 128, 64, 32, 16, 8, 4, + 5, 10, 15, 20, 25, 30, + 30, 25, 20, 15, 10, 5, + 5, 10, 15, 20, 25, 30] + img_np = np.array(data, dtype=np.uint8).reshape(img_shape) + + target_height = 4 + target_width = 4 + expected_data = [73, 33, 23, 39, + 73, 33, 23, 39, + 14, 16, 19, 21, + 14, 16, 19, 21] + + with self.test_session(): + image = constant_op.constant(img_np, shape=img_shape) + y = image_ops.resize_images(image, target_height, target_width, + image_ops.ResizeMethod.AREA) + expected = np.array(expected_data).reshape( + [1, target_height, target_width, 1]) + resized = y.eval() + self.assertAllClose(resized, expected, atol=1) + + +class ResizeImageWithCropOrPadTest(test_util.TensorFlowTestCase): + + def _ResizeImageWithCropOrPad(self, original, original_shape, + expected, expected_shape): + x_np = np.array(original, dtype=np.uint8).reshape(original_shape) + y_np = np.array(expected).reshape(expected_shape) + + target_height = expected_shape[0] + target_width = expected_shape[1] + + with self.test_session(): + image = constant_op.constant(x_np, shape=original_shape) + y = image_ops.resize_image_with_crop_or_pad(image, + target_height, + target_width) + resized = y.eval() + self.assertAllClose(resized, y_np, atol=1e-5) + + def testBasic(self): + # Basic no-op. + original = [1, 2, 3, 4, + 5, 6, 7, 8] + self._ResizeImageWithCropOrPad(original, [2, 4, 1], + original, [2, 4, 1]) + + def testPad(self): + # Pad even along col. + original = [1, 2, 3, 4, 5, 6, 7, 8] + expected = [0, 1, 2, 3, 4, 0, + 0, 5, 6, 7, 8, 0] + self._ResizeImageWithCropOrPad(original, [2, 4, 1], + expected, [2, 6, 1]) + # Pad odd along col. + original = [1, 2, 3, 4, + 5, 6, 7, 8] + expected = [0, 1, 2, 3, 4, 0, 0, + 0, 5, 6, 7, 8, 0, 0] + self._ResizeImageWithCropOrPad(original, [2, 4, 1], + expected, [2, 7, 1]) + + # Pad even along row. + original = [1, 2, 3, 4, + 5, 6, 7, 8] + expected = [0, 0, 0, 0, + 1, 2, 3, 4, + 5, 6, 7, 8, + 0, 0, 0, 0] + self._ResizeImageWithCropOrPad(original, [2, 4, 1], + expected, [4, 4, 1]) + # Pad odd along row. + original = [1, 2, 3, 4, + 5, 6, 7, 8] + expected = [0, 0, 0, 0, + 1, 2, 3, 4, + 5, 6, 7, 8, + 0, 0, 0, 0, + 0, 0, 0, 0] + self._ResizeImageWithCropOrPad(original, [2, 4, 1], + expected, [5, 4, 1]) + + def testCrop(self): + # Crop even along col. + original = [1, 2, 3, 4, + 5, 6, 7, 8] + expected = [2, 3, + 6, 7] + self._ResizeImageWithCropOrPad(original, [2, 4, 1], + expected, [2, 2, 1]) + # Crop odd along col. + + original = [1, 2, 3, 4, 5, 6, + 7, 8, 9, 10, 11, 12] + expected = [2, 3, 4, + 8, 9, 10] + self._ResizeImageWithCropOrPad(original, [2, 6, 1], + expected, [2, 3, 1]) + + # Crop even along row. 
+ original = [1, 2, + 3, 4, + 5, 6, + 7, 8] + expected = [3, 4, + 5, 6] + self._ResizeImageWithCropOrPad(original, [4, 2, 1], + expected, [2, 2, 1]) + + # Crop odd along row. + original = [1, 2, + 3, 4, + 5, 6, + 7, 8, + 9, 10, + 11, 12, + 13, 14, + 15, 16] + expected = [3, 4, + 5, 6, + 7, 8, + 9, 10, + 11, 12] + self._ResizeImageWithCropOrPad(original, [8, 2, 1], + expected, [5, 2, 1]) + + def testCropAndPad(self): + # Pad along row but crop along col. + original = [1, 2, 3, 4, + 5, 6, 7, 8] + expected = [0, 0, + 2, 3, + 6, 7, + 0, 0] + self._ResizeImageWithCropOrPad(original, [2, 4, 1], + expected, [4, 2, 1]) + + # Crop along row but pad along col. + original = [1, 2, + 3, 4, + 5, 6, + 7, 8] + expected = [0, 3, 4, 0, + 0, 5, 6, 0] + self._ResizeImageWithCropOrPad(original, [4, 2, 1], + expected, [2, 4, 1]) + + +def _SimpleColorRamp(): + """Build a simple color ramp RGB image.""" + w, h = 256, 200 + i = np.arange(h)[:, None] + j = np.arange(w) + image = np.empty((h, w, 3), dtype=np.uint8) + image[:, :, 0] = i + image[:, :, 1] = j + image[:, :, 2] = (i + j) >> 1 + return image + + +class JpegTest(test_util.TensorFlowTestCase): + + # TODO(irving): Add self.assertAverageLess or similar to test_util + def averageError(self, image0, image1): + self.assertEqual(image0.shape, image1.shape) + image0 = image0.astype(int) # Avoid overflow + return np.abs(image0 - image1).sum() / float(np.prod(image0.shape)) + + def testExisting(self): + # Read a real jpeg and verify shape + path = ('tensorflow/core/lib/jpeg/testdata/' + 'jpeg_merge_test1.jpg') + with self.test_session() as sess: + jpeg0 = io_ops.read_file(path) + image0 = image_ops.decode_jpeg(jpeg0) + image1 = image_ops.decode_jpeg(image_ops.encode_jpeg(image0)) + jpeg0, image0, image1 = sess.run([jpeg0, image0, image1]) + self.assertEqual(len(jpeg0), 3771) + self.assertEqual(image0.shape, (256, 128, 3)) + self.assertLess(self.averageError(image0, image1), 0.8) + + def testSynthetic(self): + with self.test_session() as sess: + # Encode it, then decode it, then encode it + image0 = constant_op.constant(_SimpleColorRamp()) + jpeg0 = image_ops.encode_jpeg(image0) + image1 = image_ops.decode_jpeg(jpeg0) + image2 = image_ops.decode_jpeg(image_ops.encode_jpeg(image1)) + jpeg0, image0, image1, image2 = sess.run([jpeg0, image0, image1, image2]) + + # The decoded-encoded image should be similar to the input + self.assertLess(self.averageError(image0, image1), 0.6) + + # We should be very close to a fixpoint + self.assertLess(self.averageError(image1, image2), 0.02) + + # Smooth ramps compress well (input size is 153600) + self.assertGreaterEqual(len(jpeg0), 5000) + self.assertLessEqual(len(jpeg0), 6000) + + def testShape(self): + with self.test_session() as sess: + jpeg = constant_op.constant('nonsense') + for channels in 0, 1, 3: + image = image_ops.decode_jpeg(jpeg, channels=channels) + self.assertEqual(image.get_shape().as_list(), + [None, None, channels or None]) + + +class PngTest(test_util.TensorFlowTestCase): + + def testExisting(self): + # Read some real PNGs, converting to different channel numbers + prefix = 'tensorflow/core/lib/png/testdata/' + inputs = (1, 'lena_gray.png'), (4, 'lena_rgba.png') + for channels_in, filename in inputs: + for channels in 0, 1, 3, 4: + with self.test_session() as sess: + png0 = io_ops.read_file(prefix + filename) + image0 = image_ops.decode_png(png0, channels=channels) + png0, image0 = sess.run([png0, image0]) + self.assertEqual(image0.shape, (26, 51, channels or channels_in)) + if channels == channels_in: + image1 
= image_ops.decode_png(image_ops.encode_png(image0)) + self.assertAllEqual(image0, image1.eval()) + + def testSynthetic(self): + with self.test_session() as sess: + # Encode it, then decode it + image0 = constant_op.constant(_SimpleColorRamp()) + png0 = image_ops.encode_png(image0, compression=7) + image1 = image_ops.decode_png(png0) + png0, image0, image1 = sess.run([png0, image0, image1]) + + # PNG is lossless + self.assertAllEqual(image0, image1) + + # Smooth ramps compress well, but not too well + self.assertGreaterEqual(len(png0), 400) + self.assertLessEqual(len(png0), 750) + + def testShape(self): + with self.test_session() as sess: + png = constant_op.constant('nonsense') + for channels in 0, 1, 3: + image = image_ops.decode_png(png, channels=channels) + self.assertEqual(image.get_shape().as_list(), + [None, None, channels or None]) + + +if __name__ == '__main__': + googletest.main() diff --git a/tensorflow/python/ops/init_ops.py b/tensorflow/python/ops/init_ops.py new file mode 100644 index 0000000000..09c8801e0e --- /dev/null +++ b/tensorflow/python/ops/init_ops.py @@ -0,0 +1,181 @@ +"""Operations often used for initializing tensors.""" + +import math +from tensorflow.python.framework import types +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import constant_op +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn_ops +from tensorflow.python.ops import random_ops + + +# TODO(mrry): PEP8 these. +def constant_initializer(value=0.0): + """Returns an initializer that generates Tensors with a single value. + + Args: + value: A Python scalar. All elements of the initialized variable + will be set to this value. + + Returns: + An initializer that generates Tensors with a single value. + """ + def _initializer(shape, dtype=types.float32): + return constant_op.constant(value, dtype=dtype, shape=shape) + return _initializer + +def random_uniform_initializer(minval=0.0, maxval=1.0, seed=None): + """Returns an initializer that generates Tensors with a uniform distribution. + + Args: + minval: a python scalar or a scalar tensor. lower bound of the range + of random values to generate. + maxval: a python scalar or a scalar tensor. upper bound of the range + of random values to generate. + seed: A Python integer. Used to create random seeds. + See [`set_random_seed`](constant_op.md#set_random_seed) for behavior. + + Returns: + An initializer that generates Tensors with a uniform distribution. + """ + def _initializer(shape, dtype=types.float32): + return random_ops.random_uniform(shape, minval, maxval, dtype, seed=seed) + return _initializer + +def random_normal_initializer(mean=0.0, stddev=1.0, seed=None): + """Returns an initializer that generates Tensors with a normal distribution. + + Args: + mean: a python scalar or a scalar tensor. Mean of the random values + to generate. + stddev: a python scalar or a scalar tensor. Standard deviation of the + random values to generate. + seed: A Python integer. Used to create random seeds. + See [`set_random_seed`](constant_op.md#set_random_seed) for behavior. + + Returns: + An initializer that generates Tensors with a normal distribution. + """ + def _initializer(shape, dtype=types.float32): + return random_ops.random_normal(shape, mean, stddev, dtype, seed=seed) + return _initializer + +def truncated_normal_initializer(mean=0.0, stddev=1.0, seed=None): + """Returns an initializer that generates a truncated normal distribution. 
+ + These values are similar to values from a random_normal_initializer + except that values more than two standard deviations from the mean + are discarded and re-drawn. This is the recommended initializer for + neural network weights and filters. + + Args: + mean: a python scalar or a scalar tensor. Mean of the random values + to generate. + stddev: a python scalar or a scalar tensor. Standard deviation of the + random values to generate. + seed: A Python integer. Used to create random seeds. + See [`set_random_seed`](constant_op.md#set_random_seed) for behavior. + + Returns: + An initializer that generates Tensors with a truncated normal + distribution. + """ + def _initializer(shape, dtype=types.float32): + return random_ops.truncated_normal(shape, mean, stddev, dtype, seed=seed) + return _initializer + +def uniform_unit_scaling_initializer(factor=1.0, seed=None): + """Returns an initializer that generates tensors without scaling variance. + + When initializing a deep network, it is in principle advantageous to keep + the scale of the input variance constant, so it does not explode or diminish + by reaching the final layer. If the input is `x` and the operation `x * W`, + and we want to initialize `W` uniformly at random, we need to pick `W` from + + [-sqrt(3) / sqrt(dim), sqrt(3) / sqrt(dim)] + + to keep the scale intact, where `dim = W.shape[0]` (the size of the input). + A similar calculation for convolutional networks gives an analogous result + with `dim` equal to the product of the first 3 dimensions. When + nonlinearities are present, we need to multiply this by a constant `factor`. + See <https://arxiv.org/pdf/1412.6558v3.pdf> for deeper motivation, experiments + and the calculation of constants. In section 2.3 there, the constants were + numerically computed: for a linear layer it's 1.0, relu: ~1.43, tanh: ~1.15. + + Args: + factor: Float. A multiplicative factor by which the values will be scaled. + seed: A Python integer. Used to create random seeds. + See [`set_random_seed`](constant_op.md#set_random_seed) for behavior. + + Returns: + An initializer that generates tensors with unit variance. + """ + def _initializer(shape, dtype=types.float32): + input_size = 1.0 + # Estimating input size is not possible to do perfectly, but we try. + # The estimate, obtained by multiplying all dimensions but the last one, + # is the right thing for matrix multiply and convolutions (see above). + for dim in shape[:-1]: + input_size *= float(dim) + max_val = math.sqrt(float(3) / float(input_size)) * factor + return random_ops.random_uniform(shape, -max_val, max_val, + dtype, seed=seed) + return _initializer + +# TODO(vrv): Unhide when we are ready to expose this publicly. +def _random_walk(shape, nonlinearity, dtype=types.float32, seed=None, + name="random_walk"): + """Create a random tensor such that backprop neither vanishes nor explodes. + + Args: + shape: a python array of int or a 1-d tensor. Sizes of the Tensor. + nonlinearity: the brain python function for implementing the + nonlinearity in tensor flow. + dtype: The type of the output. + seed: A Python integer. Used to create random seeds. + See [`set_random_seed`](constant_op.md#set_random_seed) for behavior. + name: string. Optional name for the op. + + Returns: + A Tensor of the specified sizes filled with random values. + """ + assert len(shape) == 2, "Random Walk initialization only supports 2D tensors." 
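+  # The scaling factor rwg ("random walk gain") below depends on the
+  # nonlinearity and is chosen so that, in expectation, the magnitude of the
+  # backpropagated signal neither grows nor shrinks from layer to layer.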
+ num_inputs = shape[0] + if nonlinearity == math_ops.tanh: + # No real formula for this case yet, but this works well for many + # layer widths. + rwg = 1.13 + elif nonlinearity == array_ops.identity: + rwg = math.exp(1.0 / float(2.0 * num_inputs)) + elif nonlinearity == nn_ops.relu: + rwg = math.sqrt(2.0) * math.exp(1.2 / float(max(num_inputs, 6) - 2.4)) + else: + assert False, "Unsupported nonlinearity for Random Walk initialization." + + mean = 0.0 + stddev = rwg / math.sqrt(float(num_inputs)) + + return random_ops.random_normal(shape, mean=mean, stddev=stddev, dtype=dtype, + seed=seed, name=name) + + +# TODO(vrv): Unhide when we are ready to expose this publicly. +class _RandomWalkInitializer(object): + """An Initializer that generates a tensor for Random Walk Initialization.""" + + def __init__(self, nonlinearity, seed=None): + """Construct a RandomWalkInitializer. + + Args: + nonlinearity: the python tensorflow function that computes a nonlinearity + in the graph, typically after a Wx+b type operation. + seed: A Python integer. Used to create random seeds. + See [`set_random_seed`](constant_op.md#set_random_seed) for behavior. + """ + self._nonlinearity = nonlinearity + self._seed = seed + + def __call__(self, shape, dtype=types.float32): + """Generate a tensor used to initialize a variable.""" + return random_ops._random_walk(shape, self._nonlinearity, dtype, + seed=self._seed) diff --git a/tensorflow/python/ops/io_ops.py b/tensorflow/python/ops/io_ops.py new file mode 100644 index 0000000000..9eb3bdfae4 --- /dev/null +++ b/tensorflow/python/ops/io_ops.py @@ -0,0 +1,541 @@ +"""## Placeholders + +TensorFlow provides a placeholder operation that must be fed with data +on execution. For more info, see the section on [Feeding +data](../../how_tos/reading_data/index.md#feeding). + +@@placeholder + +## Readers + +TensorFlow provides a set of Reader classes for reading data formats. +For more information on inputs and readers, see [Reading +data](../../how_tos/reading_data/index.md). + +@@ReaderBase +@@TextLineReader +@@WholeFileReader +@@IdentityReader +@@TFRecordReader +@@FixedLengthRecordReader + +## Converting + +TensorFlow provides several operations that you can use to convert various data +formats into tensors. + +@@decode_csv +@@decode_raw +@@parse_example +@@parse_single_example + +## Queues + +TensorFlow provides several implementations of 'Queues', which are +structures within the TensorFlow computation graph to stage pipelines +of tensors together. The following describe the basic Queue interface +and some implementations. To see an example use, see [Threading and +Queues](../../how_tos/threading_and_queues/index.md). + +@@QueueBase +@@FIFOQueue +@@RandomShuffleQueue + +## Dealing with the filesystem + +@@matching_files +@@read_file + +## Input pipeline + +TensorFlow functions for setting up an input-prefetching pipeline. +Please see the [reading data how-to](../../how_tos/reading_data.md) +for context. + +### Beginning of an input pipeline + +The "producer" functions add a queue to the graph and a corresponding +`QueueRunner` for running the subgraph that fills that queue. + +@@match_filenames_once +@@limit_epochs +@@range_input_producer +@@slice_input_producer +@@string_input_producer + +### Batching at the end of an input pipeline + +These functions add a queue to the graph to assemble a batch of examples, with +possible shuffling. They also add a `QueueRunner` for running the subgraph +that fills that queue. 
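+
+For example, a minimal sketch (assuming `example` and `label` are tensors
+produced by a single parsing subgraph; the batch size is illustrative):
+
+    example_batch, label_batch = batch([example, label], batch_size=32)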
+ +Use [batch](#batch) or [batch_join](#batch_join) for batching examples that have +already been well shuffled. Use [shuffle_batch](#shuffle_batch) or +[shuffle_batch_join](#shuffle_batch_join) for examples that +would benefit from additional shuffling. + +Use [batch](#batch) or [shuffle_batch](#shuffle_batch) if you want a +single thread producing examples to batch, or if you have a +single subgraph producing examples but you want to run it in N threads +(where you increase N until it can keep the queue full). Use +[batch_join](#batch_join) or [shuffle_batch_join](#shuffle_batch_join) +if you have N different subgraphs producing examples to batch and you +want them run by N threads. + +@@batch +@@batch_join +@@shuffle_batch +@@shuffle_batch_join +""" + +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.framework import types +from tensorflow.python.ops import common_shapes +from tensorflow.python.ops import gen_io_ops +# pylint: disable=wildcard-import +from tensorflow.python.ops.gen_io_ops import * +# pylint: enable=wildcard-import + + +# pylint: disable=protected-access +def _save(filename, tensor_names, tensors, tensor_slices=None, name="save"): + """Save a list of tensors to a file with given names. + + Example usage without slice info: + Save("/foo/bar", ["w", "b"], [w, b]) + + Example usage with slices: + Save("/foo/bar", ["w", "w"], [slice0, slice1], + tensor_slices=["4 10 0,2:-", "4 10 2,2:-"]) + + Args: + filename: the file name of the sstable. + tensor_names: a list of strings. + tensors: the list of tensors to be saved. + tensor_slices: Optional list of strings to specify the shape and slices of + a larger virtual tensor that each tensor is a part of. If not specified + each tensor is saved as a full slice. + name: string. Optional name for the op. + + Requires: + The length of tensors should match the size of tensor_names and of + tensor_slices. + + Returns: + An Operation that saves the tensors. + """ + if tensor_slices is None: + return gen_io_ops._save(filename, tensor_names, tensors, name=name) + else: + return gen_io_ops._save_slices(filename, tensor_names, tensor_slices, + tensors, name=name) + + +def _restore_slice(file_pattern, tensor_name, shape_and_slice, tensor_type, + name="restore_slice", preferred_shard=-1): + """Restore a tensor slice from a set of files with a given pattern. + + Example usage: + RestoreSlice("/foo/bar-?????-of-?????", "w", "10 10 0,2:-", DT_FLOAT) + + Args: + file_pattern: the file pattern used to match a set of checkpoint files. + tensor_name: the name of the tensor to restore. + shape_and_slice: the shape-and-slice spec of the slice. + tensor_type: the type of the tensor to restore. + name: string. Optional name for the op. + preferred_shard: Int. Optional shard to open first in the checkpoint file. + + Returns: + A tensor of type "tensor_type". + """ + base_type = types.as_dtype(tensor_type).base_dtype + return gen_io_ops._restore_slice( + file_pattern, tensor_name, shape_and_slice, base_type, + preferred_shard, name=name) + + +@ops.RegisterShape("Restore") +def _RestoreShape(op): + """Shape function for Restore op.""" + # Validate input shapes. 
+ unused_file_pattern = op.inputs[0].get_shape().merge_with( + tensor_shape.scalar()) + unused_tensor_name = op.inputs[1].get_shape().merge_with( + tensor_shape.scalar()) + return [tensor_shape.unknown_shape()] + + +@ops.RegisterShape("RestoreSlice") +def _RestoreSliceShape(op): + """Shape function for RestoreSlice op.""" + # Validate input shapes. + unused_file_pattern = op.inputs[0].get_shape().merge_with( + tensor_shape.scalar()) + unused_tensor_name = op.inputs[1].get_shape().merge_with( + tensor_shape.scalar()) + unused_shape_and_slice_shape = op.inputs[2].get_shape().merge_with( + tensor_shape.scalar()) + # TODO(mrry): Attempt to parse the shape_and_slice value and use it + # to form the shape of the output. + return [tensor_shape.unknown_shape()] + + +@ops.RegisterShape("Save") +def _SaveShape(op): + """Shape function for Save op.""" + # Validate input shapes. + unused_filename = op.inputs[0].get_shape().merge_with(tensor_shape.scalar()) + data_count = len(op.inputs) - 2 + unused_tensor_names_shape = op.inputs[1].get_shape().merge_with( + tensor_shape.vector(data_count)) + return [] + + +@ops.RegisterShape("SaveSlices") +def _SaveSlicesShape(op): + """Shape function for SaveSlices op.""" + # Validate input shapes. + unused_filename = op.inputs[0].get_shape().merge_with(tensor_shape.scalar()) + data_count = len(op.inputs) - 3 + unused_tensor_names_shape = op.inputs[1].get_shape().merge_with( + tensor_shape.vector(data_count)) + unused_shapes_and_slices_shape = op.inputs[2].get_shape().merge_with( + tensor_shape.vector(data_count)) + # TODO(mrry): Attempt to parse the shapes_and_slices values and use + # them to constrain the shape of the remaining inputs. + return [] + + +@ops.RegisterShape("ShardedFilename") +def _ShardedFilenameShape(op): + """Shape function for ShardedFilename op.""" + # Validate input shapes. + unused_basename_shape = op.inputs[0].get_shape().merge_with( + tensor_shape.scalar()) + unused_shard_shape = op.inputs[1].get_shape().merge_with( + tensor_shape.scalar()) + unused_num_shards_shape = op.inputs[2].get_shape().merge_with( + tensor_shape.scalar()) + return [tensor_shape.scalar()] + + +@ops.RegisterShape("ShardedFilespec") +def _ShardedFilespecShape(op): + """Shape function for ShardedFilespec op.""" + # Validate input shapes. + unused_basename_shape = op.inputs[0].get_shape().merge_with( + tensor_shape.scalar()) + unused_num_shards_shape = op.inputs[1].get_shape().merge_with( + tensor_shape.scalar()) + return [tensor_shape.scalar()] + + +class ReaderBase(object): + """Base class for different Reader types, that produce a record every step. + + Conceptually, Readers convert string 'work units' into records (key, + value pairs). Typically the 'work units' are filenames and the + records are extracted from the contents of those files. We want a + single record produced per step, but a work unit can correspond to + many records. + + Therefore we introduce some decoupling using a queue. The queue + contains the work units and the Reader dequeues from the queue when + it is asked to produce a record (via Read()) but it has finished the + last work unit. + """ + + def __init__(self, reader_ref, supports_serialize=False): + """Creates a new ReaderBase. + + Args: + reader_ref: The operation that implements the reader. + supports_serialize: True if the reader implementation can + serialize its state. 
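+
+    For example, `WholeFileReader` later in this file constructs itself as
+    (sketch):
+
+      rr = gen_io_ops._whole_file_reader(name=name)
+      super(WholeFileReader, self).__init__(rr, supports_serialize=True)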
+ """ + self._reader_ref = reader_ref + self._supports_serialize = supports_serialize + + @property + def reader_ref(self): + """Op that implements the reader.""" + return self._reader_ref + + def read(self, queue, name=None): + """Returns the next record (key, value pair) produced by a reader. + + Will dequeue a work unit from queue if necessary (e.g. when the + Reader needs to start reading from a new file since it has + finished with the previous file). + + Args: + queue: A Queue or a mutable string Tensor representing a handle + to a Queue, with string work items. + name: A name for the operation (optional). + + Returns: + A tuple of Tensors (key, value). + key: A string scalar Tensor. + value: A string scalar Tensor. + """ + if isinstance(queue, ops.Tensor): + queue_ref = queue + else: + queue_ref = queue.queue_ref + return gen_io_ops._reader_read(self._reader_ref, queue_ref, name=name) + + def num_records_produced(self, name=None): + """Returns the number of records this reader has produced. + + This is the same as the number of Read executions that have + succeeded. + + Args: + name: A name for the operation (optional). + + Returns: + An int64 Tensor. + + """ + return gen_io_ops._reader_num_records_produced(self._reader_ref, name=name) + + def num_work_units_completed(self, name=None): + """Returns the number of work units this reader has finished processing. + + Args: + name: A name for the operation (optional). + + Returns: + An int64 Tensor. + """ + return gen_io_ops._reader_num_work_units_completed(self._reader_ref, + name=name) + + def serialize_state(self, name=None): + """Produce a string tensor that encodes the state of a reader. + + Not all Readers support being serialized, so this can produce an + Unimplemented error. + + Args: + name: A name for the operation (optional). + + Returns: + A string Tensor. + """ + return gen_io_ops._reader_serialize_state(self._reader_ref, name=name) + + def restore_state(self, state, name=None): + """Restore a reader to a previously saved state. + + Not all Readers support being restored, so this can produce an + Unimplemented error. + + Args: + state: A string Tensor. + Result of a SerializeState of a Reader with matching type. + name: A name for the operation (optional). + + Returns: + The created Operation. + """ + return gen_io_ops._reader_restore_state(self._reader_ref, state, name=name) + + @property + def supports_serialize(self): + """Whether the Reader implementation can serialize its state.""" + return self._supports_serialize + + def reset(self, name=None): + """Restore a reader to its initial clean state. + + Args: + name: A name for the operation (optional). + + Returns: + The created Operation. + """ + return gen_io_ops._reader_reset(self._reader_ref, name=name) + + +ops.NoGradient("ReaderRead") +ops.NoGradient("ReaderNumRecordsProduced") +ops.NoGradient("ReaderNumWorkUnitsCompleted") +ops.NoGradient("ReaderSerializeState") +ops.NoGradient("ReaderRestoreState") +ops.NoGradient("ReaderReset") + + +class WholeFileReader(ReaderBase): + """A Reader that outputs the entire contents of a file as a value. + + To use, enqueue filenames in a Queue. The output of Read will + be a filename (key) and the contents of that file (value). + + See ReaderBase for supported methods. + """ + + def __init__(self, name=None): + """Create a WholeFileReader. + + Args: + name: A name for the operation (optional). 
+ """ + rr = gen_io_ops._whole_file_reader(name=name) + super(WholeFileReader, self).__init__(rr, supports_serialize=True) + + +ops.NoGradient("WholeFileReader") + + +class TextLineReader(ReaderBase): + """A Reader that outputs the lines of a file delimited by newlines. + + Newlines are stripped from the output. + See ReaderBase for supported methods. + """ + # TODO(josh11b): Support serializing and restoring state. + + def __init__(self, skip_header_lines=None, name=None): + """Create a TextLineReader. + + Args: + skip_header_lines: An optional int. Defaults to 0. Number of lines + to skip from the beginning of every file. + name: A name for the operation (optional). + """ + rr = gen_io_ops._text_line_reader(skip_header_lines=skip_header_lines, + name=name) + super(TextLineReader, self).__init__(rr) + + +ops.NoGradient("TextLineReader") + + +class FixedLengthRecordReader(ReaderBase): + """A Reader that outputs fixed-length records from a file. + + See ReaderBase for supported methods. + """ + # TODO(josh11b): Support serializing and restoring state. + + def __init__(self, record_bytes, header_bytes=None, footer_bytes=None, + name=None): + """Create a FixedLengthRecordReader. + + Args: + record_bytes: An int. + header_bytes: An optional int. Defaults to 0. + footer_bytes: An optional int. Defaults to 0. + name: A name for the operation (optional). + """ + rr = gen_io_ops._fixed_length_record_reader( + record_bytes=record_bytes, header_bytes=header_bytes, + footer_bytes=footer_bytes, name=name) + super(FixedLengthRecordReader, self).__init__(rr) + + +ops.NoGradient("FixedLengthRecordReader") + + +class TFRecordReader(ReaderBase): + """A Reader that outputs the records from a TFRecords file. + + See ReaderBase for supported methods. + """ + # TODO(josh11b): Support serializing and restoring state. + + def __init__(self, name=None): + """Create a TFRecordReader. + + Args: + name: A name for the operation (optional). + """ + rr = gen_io_ops._tf_record_reader(name=name) + super(TFRecordReader, self).__init__(rr) + + +ops.NoGradient("TFRecordReader") + + +class IdentityReader(ReaderBase): + """A Reader that outputs the queued work as both the key and value. + + To use, enqueue strings in a Queue. Read will take the front + work string and output (work, work). + + See ReaderBase for supported methods. + """ + + def __init__(self, name=None): + """Create a IdentityReader. + + Args: + name: A name for the operation (optional). 
+ """ + rr = gen_io_ops._identity_reader(name=name) + super(IdentityReader, self).__init__(rr, supports_serialize=True) + + +ops.NoGradient("IdentityReader") + + +ops.RegisterShape("FixedLengthRecordReader")(common_shapes.scalar_shape) +ops.RegisterShape("IdentityReader")(common_shapes.scalar_shape) +ops.RegisterShape("TextLineReader")(common_shapes.scalar_shape) +ops.RegisterShape("WholeFileReader")(common_shapes.scalar_shape) +ops.RegisterShape("TFRecordReader")(common_shapes.scalar_shape) + + +@ops.RegisterShape("ReaderNumRecordsProduced") +@ops.RegisterShape("ReaderNumWorkUnitsCompleted") +@ops.RegisterShape("ReaderSerializeState") +def _ReaderScalarShape(op): + """Shape function for ops that transform a reader to a scalar.""" + unused_handle_shape = op.inputs[0].get_shape().merge_with( + tensor_shape.scalar()) + return [tensor_shape.scalar()] + + +@ops.RegisterShape("ReaderRead") +def _ReaderReadShape(op): + """Shape function for the ReaderBase.Read op.""" + unused_handle_shape = op.inputs[0].get_shape().merge_with( + tensor_shape.scalar()) + unused_queue_shape = op.inputs[1].get_shape().merge_with( + tensor_shape.scalar()) + return [tensor_shape.scalar(), tensor_shape.scalar()] + + +@ops.RegisterShape("ReaderReset") +def _ReaderResetShape(op): + """Shape function for the ReaderBase.Reset op.""" + unused_handle_shape = op.inputs[0].get_shape().merge_with( + tensor_shape.scalar()) + return [] + + +@ops.RegisterShape("ReaderRestoreState") +def _ReaderRestoreStateShape(op): + """Shape function for the ReaderBase.Restore op.""" + unused_handle_shape = op.inputs[0].get_shape().merge_with( + tensor_shape.scalar()) + unused_state_shape = op.inputs[1].get_shape().merge_with( + tensor_shape.scalar()) + return [] + + +@ops.RegisterShape("ReadFile") +def _ReadFileShape(op): + """Shape function for the ReadFile op.""" + return [op.inputs[0].get_shape().merge_with(tensor_shape.scalar())] + + +@ops.RegisterShape("MatchingFiles") +def _MatchingFilesShape(op): + """Shape function for the MatchingFiles op.""" + unused_patern_shape = op.inputs[0].get_shape().merge_with( + tensor_shape.scalar()) + return [tensor_shape.unknown_shape(ndims=1)] diff --git a/tensorflow/python/ops/linalg_grad.py b/tensorflow/python/ops/linalg_grad.py new file mode 100644 index 0000000000..893618c9dd --- /dev/null +++ b/tensorflow/python/ops/linalg_grad.py @@ -0,0 +1,25 @@ +"""Gradients for operators defined in linalg_ops.py.""" +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import constant_op +from tensorflow.python.ops import linalg_ops +from tensorflow.python.ops import math_ops + +@ops.RegisterGradient("MatrixInverse") +def _MatrixInverseGrad(op, grad): + """Gradient for MatrixInverse.""" + ainv = op.outputs[0] + return -math_ops.matmul( + ainv, + math_ops.matmul(grad, ainv, transpose_b=True), + transpose_a=True) + +@ops.RegisterGradient("BatchMatrixInverse") +def _BatchMatrixInverseGrad(op, grad): + """Gradient for BatchMatrixInverse.""" + ainv = op.outputs[0] + return -math_ops.batch_matmul( + ainv, + math_ops.batch_matmul(grad, ainv, adj_y=True), + adj_x=True) diff --git a/tensorflow/python/ops/linalg_ops.py b/tensorflow/python/ops/linalg_ops.py new file mode 100644 index 0000000000..76fd83fb3d --- /dev/null +++ b/tensorflow/python/ops/linalg_ops.py @@ -0,0 +1,62 @@ +"""Operations for linear algebra.""" + +from tensorflow.python.framework import ops +from tensorflow.python.framework import 
tensor_shape +from tensorflow.python.ops import gen_linalg_ops +# pylint: disable=wildcard-import +from tensorflow.python.ops.gen_linalg_ops import * +# pylint: enable=wildcard-import + + +@ops.RegisterShape("Cholesky") +def _CholeskyShape(op): + input_shape = op.inputs[0].get_shape().with_rank(2) + # The matrix must be square. + input_shape[0].assert_is_compatible_with(input_shape[1]) + return [input_shape] + + +@ops.RegisterShape("BatchCholesky") +def _BatchCholeskyShape(op): + input_shape = op.inputs[0].get_shape().with_rank_at_least(3) + # The matrices in the batch must be square. + input_shape[-1].assert_is_compatible_with(input_shape[-2]) + return [input_shape] + + +@ops.RegisterShape("MatrixDeterminant") +def _MatrixDeterminantShape(op): + input_shape = op.inputs[0].get_shape().with_rank(2) + # The matrix must be square. + input_shape[0].assert_is_compatible_with(input_shape[1]) + if input_shape.ndims is not None: + return [tensor_shape.scalar()] + else: + return [tensor_shape.unknown_shape()] + + +@ops.RegisterShape("BatchMatrixDeterminant") +def _BatchMatrixDeterminantShape(op): + input_shape = op.inputs[0].get_shape().with_rank_at_least(3) + # The matrices in the batch must be square. + input_shape[-1].assert_is_compatible_with(input_shape[-2]) + if input_shape.ndims is not None: + return [input_shape[:-2]] + else: + return [tensor_shape.unknown_shape()] + + +@ops.RegisterShape("MatrixInverse") +def _MatrixInverseShape(op): + input_shape = op.inputs[0].get_shape().with_rank(2) + # The matrix must be square. + input_shape[0].assert_is_compatible_with(input_shape[1]) + return [input_shape] + + +@ops.RegisterShape("BatchMatrixInverse") +def _BatchMatrixInverseShape(op): + input_shape = op.inputs[0].get_shape().with_rank_at_least(3) + # The matrices in the batch must be square. + input_shape[-1].assert_is_compatible_with(input_shape[-2]) + return [input_shape] diff --git a/tensorflow/python/ops/logging_ops.py b/tensorflow/python/ops/logging_ops.py new file mode 100644 index 0000000000..0fad4a2dde --- /dev/null +++ b/tensorflow/python/ops/logging_ops.py @@ -0,0 +1,58 @@ +"""Logging Operations.""" + +from tensorflow.python.framework import ops +from tensorflow.python.ops import common_shapes +from tensorflow.python.ops import gen_logging_ops +# pylint: disable=wildcard-import +from tensorflow.python.ops.gen_logging_ops import * +# pylint: enable=wildcard-import + + +# Assert and Print are special symbols in python, so we must +# use an upper-case version of them. +def Assert(condition, data, summarize=None, name=None): + """Asserts that the given condition is true. + + If `condition` evaluates to false, print the list of tensors in `data`. + `summarize` determines how many entries of the tensors to print. + + Args: + condition: The condition to evaluate. + data: The tensors to print out when condition is false. + summarize: Print this many entries of each tensor. + name: A name for this operation (optional). + """ + return gen_logging_ops._assert(condition, data, summarize, name) + + +def Print(input_, data, message=None, first_n=None, summarize=None, + name=None): + """Prints a list of tensors. + + This is an identity op with the side effect of printing `data` when + evaluating. + + Args: + input_: A tensor passed through this op. + data: A list of tensors to print out when op is evaluated. + message: A string, prefix of the error message. + first_n: Only log `first_n` number of times. Negative numbers log always; + this is the default. 
+   summarize: Only print this many entries of each tensor.
+   name: A name for the operation (optional).
+
+  Returns:
+    Same tensor as `input_`.
+  """
+  return gen_logging_ops._print(input_, data, message, first_n, summarize, name)
+
+
+@ops.RegisterGradient("Print")
+def _PrintGrad(op, *grad):
+  return list(grad) + [None] * (len(op.inputs) - 1)
+
+
+# NOTE(mrry): Assert and Print produce an empty output, which is
+# presumably never read.
+ops.RegisterShape("Assert")(common_shapes.unknown_shape)
+ops.RegisterShape("Print")(common_shapes.unknown_shape)
diff --git a/tensorflow/python/ops/math_grad.py b/tensorflow/python/ops/math_grad.py
new file mode 100644
index 0000000000..cb808ff5b8
--- /dev/null
+++ b/tensorflow/python/ops/math_grad.py
@@ -0,0 +1,506 @@
+"""Gradients for operators defined in math_ops.py."""
+
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import types
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import constant_op
+from tensorflow.python.ops import data_flow_ops
+from tensorflow.python.ops import gen_array_ops
+from tensorflow.python.ops import gen_math_ops
+from tensorflow.python.ops import math_ops
+
+
+def _ReductionGradAssist(op):
+  """Reduction grads have much in common, so factor the commonality out."""
+  inp = op.inputs[0]  # Example:
+  input_shape = array_ops.shape(inp)  # [2, 3, 5, 7]
+  input_rank = array_ops.rank(inp)  # 4
+  indices = op.inputs[1]  # [1, 2]
+  indices_shape = array_ops.shape(indices)  # [2]
+  new_output_shape = data_flow_ops.dynamic_stitch(  # [2, 1, 1, 7]
+      [math_ops.range(0, input_rank),  # [0, 1, 2, 3]
+       indices],  # [1, 2]
+      [input_shape,  # [2, 3, 5, 7]
+       array_ops.fill(indices_shape, 1)])  # [1, 1]
+  return inp, new_output_shape, input_shape
+
+
+@ops.RegisterGradient("Sum")
+def _SumGrad(op, grad):
+  """Gradient for Sum."""
+  _, new_output_shape, input_shape = _ReductionGradAssist(op)
+  tile_scaling = input_shape / new_output_shape
+  grad = array_ops.reshape(grad, new_output_shape)
+  return [array_ops.tile(grad, tile_scaling), None]
+
+
+def _MinOrMaxGrad(op, grad):
+  """Gradient for Min or Max.
Amazingly it's precisely the same code.""" + inp, new_output_shape, _ = _ReductionGradAssist(op) + y = op.outputs[0] + y = array_ops.reshape(y, new_output_shape) + grad = array_ops.reshape(grad, new_output_shape) + indicators = math_ops.cast(math_ops.equal(y, inp), grad.dtype) + return [indicators * grad, None] + + +@ops.RegisterGradient("Max") +def _MaxGrad(op, grad): + """Gradient for Max.""" + return _MinOrMaxGrad(op, grad) + + +@ops.RegisterGradient("Min") +def _MinGrad(op, grad): + return _MinOrMaxGrad(op, grad) + + +@ops.RegisterGradient("Mean") +def _MeanGrad(op, grad): + """Gradient for Mean.""" + sum_grad = _SumGrad(op, grad)[0] + input_shape = array_ops.shape(op.inputs[0]) + output_shape = array_ops.shape(op.outputs[0]) + factor = (math_ops.reduce_prod(input_shape) / + math_ops.reduce_prod(output_shape)) + return sum_grad / math_ops.cast(factor, sum_grad.dtype), None + + +@ops.RegisterGradient("Prod") +def _ProdGrad(op, grad): + """Gradient for Prod.""" + # TODO(kearnes): this gives NaNs for 0s in the input tensor + _, new_output_shape, input_shape = _ReductionGradAssist(op) + tile_scaling = input_shape / new_output_shape + grad = array_ops.reshape(grad * op.outputs[0], new_output_shape) + grad = math_ops.div(array_ops.tile(grad, tile_scaling), op.inputs[0]) + return grad, None + + +@ops.RegisterGradient("SegmentSum") +def _SegmentSumGrad(op, grad): + """Gradient for SegmentSum.""" + return array_ops.gather(grad, op.inputs[1]), None + + +@ops.RegisterGradient("SegmentMean") +def _SegmentMeanGrad(op, grad): + """Gradient for SegmentMean.""" + input_rank = array_ops.rank(op.inputs[0]) + ones_shape = array_ops.concat( + 0, [array_ops.shape(op.inputs[1]), + array_ops.fill(array_ops.expand_dims(input_rank - 1, 0), 1)]) + ones = array_ops.fill(ones_shape, + constant_op.constant(1, dtype=grad.dtype)) + scaled_grad = grad * math_ops.inv(math_ops.segment_sum(ones, op.inputs[1])) + return array_ops.gather(scaled_grad, op.inputs[1]), None + + +@ops.RegisterGradient("SparseSegmentSum") +def _SparseSegmentSumGrad(op, grad): + """Gradient for SparseSegmentSum.""" + input_rows = array_ops.shape(op.inputs[0])[0] + return (math_ops.unsorted_segment_sum( + array_ops.gather(grad, op.inputs[2]), + op.inputs[1], input_rows), None, None) + + +@ops.RegisterGradient("SparseSegmentMean") +def _SparseSegmentMeanGrad(op, grad): + """Gradient for SparseSegmentMean.""" + dim0 = array_ops.shape(op.inputs[0])[0] + return (math_ops.sparse_segment_mean_grad(grad, + op.inputs[1], + op.inputs[2], + dim0), + None, None) + + +@ops.RegisterGradient("SegmentMin") +def _SegmentMinGrad(op, grad): + """Gradient for SegmentMin.""" + zeros = array_ops.zeros(array_ops.shape(op.inputs[0]), + dtype=op.inputs[0].dtype) + gathered_grads = array_ops.gather(grad, op.inputs[1]) + gathered_outputs = array_ops.gather(op.outputs[0], op.inputs[1]) + return math_ops.select(math_ops.greater(op.inputs[0], gathered_outputs), + zeros, + gathered_grads), None + + +@ops.RegisterGradient("SegmentMax") +def _SegmentMaxGrad(op, grad): + """Gradient for SegmentMax.""" + zeros = array_ops.zeros(array_ops.shape(op.inputs[0]), + dtype=op.inputs[0].dtype) + gathered_grads = array_ops.gather(grad, op.inputs[1]) + gathered_outputs = array_ops.gather(op.outputs[0], op.inputs[1]) + return math_ops.select(math_ops.less(op.inputs[0], gathered_outputs), + zeros, + gathered_grads), None + + +@ops.RegisterGradient("UnsortedSegmentSum") +def _UnsortedSegmentSumGrad(op, grad): + """Gradient for SegmentSum.""" + return array_ops.gather(grad, op.inputs[1]), 
None, None + + +@ops.RegisterGradient("Abs") +def _AbsGrad(op, grad): + x = op.inputs[0] + return grad * math_ops.sign(x) + + +@ops.RegisterGradient("Neg") +def _NegGrad(_, grad): + """Returns -grad.""" + return - grad + + +@ops.RegisterGradient("Inv") +def _InvGrad(op, grad): + """Returns -grad * (1 / x^2).""" + y = op.outputs[0] # y = 1 / x + return grad * (- math_ops.square(y)) + + +@ops.RegisterGradient("Square") +def _SquareGrad(op, grad): + x = op.inputs[0] + return grad * (2.0 * x) + + +@ops.RegisterGradient("Sqrt") +def _SqrtGrad(op, grad): + y = op.outputs[0] # y = x^(1/2) + return grad * (.5 * math_ops.inv(y)) + + +@ops.RegisterGradient("Rsqrt") +def _RsqrtGrad(op, grad): + x = op.inputs[0] + y = op.outputs[0] # y = x^(-1/2) + return grad * ((-0.5) * math_ops.inv(x) * y) + + +@ops.RegisterGradient("Exp") +def _ExpGrad(op, grad): + """Returns grad * exp(x).""" + y = op.outputs[0] # y = e^x + return grad * y + + +@ops.RegisterGradient("Log") +def _LogGrad(op, grad): + """Returns grad * (1/x).""" + x = op.inputs[0] + return grad * math_ops.inv(x) + + +@ops.RegisterGradient("Tanh") +def _TanhGrad(op, grad): + """Returns grad * (1 - tanh(x) * tanh(x)).""" + y = op.outputs[0] # y = tanh(x) + return grad * (1 - math_ops.square(y)) + + +@ops.RegisterGradient("Sigmoid") +def _SigmoidGrad(op, grad): + """Returns grad * sigmoid(x) * (1 - sigmoid(x)).""" + y = op.outputs[0] # y = sigmoid(x) + return grad * (y * (1 - y)) + + +@ops.RegisterGradient("Sign") +def _SignGrad(op, _): + """Returns 0.""" + x = op.inputs[0] + return array_ops.zeros(array_ops.shape(x), dtype=x.dtype) + + +@ops.RegisterGradient("Sin") +def _SinGrad(op, grad): + """Returns grad * cos(x).""" + x = op.inputs[0] + return grad * math_ops.cos(x) + + +@ops.RegisterGradient("Cos") +def _CosGrad(op, grad): + """Returns grad * -sin(x).""" + x = op.inputs[0] + return -grad * math_ops.sin(x) + + +@ops.RegisterGradient("AddN") +def _AddNGrad(op, grad): + """Copies the gradient to all inputs.""" + # Not broadcasting. + return [grad] * len(op.inputs) + + +@ops.RegisterGradient("Add") +def _AddGrad(op, grad): + x = op.inputs[0] + y = op.inputs[1] + sx = array_ops.shape(x) + sy = array_ops.shape(y) + rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy) + return (array_ops.reshape(math_ops.reduce_sum(grad, rx), sx), + array_ops.reshape(math_ops.reduce_sum(grad, ry), sy)) + + +@ops.RegisterGradient("Sub") +def _SubGrad(op, grad): + x = op.inputs[0] + y = op.inputs[1] + sx = array_ops.shape(x) + sy = array_ops.shape(y) + rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy) + return (array_ops.reshape(math_ops.reduce_sum(grad, rx), sx), + array_ops.reshape(-math_ops.reduce_sum(grad, ry), sy)) + + +@ops.RegisterGradient("Mul") +def _MulGrad(op, grad): + x = op.inputs[0] + y = op.inputs[1] + assert x.dtype.base_dtype == y.dtype.base_dtype, (x.dtype, " vs. 
", y.dtype) + sx = array_ops.shape(x) + sy = array_ops.shape(y) + rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy) + if x.dtype.base_dtype == types.complex64: + return (array_ops.reshape(math_ops.reduce_sum(grad * math_ops.conj(y), rx), sx), + array_ops.reshape(math_ops.reduce_sum(math_ops.conj(x) * grad, ry), sy)) + else: + return (array_ops.reshape(math_ops.reduce_sum(grad * y, rx), sx), + array_ops.reshape(math_ops.reduce_sum(x * grad, ry), sy)) + + +@ops.RegisterGradient("Div") +def _DivGrad(op, grad): + x = op.inputs[0] + y = op.inputs[1] + sx = array_ops.shape(x) + sy = array_ops.shape(y) + rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy) + return (array_ops.reshape(math_ops.reduce_sum(grad / y, rx), sx), + array_ops.reshape(math_ops.reduce_sum(grad * + (-x / math_ops.square(y)), ry), sy)) + + +@ops.RegisterGradient("Pow") +def _PowGrad(op, grad): + """Returns grad * (y*x^(y-1), z*log(x)).""" + x = op.inputs[0] + y = op.inputs[1] + z = op.outputs[0] + sx = array_ops.shape(x) + sy = array_ops.shape(y) + rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy) + gx = array_ops.reshape(math_ops.reduce_sum(grad * y * math_ops.pow(x, y - 1), rx), + sx) + gy = array_ops.reshape(math_ops.reduce_sum(grad * z * math_ops.log(x), ry), sy) + return gx, gy + + +def _MaximumMinimumGrad(op, grad, selector_op): + """Factor out the code for the gradient of Maximum or Minimum.""" + x = op.inputs[0] + y = op.inputs[1] + gdtype = grad.dtype + sx = array_ops.shape(x) + sy = array_ops.shape(y) + gradshape = array_ops.shape(grad) + zeros = array_ops.zeros(gradshape, gdtype) + xmask = selector_op(x, y) + rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy) + xgrad = math_ops.select(xmask, grad, zeros) + ygrad = math_ops.select(math_ops.logical_not(xmask), grad, zeros) + gx = array_ops.reshape(math_ops.reduce_sum(xgrad, rx), sx) + gy = array_ops.reshape(math_ops.reduce_sum(ygrad, ry), sy) + return (gx, gy) + + +@ops.RegisterGradient("Maximum") +def _MaximumGrad(op, grad): + """Returns grad*(x > y, x <= y) with type of grad.""" + return _MaximumMinimumGrad(op, grad, math_ops.greater_equal) + + +@ops.RegisterGradient("Minimum") +def _MinimumGrad(op, grad): + """Returns grad*(x < y, x >= y) with type of grad.""" + return _MaximumMinimumGrad(op, grad, math_ops.less_equal) + + +# Logical operations have no gradients. 
+ops.NoGradient("Less") +ops.NoGradient("LessEqual") +ops.NoGradient("Greater") +ops.NoGradient("GreaterEqual") +ops.NoGradient("Equal") +ops.NoGradient("NotEqual") +ops.NoGradient("LogicalAnd") +ops.NoGradient("LogicalOr") +ops.NoGradient("LogicalNot") + + +@ops.RegisterGradient("Select") +def _SelectGrad(op, grad): + c = op.inputs[0] + x = op.inputs[1] + zeros = array_ops.zeros(array_ops.shape(c), dtype=x.dtype) + return (None, math_ops.select(c, grad, zeros), + math_ops.select(c, zeros, grad)) + + +@ops.RegisterGradient("MatMul") +def _MatMulGrad(op, grad): + t_a = op.get_attr("transpose_a") + t_b = op.get_attr("transpose_b") + if not t_a and not t_b: + return (math_ops.matmul(grad, op.inputs[1], transpose_b=True), + math_ops.matmul(op.inputs[0], grad, transpose_a=True)) + elif not t_a and t_b: + return (math_ops.matmul(grad, op.inputs[1]), + math_ops.matmul(grad, op.inputs[0], transpose_a=True)) + elif t_a and not t_b: + return (math_ops.matmul(op.inputs[1], grad, transpose_b=True), + math_ops.matmul(op.inputs[0], grad)) + elif t_a and t_b: + return (math_ops.matmul(op.inputs[1], grad, transpose_a=True, + transpose_b=True), + math_ops.matmul(grad, op.inputs[0], transpose_a=True, + transpose_b=True)) + + +@ops.RegisterGradient("SparseMatMul") +def _SparseMatMulGrad(op, grad): + """Gradient for SparseMatMul.""" + + t_a = op.get_attr("transpose_a") + t_b = op.get_attr("transpose_b") + is_sparse = { + op.inputs[0]: op.get_attr("a_is_sparse"), + op.inputs[1]: op.get_attr("b_is_sparse"), + # Use heuristic to figure out if grad might be sparse + grad: (grad.op.type == "ReluGrad") + } + def _SparseMatMul(t1, t2, transpose_a=False, transpose_b=False): + """Helper function to create SparseMatMul op.""" + + assert t1 in is_sparse and t2 in is_sparse + t1_sparse = is_sparse[t1] + t2_sparse = is_sparse[t2] + if not t1_sparse and not t2_sparse: + return math_ops.matmul(t1, t2, + transpose_a=transpose_a, + transpose_b=transpose_b) + transpose_out = False + if not t1_sparse: + transpose_out = True + t1, t2 = t2, t1 + t1_sparse, t2_sparse = t2_sparse, t1_sparse + assert t1_sparse + transpose_a, transpose_b = not transpose_b, not transpose_a + + if transpose_b: + t2 = array_ops.transpose(t2) + transpose_b = False + m = math_ops.matmul(t1, t2, + transpose_a=transpose_a, + transpose_b=transpose_b, + a_is_sparse=t1_sparse, + b_is_sparse=t2_sparse) + if transpose_out: + m = array_ops.transpose(m) + return m + + if not t_a and not t_b: + return (_SparseMatMul(grad, op.inputs[1], transpose_b=True), + _SparseMatMul(op.inputs[0], grad, transpose_a=True)) + elif not t_a and t_b: + return (_SparseMatMul(grad, op.inputs[1]), + _SparseMatMul(grad, op.inputs[0], transpose_a=True)) + elif t_a and not t_b: + return (_SparseMatMul(op.inputs[1], grad, transpose_b=True), + _SparseMatMul(op.inputs[0], grad)) + elif t_a and t_b: + return (_SparseMatMul(op.inputs[1], grad, + transpose_a=True, transpose_b=True), + _SparseMatMul(grad, op.inputs[0], + transpose_a=True, transpose_b=True)) + + +@ops.RegisterGradient("Floor") +def _FloorGrad(_, grad): + return grad + + +@ops.RegisterGradient("BatchMatMul") +def _BatchMatMul(op, grad): + """Returns the gradient of x and y given the gradient of x * y.""" + x = op.inputs[0] + y = op.inputs[1] + adj_x = op.get_attr("adj_x") + adj_y = op.get_attr("adj_y") + + if not adj_x: + if not adj_y: + grad_x = math_ops.batch_matmul(grad, y, False, True) + grad_y = math_ops.batch_matmul(x, grad, True, False) + else: + grad_x = math_ops.batch_matmul(grad, y, False, False) + grad_y = 
math_ops.batch_matmul(grad, x, True, False) + else: + if not adj_y: + grad_x = math_ops.batch_matmul(y, grad, False, True) + grad_y = math_ops.batch_matmul(x, grad, False, False) + else: + grad_x = math_ops.batch_matmul(y, grad, True, True) + grad_y = math_ops.batch_matmul(grad, x, True, True) + + return grad_x, grad_y + + +ops.NoGradient("Range") +ops.NoGradient("LinSpace") + + +@ops.RegisterGradient("Complex") +def _ComplexGrad(_, grad): + """Returns the real and imaginary components of 'grad', respectively.""" + return math_ops.real(grad), math_ops.imag(grad) + + +@ops.RegisterGradient("Real") +def _RealGrad(_, grad): + """Returns 'grad' as the real part and set the imaginary part 0.""" + zero = constant_op.constant(0, dtype=grad.dtype) + return math_ops.complex(grad, zero) + + +@ops.RegisterGradient("Imag") +def _ImagGrad(_, grad): + """Returns 'grad' as the imaginary part and set the real part 0.""" + zero = constant_op.constant(0, dtype=grad.dtype) + return math_ops.complex(zero, grad) + + +@ops.RegisterGradient("Conj") +def _ConjGrad(_, grad): + """Returns the complex conjugate of grad.""" + return math_ops.conj(grad) + + +@ops.RegisterGradient("Cast") +def _CastGrad(op, grad): + t = [types.float32, types.float64, types.bfloat16] + src_type = op.inputs[0].dtype.base_dtype + dst_type = grad.dtype.base_dtype + if src_type in t and dst_type in t: + return math_ops.cast(grad, src_type) + else: + return None diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py new file mode 100644 index 0000000000..d96320e96e --- /dev/null +++ b/tensorflow/python/ops/math_ops.py @@ -0,0 +1,1201 @@ +"""## Arithmetic Operators + +TensorFlow provides several operations that you can use to add basic arithmetic +operators to your graph. + +@@add +@@sub +@@mul +@@div +@@mod + +## Basic Math Functions + +TensorFlow provides several operations that you can use to add basic +mathematical functions to your graph. + +@@add_n +@@abs +@@neg +@@sign +@@inv +@@square +@@round +@@sqrt +@@rsqrt +@@pow +@@exp +@@log +@@ceil +@@floor +@@maximum +@@minimum +@@cos +@@sin + +## Matrix Math Functions + +TensorFlow provides several operations that you can use to add basic +mathematical functions for matrices to your graph. + +@@diag +@@transpose + +@@matmul +@@batch_matmul + +@@matrix_determinant +@@batch_matrix_determinant + +@@matrix_inverse +@@batch_matrix_inverse + +@@cholesky +@@batch_cholesky + +## Complex Number Functions + +TensorFlow provides several operations that you can use to add complex number +functions to your graph. + +@@complex +@@complex_abs +@@conj +@@imag +@@real + +## Reduction + +TensorFlow provides several operations that you can use to perform +common math computations that reduce various dimensions of a tensor. + +@@reduce_sum +@@reduce_prod +@@reduce_min +@@reduce_max +@@reduce_mean +@@reduce_all +@@reduce_any + +@@accumulate_n + +## Segmentation + +TensorFlow provides several operations that you can use to perform common +math computations on tensor segments. +Here a segmentation is a partitioning of a tensor along +the first dimension, i.e. it defines a mapping from the first dimension onto +`segment_ids`. The `segment_ids` tensor should be the size of +the first dimension, `d0`, with consecutive IDs in the range `0` to `k`, +where `k<d0`. +In particular, a segmentation of a matrix tensor is a mapping of rows to +segments. 
+ +For example: + +```python +c = tf.constant([[1,2,3,4], [-1,-2,-3,-4], [5,6,7,8]]) +tf.segment_sum(c, tf.constant([0, 0, 1])) + ==> [[0 0 0 0] + [5 6 7 8]] +``` + +@@segment_sum +@@segment_prod +@@segment_min +@@segment_max +@@segment_mean + +@@unsorted_segment_sum + +@@sparse_segment_sum +@@sparse_segment_mean + + +## Sequence Comparison and Indexing + +TensorFlow provides several operations that you can use to add sequence +comparison and index extraction to your graph. You can use these operations to +determine sequence differences and determine the indexes of specific values in +a tensor. + +@@argmin +@@argmax + +@@listdiff +@@where +@@unique + +@@edit_distance + +@@invert_permutation +""" +import itertools + +import tensorflow.python.platform + +import numpy as np + +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.framework import tensor_util +from tensorflow.python.framework import types +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import common_shapes +from tensorflow.python.ops import gen_math_ops +from tensorflow.python.ops import state_ops +from tensorflow.python.ops import gen_state_ops +# pylint: disable=wildcard-import,undefined-variable +from tensorflow.python.ops.gen_math_ops import * + + +# Aliases for some automatically-generated names. +argmax = gen_math_ops.arg_max +argmin = gen_math_ops.arg_min +linspace = gen_math_ops.lin_space + + +# pylint: disable=anomalous-backslash-in-string,protected-access +def abs(x, name=None): + """Computes the absolute value of a tensor. + + Given a tensor of real numbers `x`, this operation returns a tensor + containing the absolute value of each element in `x`. For example, if x is + an input element and y is an output element, this operation computes + \\\\(y = |x|\\\\). + + See [`tf.complex_abs()`](#tf_complex_abs) to compute the absolute value of a complex + number. + + Args: + x: A `Tensor` of type `float`, `double`, `int32`, or `int64`. + name: A name for the operation (optional). + + Returns: + A `Tensor` the same size and type as `x` with absolute values. + """ + with ops.op_scope([x], name, "Abs") as name: + x = ops.convert_to_tensor(x, name="x") + if x.dtype == types.complex64: + return gen_math_ops.complex_abs(x, name=name) + return gen_math_ops._abs(x, name=name) + + + +def pow(x, y, name=None): + """Computes the power of one value to another. + + Given a tensor `x` and a tensor `y`, this operation computes \\\\(x^y\\\\) for + corresponding elements in `x` and `y`. For example: + + ``` + # tensor 'x' is [[2, 2]], [3, 3]] + # tensor 'y' is [[8, 16], [2, 3]] + tf.pow(x, y) ==> [[256, 65536], [9, 27]] + ``` + + Args: + x: A `Tensor` of type `float`, `double`, `int32`, `complex64`, or `int64`. + y: A `Tensor` of type `float`, `double`, `int32`, `complex64`, or `int64`. + name: A name for the operation (optional). + + Returns: + A `Tensor`. + """ + with ops.op_scope([x], name, "Pow") as name: + return gen_math_ops._pow(x, y, name=name) + + +def complex(real, imag, name=None): + """Converts two real numbers to a complex number. + + Given a tensor `real` representing the real part of a complex number, and a + tensor `imag` representing the imaginary part of a complex number, this + operation computes complex numbers elementwise of the form \\\\(a + bj\\\\), + where *a* represents the `real` part and *b* represents the `imag` part. + + The input tensors `real` and `imag` must be the same shape. 
+ + For example: + + ``` + # tensor 'real' is [2.25, 3.25] + # tensor `imag` is [4.75, 5.75] + tf.complex(real, imag) ==> [[2.25 + 4.74j], [3.25 + 5.75j]] + ``` + + Args: + real: A `Tensor` of type `float`. + imag: A `Tensor` of type `float`. + name: A name for the operation (optional). + + Returns: + A `Tensor` of type `complex64`. + """ + with ops.op_scope([real, imag], name, "Complex") as name: + return gen_math_ops._complex(real, imag, name=name) + + +def round(x, name=None): + """Rounds the values of a tensor to the nearest integer, element-wise. + + For example: + + ```python + # 'a' is [0.9, 2.5, 2.3, -4.4] + tf.round(a) ==> [ 1.0, 3.0, 2.0, -4.0 ] + ``` + + Args: + x: A `Tensor` of type `float` or `double`. + name: A name for the operation (optional). + + Returns: + A `Tensor` of same shape and type as `x`. + """ + x = ops.convert_to_tensor(x, name="x") + if x.dtype.is_integer: + return x + else: + return floor(x + 0.5, name=name) + + +def cast(x, dtype, name=None): + """Casts a tensor to a new type. + + The operation casts `x` (in case of `Tensor`) or `x.values` + (in case of `SparseTensor`) to `dtype`. + + For example: + + ```python + # tensor `a` is [1.8, 2.2], dtype=tf.float + tf.cast(a, tf.int32) ==> [1, 2] # dtype=tf.int32 + ``` + + Args: + x: A `Tensor` or `SparseTensor`. + dtype: The destination type. + name: A name for the operation (optional). + + Returns: + A `Tensor` or `SparseTensor` with same shape as `x`. + + Raises: + TypeError: If `x` cannot be cast to the `dtype`. + """ + with ops.op_scope([x], name, "Cast") as name: + if isinstance(x, ops.SparseTensor): + values_cast = cast(x.values, dtype, name=name) + return ops.SparseTensor(x.indices, values_cast, x.shape) + else: + # TODO(mdevin): Handle what Josh said. + # + # Could return ops.convert_to_tensor(x, dtype=dtype, ...) here, but that + # allows some conversions that cast() can't do, e.g. casting numbers to + # strings. + x = ops.convert_to_tensor(x, name="x") + if x.dtype.base_dtype == dtype: + return x + return gen_math_ops.cast(x, dtype, name=name) + + +def to_float(x, name="ToFloat"): + """Casts a tensor to type `float32`. + + Args: + x: A `Tensor` or `SparseTensor`. + name: A name for the operation (optional). + + Returns: + A `Tensor` or `SparseTensor` with same shape as `x` with type `float32`. + + Raises: + TypeError: If `x` cannot be cast to the `float32`. + """ + return cast(x, types.float32, name=name) + + +def to_double(x, name="ToDouble"): + """Casts a tensor to type `float64`. + + Args: + x: A `Tensor` or `SparseTensor`. + name: A name for the operation (optional). + + Returns: + A `Tensor` or `SparseTensor` with same shape as `x` with type `float64`. + + Raises: + TypeError: If `x` cannot be cast to the `float64`. + """ + return cast(x, types.float64, name=name) + + +def to_int32(x, name="ToInt32"): + """Casts a tensor to type `int32`. + + Args: + x: A `Tensor` or `SparseTensor`. + name: A name for the operation (optional). + + Returns: + A `Tensor` or `SparseTensor` with same shape as `x` with type `int32`. + + Raises: + TypeError: If `x` cannot be cast to the `int32`. + """ + return cast(x, types.int32, name=name) + + +def to_int64(x, name="ToInt64"): + """Casts a tensor to type `int64`. + + Args: + x: A `Tensor` or `SparseTensor`. + name: A name for the operation (optional). + + Returns: + A `Tensor` or `SparseTensor` with same shape as `x` with type `int64`. + + Raises: + TypeError: If `x` cannot be cast to the `int64`. 
+ """ + return cast(x, types.int64, name=name) + + +def to_bfloat16(x, name="ToBFloat16"): + """Casts a tensor to type `bfloat16`. + + Args: + x: A `Tensor` or `SparseTensor`. + name: A name for the operation (optional). + + Returns: + A `Tensor` or `SparseTensor` with same shape as `x` with type `bfloat16`. + + Raises: + TypeError: If `x` cannot be cast to the `bfloat16`. + """ + return cast(x, types.bfloat16, name=name) + + +ops.Tensor._override_operator("__neg__", neg) +ops.Tensor._override_operator("__abs__", abs) +# __invert__ corresponds to the ~ operator. Here we follow the numpy convention +# ~ marks an elementwise bit-wise inverse. This is only implemented for boolean +# tensors and will throw a TypeError if used on nonboolean arrays +ops.Tensor._override_operator("__invert__", logical_not) + + +def _OverrideBinaryOperatorHelper(func, op_name): + """Register operators with different tensor and scalar versions. + + Args: + func: the operator + op_name: name of the operator being overridden + """ + + def binary_op_wrapper(x, y): + with ops.op_scope([x, y], None, op_name) as name: + assert isinstance(x, ops.Tensor) + y = ops.convert_to_tensor(y, dtype=x.dtype.base_dtype, name="y") + return func(x, y, name=name) + + ops.Tensor._override_operator("__%s__" % op_name, binary_op_wrapper) + del binary_op_wrapper + + def r_binary_op_wrapper(y, x): + with ops.op_scope([x, y], None, op_name) as name: + assert isinstance(y, ops.Tensor) + x = ops.convert_to_tensor(x, dtype=y.dtype.base_dtype, name="x") + return func(x, y, name=name) + + ops.Tensor._override_operator("__r%s__" % op_name, r_binary_op_wrapper) + del r_binary_op_wrapper + + +_OverrideBinaryOperatorHelper(add, "add") +_OverrideBinaryOperatorHelper(sub, "sub") +_OverrideBinaryOperatorHelper(mul, "mul") +_OverrideBinaryOperatorHelper(div, "div") +_OverrideBinaryOperatorHelper(mod, "mod") + + +def logical_xor(x, y, name="LogicalXor"): + """x ^ y = (x | y) & ~(x & y).""" + # TODO(alemi) Make this a cwise op if people end up relying on it. + return logical_and(logical_or(x, y), logical_not(logical_and(x, y)), + name=name) + +_OverrideBinaryOperatorHelper(logical_and, "and") +_OverrideBinaryOperatorHelper(logical_or, "or") +_OverrideBinaryOperatorHelper(logical_xor, "xor") + +ops.Tensor._override_operator("__lt__", less) +ops.Tensor._override_operator("__le__", less_equal) +ops.Tensor._override_operator("__gt__", greater) +ops.Tensor._override_operator("__ge__", greater_equal) + + +def range(start, limit, delta=1, name="range"): + """Creates a sequence of integers. + + This operation creates a sequence of integers that begins at `start` and + extends by increments of `delta` up to but not including `limit`. + + For example: + + ``` + # 'start' is 3 + # 'limit' is 18 + # 'delta' is 3 + tf.range(start, limit, delta) ==> [3, 6, 9, 12, 15] + ``` + + Args: + start: A 0-D (scalar) of type `int32`. First entry in sequence. + limit: A 0-D (scalar) of type `int32`. Upper limit of sequence, + exclusive. + delta: A 0-D `Tensor` (scalar) of type `int32`. Optional. Default is 1. + Number that increments `start`. + name: A name for the operation (optional). + + Returns: + An 1-D `int32` `Tensor`. 
+ """ + return gen_math_ops._range(start, limit, delta, name=name) + + +@ops.RegisterShape("Range") +def _RangeShape(op): + start_value = tensor_util.ConstantValue(op.inputs[0]) + limit_value = tensor_util.ConstantValue(op.inputs[1]) + delta_value = tensor_util.ConstantValue(op.inputs[2]) + if start_value is None or limit_value is None or delta_value is None: + return [tensor_shape.vector(None)] + else: + return [tensor_shape.vector( + (limit_value - start_value + delta_value - 1) / delta_value)] + + +# Reduction operations +def _ReductionDims(x, reduction_indices): + """Returns range(0, rank(x)) if reduction_indices is None.""" + if reduction_indices is not None: + return reduction_indices + else: + return range(0, array_ops.rank(x)) + + +def reduce_sum(input_tensor, reduction_indices=None, keep_dims=False, + name=None): + """Computes the sum of elements across dimensions of a tensor. + + Reduces `input_tensor` along the dimensions given in `reduction_indices`. + Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each + entry in `reduction_indices`. If `keep_dims` is true, the reduced dimensions + are retained with length 1. + + If `reduction_indices` has no entries, all dimensions are reduced, and a + tensor with a single element is returned. + + For example: + + ```python + # 'x' is [[1, 1, 1]] + # [1, 1, 1]] + tf.reduce_sum(x) ==> 6 + tf.reduce_sum(x, 0) ==> [2, 2, 2] + tf.reduce_sum(x, 1) ==> [3, 3] + tf.reduce_sum(x, 1, keep_dims=True) ==> [[3], [3]] + tf.reduce_sum(x, [0, 1]) ==> 6 + ``` + + Args: + input_tensor: The tensor to reduce. Should have numeric type. + reduction_indices: The dimensions to reduce. If `None` (the defaut), + reduces all dimensions. + keep_dims: If true, retains reduced dimensions with length 1. + name: A name for the operation (optional). + + Returns: + The reduced tensor. + """ + return gen_math_ops._sum(input_tensor, _ReductionDims(input_tensor, + reduction_indices), + keep_dims, name=name) + + +def reduce_mean(input_tensor, reduction_indices=None, keep_dims=False, + name=None): + """Computes the mean of elements across dimensions of a tensor. + + Reduces `input_tensor` along the dimensions given in `reduction_indices`. + Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each + entry in `reduction_indices`. If `keep_dims` is true, the reduced dimensions + are retained with length 1. + + If `reduction_indices` has no entries, all dimensions are reduced, and a + tensor with a single element is returned. + + For example: + + ```python + # 'x' is [[1., 1. ]] + # [2., 2.]] + tf.reduce_mean(x) ==> 1.5 + tf.reduce_mean(x, 0) ==> [1.5, 1.5] + tf.reduce_mean(x, 1) ==> [1., 2.] + ``` + + Args: + input_tensor: The tensor to reduce. Should have numeric type. + reduction_indices: The dimensions to reduce. If `None` (the defaut), + reduces all dimensions. + keep_dims: If true, retains reduced dimensions with length 1. + name: A name for the operation (optional). + + Returns: + The reduced tensor. + """ + return gen_math_ops._mean(input_tensor, _ReductionDims(input_tensor, + reduction_indices), + keep_dims, name=name) + + +def reduce_prod(input_tensor, reduction_indices=None, keep_dims=False, + name=None): + """Computes the product of elements across dimensions of a tensor. + + Reduces `input_tensor` along the dimensions given in `reduction_indices`. + Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each + entry in `reduction_indices`. 
If `keep_dims` is true, the reduced dimensions + are retained with length 1. + + If `reduction_indices` has no entries, all dimensions are reduced, and a + tensor with a single element is returned. + + Args: + input_tensor: The tensor to reduce. Should have numeric type. + reduction_indices: The dimensions to reduce. If `None` (the defaut), + reduces all dimensions. + keep_dims: If true, retains reduced dimensions with length 1. + name: A name for the operation (optional). + + Returns: + The reduced tensor. + """ + return gen_math_ops._prod(input_tensor, _ReductionDims(input_tensor, + reduction_indices), + keep_dims, name=name) + + +def reduce_min(input_tensor, reduction_indices=None, keep_dims=False, + name=None): + """Computes the minimum of elements across dimensions of a tensor. + + Reduces `input_tensor` along the dimensions given in `reduction_indices`. + Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each + entry in `reduction_indices`. If `keep_dims` is true, the reduced dimensions + are retained with length 1. + + If `reduction_indices` has no entries, all dimensions are reduced, and a + tensor with a single element is returned. + + Args: + input_tensor: The tensor to reduce. Should have numeric type. + reduction_indices: The dimensions to reduce. If `None` (the defaut), + reduces all dimensions. + keep_dims: If true, retains reduced dimensions with length 1. + name: A name for the operation (optional). + + Returns: + The reduced tensor. + """ + return gen_math_ops._min(input_tensor, _ReductionDims(input_tensor, + reduction_indices), + keep_dims, name=name) + + +def reduce_max(input_tensor, reduction_indices=None, keep_dims=False, + name=None): + """Computes the maximum of elements across dimensions of a tensor. + + Reduces `input_tensor` along the dimensions given in `reduction_indices`. + Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each + entry in `reduction_indices`. If `keep_dims` is true, the reduced dimensions + are retained with length 1. + + If `reduction_indices` has no entries, all dimensions are reduced, and a + tensor with a single element is returned. + + Args: + input_tensor: The tensor to reduce. Should have numeric type. + reduction_indices: The dimensions to reduce. If `None` (the defaut), + reduces all dimensions. + keep_dims: If true, retains reduced dimensions with length 1. + name: A name for the operation (optional). + + Returns: + The reduced tensor. + """ + return gen_math_ops._max(input_tensor, _ReductionDims(input_tensor, + reduction_indices), + keep_dims, name=name) + + +def reduce_all(input_tensor, reduction_indices=None, keep_dims=False, + name=None): + """Computes the "logical and" of elements across dimensions of a tensor. + + Reduces `input_tensor` along the dimensions given in `reduction_indices`. + Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each + entry in `reduction_indices`. If `keep_dims` is true, the reduced dimensions + are retained with length 1. + + If `reduction_indices` has no entries, all dimensions are reduced, and a + tensor with a single element is returned. + + For example: + + ```python + # 'x' is [[True, True]] + # [False, False]] + tf.reduce_all(x) ==> False + tf.reduce_all(x, 0) ==> [False, False] + tf.reduce_all(x, 1) ==> [True, False] + ``` + + Args: + input_tensor: The boolean tensor to reduce. + reduction_indices: The dimensions to reduce. If `None` (the defaut), + reduces all dimensions. 
+ keep_dims: If true, retains reduced dimensions with length 1. + name: A name for the operation (optional). + + Returns: + The reduced tensor. + """ + return gen_math_ops._all(input_tensor, _ReductionDims(input_tensor, + reduction_indices), + keep_dims, name=name) + + +def reduce_any(input_tensor, reduction_indices=None, keep_dims=False, + name=None): + """Computes the "logical or" of elements across dimensions of a tensor. + + Reduces `input_tensor` along the dimensions given in `reduction_indices`. + Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each + entry in `reduction_indices`. If `keep_dims` is true, the reduced dimensions + are retained with length 1. + + If `reduction_indices` has no entries, all dimensions are reduced, and a + tensor with a single element is returned. + + For example: + + ```python + # 'x' is [[True, True]] + # [False, False]] + tf.reduce_any(x) ==> True + tf.reduce_any(x, 0) ==> [True, True] + tf.reduce_any(x, 1) ==> [True, False] + ``` + + Args: + input_tensor: The boolean tensor to reduce. + reduction_indices: The dimensions to reduce. If `None` (the defaut), + reduces all dimensions. + keep_dims: If true, retains reduced dimensions with length 1. + name: A name for the operation (optional). + + Returns: + The reduced tensor. + """ + return gen_math_ops._any(input_tensor, _ReductionDims(input_tensor, + reduction_indices), + keep_dims, name=name) + + +def matmul(a, b, + transpose_a=False, transpose_b=False, + a_is_sparse=False, b_is_sparse=False, + name=None): + """Multiplies matrix `a` by matrix `b`, producing `a` * `b`. + + The inputs must be two-dimensional matrices, with matching inner dimensions, + possibly after transposition. + + Both matrices must be of the same type. The supported types are: + `float`, `double`, `int32`, `complex64`. + + Either matrix can be transposed on the fly by setting the corresponding flag + to `True`. This is `False` by default. + + If one or both of the matrices contain a lot of zeros, a more efficient + multiplication algorithm can be used by setting the corresponding + `a_is_sparse` or `b_is_sparse` flag to `True`. These are `False` by default. + + For example: + + ```python + # 2-D tensor `a` + a = tf.constant([1, 2, 3, 4, 5, 6], shape=[2, 3]) => [[1. 2. 3.] + [4. 5. 6.]] + # 2-D tensor `b` + b = tf.constant([7, 8, 9, 10, 11, 12], shape=[3, 2]) => [[7. 8.] + [9. 10.] + [11. 12.]] + c = tf.matmul(a, b) => [[58 64] + [139 154]] + ``` + + Args: + a: `Tensor` of type `float`, `double`, `int32` or `complex64`. + b: `Tensor` with same type as `a`. + transpose_a: If `True`, `a` is transposed before multiplication. + transpose_b: If `True`, `b` is transposed before multiplication. + a_is_sparse: If `True`, `a` is treated as a sparse matrix. + b_is_sparse: If `True`, `b` is treated as a sparse matrix. + name: Name for the operation (optional). + + Returns: + A `Tensor` of the same type as `a`. 
+ """ + with ops.op_scope([a, b], name, "MatMul") as name: + a = ops.convert_to_tensor(a, name="a") + b = ops.convert_to_tensor(b, name="b") + if a.dtype == types.float32 and (a_is_sparse or b_is_sparse): + return sparse_matmul(a, b, + transpose_a=transpose_a, + transpose_b=transpose_b, + a_is_sparse=a_is_sparse, + b_is_sparse=b_is_sparse, + name=name) + else: + return gen_math_ops._mat_mul(a, b, + transpose_a=transpose_a, + transpose_b=transpose_b, + name=name) + +sparse_matmul = gen_math_ops._sparse_mat_mul +batch_matmul = gen_math_ops._batch_mat_mul + +ops.RegisterShape("MatMul")(common_shapes.matmul_shape) +ops.RegisterShape("SparseMatMul")(common_shapes.matmul_shape) + + +def _as_indexed_slices(x): + """Convert 'x' to IndexedSlices. + + Convert a dense Tensor to a block-sparse IndexedSlices. + + Args: + x: Either a Tensor object, or an IndexedSlices object. + + Returns: + An IndexedSlices object. + + Raises: + TypeError: If 'x' is not a Tensor or an IndexedSlices object. + """ + # TODO(mdevin): op_scope + if not isinstance(x, (ops.Tensor, ops.IndexedSlices)): + raise TypeError("Not a Tensor or IndexedSlices: %s" % type(x)) + if isinstance(x, ops.IndexedSlices): + return x + x_shape = array_ops.shape(x) + return ops.IndexedSlices(x, range(0, x_shape[0]), x_shape) + + +def _as_indexed_slices_list(inputs): + """Convert all elements of 'inputs' to IndexedSlices. + + Additionally, homogenize the types of all the indices to + either int32 or int64. + + Args: + inputs: List containing either Tensor or IndexedSlices objects. + + Returns: + A list of IndexedSlices objects. + + Raises: + TypeError: If 'inputs' is not a list or a tuple. + """ + if not isinstance(inputs, (list, tuple)): + raise TypeError("Expected a list or tuple, not a %s" % type(inputs)) + outputs = [_as_indexed_slices(i) for i in inputs] + with_int32_index = [o.indices for o in outputs + if o.indices.dtype == types.int32] + if not with_int32_index or len(with_int32_index) == len(outputs): + return outputs + casted_outputs = [] + for o in outputs: + if o.indices.dtype == types.int32: + casted_outputs.append( + ops.IndexedSlices(o.values, cast(o.indices, types.int64), + o.dense_shape)) + else: + casted_outputs.append(o) + return casted_outputs + + +def accumulate_n(inputs, shape=None, tensor_dtype=None, name=None): + """Returns the element-wise sum of a list of tensors. + + Optionally, pass `shape` and `tensor_dtype` for shape and type checking, + otherwise, these are inferred. + + For example: + + ```python + # tensor 'a' is [[1, 2], [3, 4] + # tensor `b` is [[5, 0], [0, 6]] + tf.accumulate_n([a, b, a]) ==> [[7, 4], [6, 14]] + + # Explicitly pass shape and type + tf.accumulate_n([a, b, a], shape=[2, 2], tensor_dtype=tf.int32) + ==> [[7, 4], [6, 14]] + ``` + + Args: + inputs: A list of `Tensor` objects, each with same shape and type. + shape: Shape of elements of `inputs`. + tensor_dtype: The type of `inputs`. + name: A name for the operation (optional). + + Returns: + A `Tensor` of same shape and type as the elements of `inputs`. + + Raises: + ValueError: If `inputs` don't all have same shape and dtype or the shape + cannot be inferred. 
+ """ + if tensor_dtype is None: + if not inputs or not isinstance(inputs, (list, tuple)): + raise ValueError("inputs must be a list of at least one Tensor with the " + "same dtype and shape") + inputs = ops.convert_n_to_tensor_or_indexed_slices(inputs) + if not all(isinstance(x, ops.Tensor) for x in inputs): + raise ValueError("inputs must be a list of at least one Tensor with the " + "same dtype and shape") + if not all(x.dtype == inputs[0].dtype for x in inputs): + raise ValueError("inputs must be a list of at least one Tensor with the " + "same dtype and shape") + tensor_dtype = inputs[0].dtype + if shape is not None: + shape = tensor_shape.as_shape(shape) + else: + shape = tensor_shape.unknown_shape() + for input_tensor in inputs: + if isinstance(input_tensor, ops.Tensor): + shape = shape.merge_with(input_tensor.get_shape()) + if not shape.is_fully_defined(): + # TODO(pbar): Make a version of assign_add that accepts an uninitialized + # lvalue, and takes its shape from that? This would allow accumulate_n to + # work in all situations that add_n currently works. + raise ValueError("Cannot infer the shape of the accumulator for " + "accumulate_n. Pass the shape argument, or set the shape " + "of at least one of the inputs.") + with ops.op_scope(inputs, name, "AccumulateN") as name: + var = gen_state_ops._temporary_variable(shape=shape, dtype=tensor_dtype) + var_name = var.op.name + var = state_ops.assign(var, array_ops.zeros_like(inputs[0])) + update_ops = [] + for input_tensor in inputs: + op = state_ops.assign_add(var, input_tensor, use_locking=True) + update_ops.append(op) + with ops.control_dependencies(update_ops): + return gen_state_ops._destroy_temporary_variable(var, + var_name=var_name, + name=name) + + +@ops.RegisterShape("BatchMatMul") +def _BatchMatMulShape(op): + """Shape function for BatchMatMul op.""" + a_shape = op.inputs[0].get_shape() + adj_a = op.get_attr("adj_x") + b_shape = op.inputs[1].get_shape() + adj_b = op.get_attr("adj_y") + if not a_shape.is_fully_defined() or not b_shape.is_fully_defined(): + return [tensor_shape.unknown_shape()] + batch_dims = a_shape[:-2].merge_with(b_shape[:-2]) + output_rows = a_shape[-1] if adj_a else a_shape[-2] + output_cols = b_shape[-2] if adj_b else b_shape[-1] + inner_a = a_shape[-2] if adj_a else a_shape[-1] + inner_b = b_shape[-1] if adj_b else b_shape[-2] + inner_a.assert_is_compatible_with(inner_b) + return [batch_dims.concatenate([output_rows, output_cols])] + + +def sigmoid(x, name=None): + """Computes sigmoid of `x` element-wise. + + Specifically, `y = 1 / (1 + exp(-x))`. + + Args: + x: A Tensor with type `float`, `double`, `int32`, `complex64`, `int64`, + or `qint32`. + name: A name for the operation (optional). + + Returns: + A Tensor with the same type as `x` if `x.dtype != qint32` + otherwise the return type is `quint8`. + """ + with ops.op_scope([x], name, "Sigmoid") as name: + x = ops.convert_to_tensor(x, name="x") + return gen_math_ops._sigmoid(x, name=name) + + +def tanh(x, name=None): + """Computes hyperbolic tangent of `x` element-wise. + + Args: + x: A Tensor with type `float`, `double`, `int32`, `complex64`, `int64`, + or `qint32`. + name: A name for the operation (optional). + + Returns: + A Tensor with the same type as `x` if `x.dtype != qint32` otherwise + the return type is `quint8`. 
+ """ + with ops.op_scope([x], name, "Tanh") as name: + x = ops.convert_to_tensor(x, name="x") + return gen_math_ops._tanh(x, name=name) + + +ops.RegisterShape("Abs")(common_shapes.unchanged_shape) +ops.RegisterShape("Ceil")(common_shapes.unchanged_shape) +ops.RegisterShape("Conj")(common_shapes.unchanged_shape) +ops.RegisterShape("Cos")(common_shapes.unchanged_shape) +ops.RegisterShape("Exp")(common_shapes.unchanged_shape) +ops.RegisterShape("Floor")(common_shapes.unchanged_shape) +ops.RegisterShape("Imag")(common_shapes.unchanged_shape) +ops.RegisterShape("Inv")(common_shapes.unchanged_shape) +ops.RegisterShape("IsFinite")(common_shapes.unchanged_shape) +ops.RegisterShape("IsInf")(common_shapes.unchanged_shape) +ops.RegisterShape("IsNan")(common_shapes.unchanged_shape) +ops.RegisterShape("Log")(common_shapes.unchanged_shape) +ops.RegisterShape("LogicalNot")(common_shapes.unchanged_shape) +ops.RegisterShape("Neg")(common_shapes.unchanged_shape) +ops.RegisterShape("Real")(common_shapes.unchanged_shape) +ops.RegisterShape("Rsqrt")(common_shapes.unchanged_shape) +ops.RegisterShape("Sign")(common_shapes.unchanged_shape) +ops.RegisterShape("Sin")(common_shapes.unchanged_shape) +ops.RegisterShape("Sqrt")(common_shapes.unchanged_shape) +ops.RegisterShape("Square")(common_shapes.unchanged_shape) +ops.RegisterShape("Sigmoid")(common_shapes.unchanged_shape) +ops.RegisterShape("Tanh")(common_shapes.unchanged_shape) +ops.RegisterShape("Cast")(common_shapes.unchanged_shape) +ops.RegisterShape("ComplexAbs")(common_shapes.unchanged_shape) + + +@ops.RegisterShape("Add") +@ops.RegisterShape("Complex") +@ops.RegisterShape("Div") +@ops.RegisterShape("Equal") +@ops.RegisterShape("Greater") +@ops.RegisterShape("GreaterEqual") +@ops.RegisterShape("Less") +@ops.RegisterShape("LessEqual") +@ops.RegisterShape("LogicalAnd") +@ops.RegisterShape("LogicalOr") +@ops.RegisterShape("Maximum") +@ops.RegisterShape("Minimum") +@ops.RegisterShape("Mod") +@ops.RegisterShape("Mul") +@ops.RegisterShape("NotEqual") +@ops.RegisterShape("Pow") +@ops.RegisterShape("Sub") +def _BroadcastShape(op): + """Common shape function for binary operators that broadcast their inputs.""" + shape_x = op.inputs[0].get_shape() + shape_y = op.inputs[1].get_shape() + if shape_x.ndims is None or shape_y.ndims is None: + return [tensor_shape.unknown_shape()] + + # To compute the broadcasted dimensions, we zip together shape_x and shape_y, + # and pad with 1 to make them the same length. + broadcasted_dims = reversed(list(itertools.izip_longest( + reversed(shape_x.dims), reversed(shape_y.dims), + fillvalue=tensor_shape.Dimension(1)))) + # Next we combine the dimensions according to the numpy broadcasting rules. + # http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html + return_dims = [] + for (dim_x, dim_y) in broadcasted_dims: + if dim_x.value is None or dim_y.value is None: + # One or both dimensions is unknown. If either dimension is greater than + # 1, we assume that the program is correct, and the other dimension will + # be broadcast to match it. + # TODO(mrry): If we eliminate the shape checks in C++, we must still + # assert that the unknown dim is either 1 or the same as the known dim. + if dim_x.value is not None and dim_x.value > 1: + return_dims.append(dim_x) + elif dim_y.value is not None and dim_y.value > 1: + return_dims.append(dim_y) + else: + return_dims.append(None) + elif dim_x.value == 1: + # We will broadcast dim_x to dim_y. + return_dims.append(dim_y) + elif dim_y.value == 1: + # We will broadcast dim_y to dim_x. 
+ return_dims.append(dim_x) + elif dim_x.value == dim_y.value: + # The dimensions are compatible, so output is the same size in that + # dimension. + return_dims.append(dim_x.merge_with(dim_y)) + else: + raise ValueError("Incompatible shapes for broadcasting: %s and %s" + % (shape_x, shape_y)) + return [tensor_shape.TensorShape(return_dims)] + + +@ops.RegisterShape("AddN") +def _AddNShape(op): + merged_shape = tensor_shape.unknown_shape() + for input_ in op.inputs: + merged_shape = merged_shape.merge_with(input_.get_shape()) + return [merged_shape] + + +@ops.RegisterShape("Select") +def _SelectShape(op): + # All three inputs must have the same shape. + return [op.inputs[0].get_shape() + .merge_with(op.inputs[1].get_shape()) + .merge_with(op.inputs[2].get_shape())] + + +@ops.RegisterShape("ArgMax") +@ops.RegisterShape("ArgMin") +def _ArgOpShape(op): + """Common shape function for arg-reduction ops.""" + dimension_shape = op.inputs[1].get_shape() + dimension_shape.assert_is_compatible_with(tensor_shape.scalar()) + input_shape = op.inputs[0].get_shape() + if input_shape.ndims is None: + return [tensor_shape.unknown_shape()] + elif input_shape.ndims <= 1: + return [tensor_shape.scalar()] + + dimension = tensor_util.ConstantValue(op.inputs[1]) + if dimension is None: + return [tensor_shape.unknown_shape(ndims=input_shape.ndims - 1)] + elif 0 <= dimension and dimension < input_shape.ndims: + returned_shape = [] + for i, dim in enumerate(input_shape.dims): + if i != dimension: + returned_shape.append(dim) + return [tensor_shape.TensorShape(returned_shape)] + else: + raise ValueError( + "dimension (%d) must be in the range [0, %d), where %d is the number " + "of dimensions in the input" + % (dimension, input_shape.ndims, input_shape.ndims)) + + +@ops.RegisterShape("All") +@ops.RegisterShape("Any") +@ops.RegisterShape("Max") +@ops.RegisterShape("Mean") +@ops.RegisterShape("Min") +@ops.RegisterShape("Prod") +@ops.RegisterShape("Sum") +def _ReductionShape(op): + """Common shape function for reduction ops.""" + input_shape = op.inputs[0].get_shape() + reduction_indices = tensor_util.ConstantValue(op.inputs[1]) + keep_dims = op.get_attr("keep_dims") + if reduction_indices is None or input_shape.ndims is None: + if keep_dims: + return [tensor_shape.unknown_shape(ndims=input_shape.ndims)] + else: + return [tensor_shape.unknown_shape()] + + # Turn reduction_indices from scalar to vector if necessary + reduction_indices = np.ravel(reduction_indices) + + for reduction_index in reduction_indices: + if reduction_index < 0 or reduction_index >= input_shape.ndims: + raise ValueError("Invalid reduction dimension %d for input with %d " + "dimensions" % (reduction_index, input_shape.ndims)) + + returned_dims = [] + if keep_dims: + for i, dim in enumerate(input_shape.dims): + if i in reduction_indices: + returned_dims.append(1) + else: + returned_dims.append(dim) + else: + for i, dim in enumerate(input_shape.dims): + if i not in reduction_indices: + returned_dims.append(dim) + return [tensor_shape.TensorShape(returned_dims)] + + +@ops.RegisterShape("SegmentMax") +@ops.RegisterShape("SegmentMean") +@ops.RegisterShape("SegmentMin") +@ops.RegisterShape("SegmentProd") +@ops.RegisterShape("SegmentSum") +def _SegmentReductionShape(op): + """Common shape function for segment reduction ops.""" + data_shape = op.inputs[0].get_shape() + segment_ids_shape = op.inputs[1].get_shape() + segment_ids_shape.assert_has_rank(1) + return [tensor_shape.TensorShape([None]).concatenate(data_shape[1:])] + + 
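To make the broadcasting rule implemented by `_BroadcastShape` above concrete, here is a minimal, self-contained sketch in plain Python. It is not part of the diff; `broadcast_shape` and the sample shapes are illustrative names only, with `None` standing in for an unknown dimension, mirroring how unknown `TensorShape` dimensions are handled above.

```python
def broadcast_shape(shape_x, shape_y):
  """Combines two shapes with NumPy broadcasting rules (None = unknown dim)."""
  # Left-pad the shorter shape with 1s so both shapes have the same rank.
  rank = max(len(shape_x), len(shape_y))
  shape_x = [1] * (rank - len(shape_x)) + list(shape_x)
  shape_y = [1] * (rank - len(shape_y)) + list(shape_y)
  out = []
  for dim_x, dim_y in zip(shape_x, shape_y):
    if dim_x is None or dim_y is None:
      # One side is unknown: a known dimension greater than 1 wins,
      # otherwise the result stays unknown.
      if dim_x is not None and dim_x > 1:
        out.append(dim_x)
      elif dim_y is not None and dim_y > 1:
        out.append(dim_y)
      else:
        out.append(None)
    elif dim_x == 1:
      out.append(dim_y)
    elif dim_y == 1:
      out.append(dim_x)
    elif dim_x == dim_y:
      out.append(dim_x)
    else:
      raise ValueError("Incompatible shapes for broadcasting: %s and %s"
                       % (shape_x, shape_y))
  return out


print(broadcast_shape([2, 3, 5], [3, 1]))   # [2, 3, 5]
print(broadcast_shape([None, 1], [1, 4]))   # [None, 4]
```

The same pairing of known, unknown, and size-1 dimensions drives the `TensorShape`-based version registered for the binary operators above.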
+@ops.RegisterShape("SparseSegmentMean") +@ops.RegisterShape("SparseSegmentSum") +def _SparseSegmentReductionShape(op): + """Common shape function for sparse segment reduction ops.""" + data_shape = op.inputs[0].get_shape() + indices_shape = op.inputs[1].get_shape() + indices_shape.assert_has_rank(1) + segment_ids_shape = op.inputs[2].get_shape() + segment_ids_shape.assert_has_rank(1) + indices_shape.assert_is_compatible_with(segment_ids_shape) + return [tensor_shape.TensorShape([None]).concatenate(data_shape[1:])] + + +@ops.RegisterShape("SparseSegmentMeanGrad") +def _SparseSegmentMeanGradShape(op): + """Shape function for the SparseSegmentMeanGrad op.""" + input_shape = op.inputs[0].get_shape() + indices_shape = op.inputs[1].get_shape().with_rank(1) + unused_segment_ids_shape = op.inputs[2].get_shape().merge_with(indices_shape) + unused_output_dim0_shape = op.inputs[3].get_shape().merge_with( + tensor_shape.scalar()) + output_dim0 = tensor_util.ConstantValue(op.inputs[3]) + if output_dim0 is not None: + dim0 = output_dim0[0] + else: + dim0 = None + return [tensor_shape.TensorShape([dim0]).concatenate(input_shape[1:])] + + +@ops.RegisterShape("UnsortedSegmentSum") +def _UnsortedSegmentSumShape(op): + """Shape function for UnsortedSegmentSum.""" + data_shape = op.inputs[0].get_shape() + segment_ids_shape = op.inputs[1].get_shape() + mid = segment_ids_shape.ndims + if mid is None: + return [tensor_shape.unknown_shape()] + else: + num_segments = tensor_util.ConstantValue(op.inputs[2]) + return [tensor_shape.TensorShape([num_segments]).concatenate( + data_shape[mid:])] + + +@ops.RegisterShape("LinSpace") +def _LinspaceShape(op): + num = tensor_util.ConstantValue(op.inputs[2]) + return [tensor_shape.vector(num)] diff --git a/tensorflow/python/ops/math_ops_test.py b/tensorflow/python/ops/math_ops_test.py new file mode 100644 index 0000000000..86ea04f54d --- /dev/null +++ b/tensorflow/python/ops/math_ops_test.py @@ -0,0 +1,68 @@ +"""Tests for tensorflow.ops.math_ops.""" +import math + +import tensorflow.python.platform + +import numpy as np + +from tensorflow.python.framework import test_util +from tensorflow.python.ops import constant_op +from tensorflow.python.ops import math_ops +from tensorflow.python.platform import googletest + +exp = math.exp +log = math.log + +class ReduceTest(test_util.TensorFlowTestCase): + + def testReduceAllDims(self): + x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int32) + with self.test_session(): + y_tf = math_ops.reduce_sum(x).eval() + self.assertEqual(y_tf, 21) + +class RoundTest(test_util.TensorFlowTestCase): + + def testRounding(self): + x = [0.49, 0.7, -0.3, -0.8] + for dtype in [np.float32, np.double]: + x_np = np.array(x, dtype=dtype) + for use_gpu in [True, False]: + with self.test_session(use_gpu=use_gpu): + x_tf = constant_op.constant(x_np, shape=x_np.shape) + y_tf = math_ops.round(x_tf) + y_tf_np = y_tf.eval() + y_np = np.round(x_np) + self.assertAllClose(y_tf_np, y_np, atol=1e-2) + + +class ModTest(test_util.TensorFlowTestCase): + + def testFloat(self): + x = [0.5, 0.7, 0.3] + for dtype in [np.float32, np.double]: + # Test scalar and vector versions. + for denom in [x[0], [x[0]] * 3]: + x_np = np.array(x, dtype=dtype) + with self.test_session(): + x_tf = constant_op.constant(x_np, shape=x_np.shape) + y_tf = math_ops.mod(x_tf, denom) + y_tf_np = y_tf.eval() + y_np = np.fmod(x_np, denom) + self.assertAllClose(y_tf_np, y_np, atol=1e-2) + + def testFixed(self): + x = [5, 10, 23] + for dtype in [np.int32, np.int64]: + # Test scalar and vector versions. 
+ for denom in [x[0], x]: + x_np = np.array(x, dtype=dtype) + with self.test_session(): + x_tf = constant_op.constant(x_np, shape=x_np.shape) + y_tf = math_ops.mod(x_tf, denom) + y_tf_np = y_tf.eval() + y_np = np.mod(x_np, denom) + self.assertAllClose(y_tf_np, y_np) + +if __name__ == "__main__": + googletest.main() diff --git a/tensorflow/python/ops/nn.py b/tensorflow/python/ops/nn.py new file mode 100644 index 0000000000..7a4dc25e8b --- /dev/null +++ b/tensorflow/python/ops/nn.py @@ -0,0 +1,816 @@ +# pylint: disable=wildcard-import,unused-import,g-bad-import-order +"""## Activation Functions + +The activation ops provide different types of nonlinearities for use in +neural networks. These include smooth nonlinearities (`sigmoid`, +`tanh`, and `softplus`), continuous but not everywhere differentiable +functions (`relu`, `relu6`, and `relu_x`), and random regularization +(`dropout`). + +All activation ops apply componentwise, and produce a tensor of the same +shape as the input tensor. + +@@relu +@@relu6 +@@softplus +@@dropout +@@bias_add +@@sigmoid +@@tanh + +## Convolution + +The convolution ops sweep a 2-D filter over a batch of images, applying the +filter to each window of each image of the appropriate size. The different +ops trade off between generic vs. specific filters: + +* `conv2d`: Arbitrary filters that can mix channels together. +* `depthwise_conv2d`: Filters that operate on each channel independently. +* `separable_conv2d`: A depthwise spatial filter followed by a pointwise filter. + +Note that although these ops are called "convolution", they are strictly +speaking "cross-correlation" since the filter is combined with an input window +without reversing the filter. For details, see [the properties of +cross-correlation](https://en.wikipedia.org/wiki/Cross-correlation#Properties). + +The filter is applied to image patches of the same size as the filter and +strided according to the `strides` argument. `strides = [1, 1, 1, 1]` applies +the filter to a patch at every offset, `strides = [1, 2, 2, 1]` applies the +filter to every other image patch in each dimension, etc. + +Ignoring channels for the moment, the spatial semantics of the convolution ops +are as follows. If the 4-D `input` has shape +`[batch, in_height, in_width, ...]` and the 4-D `filter` has shape +`[filter_height, filter_width, ...]`, then + + output.shape = [batch, + (in_height - filter_height + 1) / strides[1], + (in_width - filter_width + 1) / strides[2], + ...] + + output[b, i, j, :] = + sum_{di, dj} input[b, strides[1] * i + di, strides[2] * j + dj, ...] * + filter[di, dj, ...] + +Since `input` is 4-D, each `input[b, i, j, :]` is a vector. For `conv2d`, these +vectors are multiplied by the `filter[di, dj, :, :]` matrices to produce new +vectors. For `depthwise_conv_2d`, each scalar component `input[b, i, j, k]` +is multiplied by a vector `filter[di, dj, k]`, and all the vectors are +concatenated. + +In the formula for `output.shape`, the rounding direction depends on padding: + +* `padding = 'SAME'`: Round down (only full size windows are considered). +* `padding = 'VALID'`: Round up (partial windows are included). + +@@conv2d +@@depthwise_conv2d +@@separable_conv2d + +## Pooling + +The pooling ops sweep a rectangular window over the input tensor, computing a +reduction operation for each window (average, max, or max with argmax). Each +pooling op uses rectangular windows of size `ksize` separated by offset +`strides`. 
For example, if `strides` is all ones every window is used, if +`strides` is all twos every other window is used in each dimension, etc. + +In detail, the output is + + output[i] = reduce(value[strides * i:strides * i + ksize]) + +for each tuple of indices `i`. The output shape is + + output.shape = (value.shape - ksize + 1) / strides + +where the rounding direction depends on padding: + +* `padding = 'SAME'`: Round down (only full size windows are considered). +* `padding = 'VALID'`: Round up (partial windows are included). + +@@avg_pool +@@max_pool +@@max_pool_with_argmax + +## Normalization + +Normalization is useful to prevent neurons from saturating when inputs may +have varying scale, and to aid generalization. + +@@l2_normalize +@@local_response_normalization +@@moments + +## Losses + +The loss ops measure error between two tensors, or between a tensor and zero. +These can be used for measuring accuracy of a network in a regression task +or for regularization purposes (weight decay). + +@@l2_loss + +## Classification + +TensorFlow provides several operations that help you perform classification. + +@@sigmoid_cross_entropy_with_logits +@@softmax +@@softmax_cross_entropy_with_logits + +## Embeddings + +TensorFlow provides several operations that help you compute embeddings. + +@@embedding_lookup +@@embedding_lookup_sparse + +## Evaluation + +The evaluation ops are useful for measuring the performance of a network. +Since they are nondifferentiable, they are typically used at evaluation time. + +@@top_k +@@in_top_k + +## Candidate Sampling + +Do you want to train a multiclass or multilabel model with thousands +or millions of output classes (for example, a language model with a +large vocabulary)? Training with a full Softmax is slow in this case, +since all of the classes are evaluated for every training example. +Candidate Sampling training algorithms can speed up your step times by +only considering a small randomly-chosen subset of contrastive classes +(called candidates) for each batch of training examples. + +See our [Candidate Sampling Algorithms Reference] +(http://www.tensorflow.org/extras/candidate_sampling.pdf) + +### Sampled Loss Functions + +TensorFlow provides the following sampled loss functions for faster training. + +@@nce_loss +@@sampled_softmax_loss + +### Candidate Samplers + +TensorFlow provides the following samplers for randomly sampling candidate +classes when using one of the sampled loss functions above. + +@@uniform_candidate_sampler +@@log_uniform_candidate_sampler +@@learned_unigram_candidate_sampler +@@fixed_unigram_candidate_sampler + +### Miscellaneous candidate sampling utilities + +@@compute_accidental_hits + +""" + +from tensorflow.python.framework import ops +from tensorflow.python.framework import types +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import candidate_sampling_ops +from tensorflow.python.ops import constant_op +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import embedding_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn_grad +from tensorflow.python.ops import nn_ops +from tensorflow.python.ops import numerics +from tensorflow.python.ops import random_ops +from tensorflow.python.ops import sparse_ops +from tensorflow.python.ops.math_ops import sigmoid +from tensorflow.python.ops.math_ops import tanh + +# Bring more nn-associated functionality into this package. 
+from tensorflow.python.ops.nn_ops import * +from tensorflow.python.ops.candidate_sampling_ops import * +from tensorflow.python.ops.embedding_ops import * + + +def sigmoid_cross_entropy_with_logits(logits, targets, name=None): + """Computes sigmoid cross entropy given `logits`. + + Measures the probability error in discrete classification tasks in which each + class is independent and not mutually exclusive. For instance, one could + perform multilabel classification where a picture can contain both an elephant + and a dog at the same time. + + For brevity, let `x = logits`, `z = targets`. The logistic loss is + + x - x * z + log(1 + exp(-x)) + + To ensure stability and avoid overflow, the implementation uses + + max(x, 0) - x * z + log(1 + exp(-abs(x))) + + `logits` and `targets` must have the same type and shape. + + Args: + logits: A `Tensor` of type `float32` or `float64`. + targets: A `Tensor` of the same type and shape as `logits`. + name: A name for the operation (optional). + + Returns: + A `Tensor` of the same shape as `logits` with the componentwise + logistic losses. + """ + with ops.op_scope([logits, targets], name, "logistic_loss") as name: + logits = ops.convert_to_tensor(logits, name="logits") + targets = ops.convert_to_tensor(targets, name="targets") + # The logistic loss formula from above is + # x - x * z + log(1 + exp(-x)) + # For x < 0, a more numerically stable formula is + # -x * z + log(1 + exp(x)) + # To avoid branching, we use the combined version + # max(x, 0) - x * z + log(1 + exp(-abs(x))) + return math_ops.add(nn_ops.relu(logits) - logits * targets, + math_ops.log(1 + math_ops.exp(-math_ops.abs(logits))), + name=name) + + +def xw_plus_b(x, weights, biases, name=None): + """Computes matmul(x, weights) + biases. + + Args: + x: a 2D tensor. Dimensions typically: batch, in_units + weights: a 2D tensor. Dimensions typically: in_units, out_units + biases: a 1D tensor. Dimensions: out_units + name: A name for the operation (optional). If not specified + "wx_plus_b" is used. + + Returns: + A 2-D Tensor computing matmul(x, weights) + biases. + Dimensions typically: batch, out_units. + """ + with ops.op_scope([x, weights, biases], name, "xw_plus_b") as name: + x = ops.convert_to_tensor(x, name="x") + weights = ops.convert_to_tensor(weights, name="weights") + biases = ops.convert_to_tensor(biases, name="biases") + mm = math_ops.matmul(x, weights) + return nn_ops.bias_add(mm, biases, name=name) + + +def relu_layer(x, weights, biases, name=None): + """Computes Relu(x * weight + biases). + + Args: + x: a 2D tensor. Dimensions typically: batch, in_units + weights: a 2D tensor. Dimensions typically: in_units, out_units + biases: a 1D tensor. Dimensions: out_units + name: A name for the operation (optional). If not specified + "nn_relu_layer" is used. + + Returns: + A 2-D Tensor computing relu(matmul(x, weights) + biases). + Dimensions typically: batch, out_units. + """ + with ops.op_scope([x, weights, biases], name, "relu_layer") as name: + x = ops.convert_to_tensor(x, name="x") + weights = ops.convert_to_tensor(weights, name="weights") + biases = ops.convert_to_tensor(biases, name="biases") + xw_plus_b = nn_ops.bias_add(math_ops.matmul(x, weights), biases) + return nn_ops.relu(xw_plus_b, name=name) + + +def l2_normalize(x, dim, epsilon=1e-12, name=None): + """Normalizes along dimension `dim` using an L2 norm. 
+ + For a 1-D tensor with `dim = 0`, computes + + output = x / sqrt(max(sum(x**2), epsilon)) + + For `x` with more dimensions, independently normalizes each 1-D slice along + dimension `dim`. + + Args: + x: A `Tensor`. + dim: Dimension along which to normalize. + epsilon: A lower bound value for the norm. Will use `sqrt(epsilon)` as the + divisor if `norm < sqrt(epsilon)`. + name: A name for this operation (optional). + + Returns: + A `Tensor` with the same shape as `x`. + """ + with ops.op_scope([x], name, "l2_normalize") as name: + x = ops.convert_to_tensor(x, name="x") + square_sum = math_ops.reduce_sum(math_ops.square(x), [dim], keep_dims=True) + x_inv_norm = math_ops.rsqrt(math_ops.maximum(square_sum, epsilon)) + return math_ops.mul(x, x_inv_norm, name=name) + + +def zero_fraction(value, name=None): + """Returns the fraction of zeros in `value`. + + If `value` is empty, the result is `nan`. + + This is useful in summaries to measure and report sparsity. For example, + + z = tf.Relu(...) + summ = tf.scalar_summary('sparsity', tf.zero_fraction(z)) + + Args: + value: A tensor of numeric type. + name: A name for the operation (optional). + + Returns: + The fraction of zeros in `value`, with type `float32`. + """ + with ops.op_scope([value], name, "zero_fraction"): + value = ops.convert_to_tensor(value, name="value") + zero = constant_op.constant(0, dtype=value.dtype, name="zero") + return math_ops.reduce_mean(math_ops.cast(math_ops.equal(value, zero), + types.float32)) + + +def dropout(x, keep_prob, noise_shape=None, seed=None, name=None): + """Computes dropout. + + With probability `keep_prob`, outputs the input element scaled up by + `1 / keep_prob`, otherwise outputs `0`. The scaling is so that the expected + sum is unchanged. + + By default, each element is kept or dropped independently. If `noise_shape` + is specified, it must be + [broadcastable](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) + to the shape of `x`, and only dimensions with `noise_shape[i] == x.shape[i]` + will make independent decisions. For example, if `x.shape = [b, x, y, c]` and + `noise_shape = [b, 1, 1, c]`, each batch and channel component will be + kept independently and each row and column will be kept or not kept together. + + Args: + x: A tensor. + keep_prob: Float probability that each element is kept. + noise_shape: Shape for randomly generated keep/drop flags. + seed: A Python integer. Used to create a random seed. + See [`set_random_seed`](constant_op.md#set_random_seed) for behavior. + name: A name for this operation (optional). + + Returns: + A Tensor of the same shape of `x`. + + Raises: + ValueError: If `keep_prob` is not in `(0, 1]`. + """ + if not (0 < keep_prob <= 1): + raise ValueError("Expected keep_prob in (0, 1], got %g" % keep_prob) + with ops.op_scope([x], name, "dropout") as name: + x = ops.convert_to_tensor(x, name="x") + noise_shape = noise_shape or array_ops.shape(x) + # uniform [keep_prob, 1.0 + keep_prob) + random_tensor = keep_prob + random_tensor += random_ops.random_uniform( + noise_shape, seed=seed, dtype=x.dtype) + # 0. if [keep_prob, 1.0) and 1. if [1.0, 1.0 + keep_prob) + binary_tensor = math_ops.floor(random_tensor) + return x * (1.0 / keep_prob) * binary_tensor + + +def depthwise_conv2d(input, filter, strides, padding, name=None): + """Depthwise 2-D convolution. 
+ + Given an input tensor of shape `[batch, in_height, in_width, in_channels]` + and a filter tensor of shape + `[filter_height, filter_width, in_channels, channel_multiplier]` + containing `in_channels` convolutional filters of depth 1, `depthwise_conv2d` + applies a different filter to each input channel (expanding from 1 channel + to `channel_multiplier` channels for each), then concatenates the results + together. The output has `in_channels * channel_multiplier` channels. + + In detail, + + output[b, i, j, k * channel_multiplier + q] = + sum_{di, dj} input[b, strides[1] * i + di, strides[2] * j + dj, k] * + filter[di, dj, k, q] + + Must have `strides[0] = strides[3] = 1`. For the most common case of the + same horizontal and vertical strides, `strides = [1, stride, stride, 1]`. + + Args: + input: 4-D with shape `[batch, in_height, in_width, in_channels]`. + filter: 4-D with shape + `[filter_height, filter_width, in_channels, channel_multiplier]`. + strides: 1-D of size 4. The stride of the sliding window for each + dimension of `input`. + padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. + name: A name for this operation (optional). + + Returns: + A 4-D `Tensor` of shape + `[batch, out_height, out_width, in_channels * channel_multiplier].` + """ + with ops.op_scope([input, filter], name, "depthwise") as name: + input = ops.convert_to_tensor(input, name="tensor_in") + filter = ops.convert_to_tensor(filter, name="filter_in") + # A shape is required to statically compute the number of separable filters. + if filter.get_shape().ndims is not None: + assert len(filter.get_shape()) == 4 + in_channels = filter.get_shape()[2] + # Sanity checks, if shape information is available for the inputs. + if input.get_shape().ndims is not None: + assert len(input.get_shape()) == 4 + assert input.get_shape()[3] == in_channels, ( + "Mismatched input depth %d and number of depthwise filters %d." % ( + input.get_shape()[3].value, in_channels)) + else: + assert input.get_shape().ndims is not None, ( + "Either tensor must provide static shape information.") + assert input.get_shape().ndims == 4 + in_channels = input.get_shape()[3] + + if in_channels == 1: + return nn_ops.conv2d(input, filter, strides, padding, name=name) + else: + # Create one separate convolution per channel. + convs = [] + for channel in xrange(in_channels): + with ops.name_scope("depth%d" % channel) as channel_scope: + t_in = array_ops.slice(input, [0, 0, 0, channel], [-1, -1, -1, 1], + name="slice_inputs") + f_in = array_ops.slice(filter, [0, 0, channel, 0], [-1, -1, 1, -1], + name="slice_params") + convs.append(nn_ops.conv2d(t_in, f_in, + strides, padding, name=channel_scope)) + # Concatenate the per-channel convolutions along the channel dimension. + return array_ops.concat(3, convs, name=name) + + +def separable_conv2d(input, depthwise_filter, pointwise_filter, strides, + padding, + name=None): + """2-D convolution with separable filters. + + Performs a depthwise convolution that acts separately on channels followed by + a pointwise convolution that mixes channels. Note that this is separability + between dimensions `[1, 2]` and `3`, not spatial separability between + dimensions `1` and `2`. 
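  A rough shape walk-through, assuming arbitrary example sizes (the constants
  below are purely illustrative):

    # input:            [batch, in_height, in_width, in_channels] -> [8, 32, 32, 3]
    # depthwise_filter: [fh, fw, in_channels, channel_multiplier] -> [3, 3, 3, 8]
    # pointwise_filter: [1, 1, in_channels * channel_multiplier, out_channels]
    #                                                             -> [1, 1, 24, 32]
    images = constant_op.constant(1.0, shape=[8, 32, 32, 3])
    depthwise_filter = constant_op.constant(0.1, shape=[3, 3, 3, 8])
    pointwise_filter = constant_op.constant(0.1, shape=[1, 1, 24, 32])
    out = separable_conv2d(images, depthwise_filter, pointwise_filter,
                           strides=[1, 1, 1, 1], padding="SAME")
    # out has shape [8, 32, 32, 32]: the depthwise stage expands 3 channels to
    # 3 * 8 = 24, and the 1x1 pointwise convolution mixes them into 32 outputs.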
+ + In detail, + + output[b, i, j, k] = sum_{di, dj, q, r] + input[b, strides[1] * i + di, strides[2] * j + dj, q] * + depthwise_filter[di, dj, q, r] * + pointwise_filter[0, 0, q * channel_multiplier + r, k] + + `strides` controls the strides for the depthwise convolution only, since + the pointwise convolution has implicit strides of `[1, 1, 1, 1]`. Must have + `strides[0] = strides[3] = 1`. For the most common case of the same + horizontal and vertical strides, `strides = [1, stride, stride, 1]`. + + Args: + input: 4-D `Tensor` with shape `[batch, in_height, in_width, in_channels]`. + depthwise_filter: 4-D `Tensor` with shape + `[filter_height, filter_width, in_channels, channel_multiplier]`. + Contains `in_channels` convolutional filters of depth 1. + pointwise_filter: 4-D `Tensor` with shape + `[1, 1, channel_multiplier * in_channels, out_channels]`. Pointwise + filter to mix channels after `depthwise_filter` has convolved spatially. + strides: 1-D of size 4. The strides for the depthwise convolution for + each dimension of `input`. + padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. + name: A name for this operation (optional). + + Returns: + A 4-D `Tensor` of shape `[batch, out_height, out_width, out_channels]`. + """ + with ops.op_scope([input, depthwise_filter, pointwise_filter], + name, "separable_conv2d") as name: + input = ops.convert_to_tensor(input, name="tensor_in") + depthwise_filter = ops.convert_to_tensor(depthwise_filter, + name="depthwise_filter") + pointwise_filter = ops.convert_to_tensor(pointwise_filter, + name="pointwise_filter") + + if pointwise_filter.get_shape().ndims is not None: + assert len(pointwise_filter.get_shape()) == 4 + assert pointwise_filter.get_shape()[0] == 1 + assert pointwise_filter.get_shape()[1] == 1 + if depthwise_filter.get_shape().ndims and input.get_shape().ndims: + channel_multiplier = depthwise_filter.get_shape()[3] + in_channels = input.get_shape()[3] + out_channels = pointwise_filter.get_shape()[3] + # This would mean the separable convolutions is over-parametrized. + assert channel_multiplier * in_channels < out_channels + # The layout of the ops in the graph are expected to be as follows: + # separable_conv2d // Conv2D op corresponding to the pointwise conv. + # separable_conv2d/depthwise // Concat op for the deptwise outputs. + # separable_conv2d/depthwise/depth0 // Conv2D op for depth 0 + # separable_conv2d/depthwise/depth1 // Conv2D op for depth 1 + # separable_conv2d/depthwise/depth2 // Conv2D op for depth 2 + depthwise = depthwise_conv2d(input, depthwise_filter, strides, + padding, name="depthwise") + return nn_ops.conv2d(depthwise, pointwise_filter, [1, 1, 1, 1], + padding="VALID", name=name) + + +def moments(x, axes, name=None): + """Calculate the mean and variance of `x`. + + The mean and variance are calculated by aggregating the contents of `x` + across `axes`. If `x` is 1-D and `axes = [0]` this is just the mean + and variance of a vector. + + For so-called "global normalization" needed for convolutional filters pass + `axes=[0, 1, 2]` (batch, height, width). For batch normalization pass + `axes=[0]` (batch). + + Args: + x: A `Tensor`. + axes: array of ints. Axes along which to compute mean and + variance. + name: Name used to scope the operations that compute the moments. + + Returns: + Two `Tensors`: `mean` and `variance`. 
+ """ + with ops.op_scope([x, axes], name, "moments"): + x = ops.convert_to_tensor(x, name="x") + divisor = 1.0 + for d in xrange(len(x.get_shape())): + if d in axes: + divisor *= x.get_shape()[d].value + divisor = constant_op.constant(1.0 / divisor, x.dtype, name="divisor") + axes = constant_op.constant(axes, name="axes") + # Note: We do not use Mean here because it is very slow on GPU. + # Note 2: The expression below is potentially more stable. + # It is however a bit slower and stability doesn't appear to be an issue. + # mean = math_ops.reduce_sum(math_ops.mul(x, divisor), axes, name="mean") + # var = math_ops.reduce_sum(math_ops.mul(math_ops.square(x - mean), + # divisor), axes, + # name="variance") + mean = math_ops.mul(math_ops.reduce_sum(x, axes), divisor, name="mean") + var = math_ops.mul(math_ops.reduce_sum(math_ops.square(x - mean), axes), + divisor, name="variance") + return mean, var + + +def _sum_rows(x): + """Returns a vector summing up each row of the matrix x.""" + # _sum_rows(x) is equivalent to math_ops.reduce_sum(x, 1) when x is + # a matrix. The gradient of _sum_rows(x) is more efficient than + # reduce_sum(x, 1)'s gradient in today's implementation. Therefore, + # we use _sum_rows(x) in the nce_loss() computation since the loss + # is mostly used for training. + cols = array_ops.shape(x)[1] + ones_shape = array_ops.pack([cols, 1]) + ones = array_ops.ones(ones_shape, x.dtype) + return array_ops.reshape(math_ops.matmul(x, ones), [-1]) + + +def _compute_sampled_logits(weights, biases, inputs, labels, num_sampled, + num_classes, num_true=1, + sampled_values=None, + subtract_log_q=True, + remove_accidental_hits=False, + name=None): + """Helper function for nce_loss and sampled_softmax_loss functions. + + Computes sampled output training logits and labels suitable for implementing + e.g. noise-contrastive estimation (see nce_loss) or sampled softmax (see + sampled_softmax_loss). + + Note: In the case where num_true > 1, we assign to each target class + the target probability 1 / num_true so that the target probabilities + sum to 1 per-example. + + Args: + weights: tensor of label embeddings with shape = [num_classes, dim] + biases: tensor of num_classes label biases + inputs: tensor with shape = [batch_size, dim] corresponding to forward + activations of the input network + labels: int tensor with shape [batch_size, num_true] + num_sampled: number of label classes to sample per batch + num_classes: number of possible label classes in the data (e.g. vocab size) + num_true: number of target classes per example (default: 1) + sampled_values: a tuple of (sampled_candidates, true_expected_count, + sampled_expected_count) returned by a *CandidateSampler function to use + (if None, we default to LogUniformCandidateSampler) + subtract_log_q: subtract the log expected count of the labels in the sample + to get the logits of the true labels (default: True) + Turn off for Negative Sampling. + remove_accidental_hits: whether to remove "accidental hits" where a sampled + label equals the true labels (bool, default: False) + name: name for this op + + Returns: + out_logits, out_labels: tensors with shape [batch_size, num_true + + num_sampled] for passing to either SigmoidCrossEntropyWithLogits (NCE) + or SoftmaxCrossEntropyWithLogits (sampled softmax). 
+ + """ + + with ops.op_scope( + [weights, biases, inputs, labels], name, "compute_sampled_logits"): + if labels.dtype != types.int64: + labels = math_ops.cast(labels, types.int64) + labels_flat = array_ops.reshape(labels, [-1]) + + # Sample the negative labels. + # sampled shape: num_sampled vector + # true_expected_count shape = [batch_size, 1] + # sampled_expected_count shape = num_sampled vector + if sampled_values is None: + sampled_values = candidate_sampling_ops.log_uniform_candidate_sampler( + true_classes=labels, + num_true=num_true, + num_sampled=num_sampled, + unique=True, + range_max=num_classes) + # NOTE: pylint cannot tell that 'sampled_values' is a sequence + # pylint: disable=unpacking-non-sequence + sampled, true_expected_count, sampled_expected_count = sampled_values + # pylint: enable=unpacking-non-sequence + + # weights shape is [num_classes, dim] + # labels_flat is a [batch_size * num_true] vector + # true_w shape is [batch_size * num_true, dim] + # true_b is a [batch_size * num_true] vector + true_w = embedding_ops.embedding_lookup(weights, labels_flat) + true_b = embedding_ops.embedding_lookup(biases, labels_flat) + + # inputs shape is [batch_size, dim] + # true_w shape is [batch_size * num_true, dim] + # row_wise_dots is [batch_size, num_true, dim] + dim = array_ops.shape(true_w)[1:2] + new_true_w_shape = array_ops.concat(0, [[-1, num_true], dim]) + row_wise_dots = math_ops.mul( + array_ops.expand_dims(inputs, 1), + array_ops.reshape(true_w, new_true_w_shape)) + # We want the row-wise dot plus biases which yields a + # [batch_size, num_true] tensor of true_logits. + dots_as_matrix = array_ops.reshape(row_wise_dots, + array_ops.concat(0, [[-1], dim])) + true_logits = array_ops.reshape(_sum_rows(dots_as_matrix), [-1, num_true]) + true_b = array_ops.reshape(true_b, [-1, num_true]) + true_logits += true_b + + # Lookup weights and biases for sampled labels. + # sampled is a num_sampled int vector + # sampled_w shape is [num_sampled, dim] + # sampled_b is a num_sampled float vector + sampled_w = embedding_ops.embedding_lookup(weights, sampled) + sampled_b = embedding_ops.embedding_lookup(biases, sampled) + + # inputs has shape [batch_size, dim] + # sampled_w has shape [num_sampled, dim] + # sampled_b has shape [num_sampled] + # Apply X*W'+B, which yields [batch_size, num_sampled] + sampled_logits = math_ops.matmul(inputs, + sampled_w, + transpose_b=True) + sampled_b + + if remove_accidental_hits: + acc_hits = candidate_sampling_ops.compute_accidental_hits( + labels, sampled, num_true=num_true) + acc_indices, acc_ids, acc_weights = acc_hits + + # This is how SparseToDense expects the indices. + acc_indices_2d = array_ops.reshape(acc_indices, [-1, 1]) + acc_ids_2d_int32 = array_ops.reshape(math_ops.cast( + acc_ids, types.int32), [-1, 1]) + sparse_indices = array_ops.concat( + 1, [acc_indices_2d, acc_ids_2d_int32], "sparse_indices") + # Create sampled_logits_shape = [batch_size, num_sampled] + sampled_logits_shape = array_ops.concat( + 0, + [array_ops.shape(labels)[:1], array_ops.expand_dims(num_sampled, 0)]) + sampled_logits += sparse_ops.sparse_to_dense( + sparse_indices, sampled_logits_shape, acc_weights, 0.0) + + if subtract_log_q: + # Subtract log of Q(l), prior probability that l appears in sampled. + true_logits -= math_ops.log(true_expected_count) + sampled_logits -= math_ops.log(sampled_expected_count) + + # Construct output logits and labels. The true labels/logits start at col 0. 
+ out_logits = array_ops.concat(1, [true_logits, sampled_logits]) + # true_logits is a float tensor, ones_like(true_logits) is a float tensor + # of ones. We then divide by num_true to ensure the per-example labels sum + # to 1.0, i.e. form a proper probability distribution. + out_labels = array_ops.concat( + 1, [array_ops.ones_like(true_logits) / num_true, + array_ops.zeros_like(sampled_logits)]) + + return out_logits, out_labels + + +def nce_loss(weights, biases, inputs, labels, num_sampled, num_classes, + num_true=1, + sampled_values=None, + remove_accidental_hits=False, + name="nce_loss"): + """Computes and returns the noise-contrastive estimation training loss. + + See [Noise-contrastive estimation: A new estimation principle for + unnormalized statistical models] + (http://www.jmlr.org/proceedings/papers/v9/gutmann10a/gutmann10a.pdf). + Also see our [Candidate Sampling Algorithms Reference] + (http://www.tensorflow.org/extras/candidate_sampling.pdf) + + Note: In the case where num_true > 1, we assign to each target class + the target probability 1 / num_true so that the target probabilities + sum to 1 per-example. + + Note: It would be useful to allow a variable number of target classes per + example. We hope to provide this functionality in a future release. + For now, if you have a variable number of target classes, you can pad them + out to a constant number by either repeating them or by padding + with an otherwise unused class. + + Args: + weights: A `Tensor` of shape [num_classes, dim]. The class embeddings. + biases: A `Tensor` of shape [num_classes]. The class biases. + inputs: A `Tensor` of shape [batch_size, dim]. The forward + activations of the input network. + labels: A `Tensor` of type `int64` and shape `[batch_size, + num_true]`. The target classes. + num_sampled: An `int`. The number of classes to randomly sample per batch. + num_classes: An `int`. The number of possible classes. + num_true: An `int`. The number of target classes per training example. + sampled_values: a tuple of `(sampled_candidates, true_expected_count, + sampled_expected_count)` returned by a *_candidate_sampler function. + (if None, we default to LogUniformCandidateSampler) + remove_accidental_hits: A `bool`. Whether to remove "accidental hits" + where a sampled class equals one of the target classes. If set to + `True`, this is a "Sampled Logistic" loss instead of NCE, and we are + learning to generate log-odds instead of log probabilities. See + our [Candidate Sampling Algorithms Reference] + (http://www.tensorflow.org/extras/candidate_sampling.pdf). + Default is False. + name: A name for the operation (optional). + + Returns: + A batch_size 1-D tensor of per-example NCE losses. + """ + logits, labels = _compute_sampled_logits( + weights, biases, inputs, labels, num_sampled, num_classes, + num_true=num_true, + sampled_values=sampled_values, + subtract_log_q=True, + remove_accidental_hits=remove_accidental_hits, + name=name) + sampled_losses = sigmoid_cross_entropy_with_logits(logits, + labels, + name="sampled_losses") + # sampled_losses is batch_size x {true_loss, sampled_losses...} + # We sum out true and sampled losses. + return _sum_rows(sampled_losses) + + +def sampled_softmax_loss(weights, biases, inputs, labels, num_sampled, + num_classes, num_true=1, + sampled_values=None, + remove_accidental_hits=True, + name="sampled_softmax_loss"): + """Computes and returns the sampled softmax training loss. + + This is a faster way to train a softmax classifier over a huge number of + classes. 
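  A rough training-time usage sketch (the names `softmax_weights`,
  `softmax_biases`, `hidden`, and `target_ids` stand in for whatever the
  surrounding model defines):

    # softmax_weights: [num_classes, dim], softmax_biases: [num_classes]
    # hidden:          [batch_size, dim]
    # target_ids:      [batch_size, 1], dtype int64
    loss = sampled_softmax_loss(softmax_weights, softmax_biases, hidden,
                                target_ids, num_sampled=64, num_classes=50000)
    # loss has shape [batch_size]; average it before handing it to an optimizer.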
+ + This operation is for training only. It is generally an underestimate of + the full softmax loss. + + At inference time, you can compute full softmax probabilities with the + expression `tf.nn.softmax(tf.matmul(inputs, weights) + biases)`. + + See our [Candidate Sampling Algorithms Reference] + (http://www.tensorflow.org/extras/candidate_sampling.pdf) + + Also see Section 3 of http://arxiv.org/abs/1412.2007 for the math. + + Args: + weights: A `Tensor` of shape [num_classes, dim]. The class embeddings. + biases: A `Tensor` of shape [num_classes]. The class biases. + inputs: A `Tensor` of shape [batch_size, dim]. The forward + activations of the input network. + labels: A `Tensor` of type `int64` and shape `[batch_size, + num_true]`. The target classes. Note that this format differs from + the `labels` argument of `nn.softmax_cross_entropy_with_logits`. + num_sampled: An `int`. The number of classes to randomly sample per batch. + num_classes: An `int`. The number of possible classes. + num_true: An `int`. The number of target classes per training example. + sampled_values: a tuple of `(sampled_candidates, true_expected_count, + sampled_expected_count)` returned by a *_candidate_sampler function. + (if None, we default to LogUniformCandidateSampler) + remove_accidental_hits: A `bool`. whether to remove "accidental hits" + where a sampled class equals one of the target classes. Default is + True. + name: A name for the operation (optional). + + Returns: + A batch_size 1-D tensor of per-example sampled softmax losses. + + """ + logits, labels = _compute_sampled_logits( + weights, biases, inputs, labels, num_sampled, num_classes, + num_true=num_true, + sampled_values=sampled_values, + subtract_log_q=True, + remove_accidental_hits=remove_accidental_hits, + name=name) + sampled_losses = nn_ops.softmax_cross_entropy_with_logits(logits, labels) + # sampled_losses is a batch_size vector. + return sampled_losses diff --git a/tensorflow/python/ops/nn_grad.py b/tensorflow/python/ops/nn_grad.py new file mode 100644 index 0000000000..0cf867d217 --- /dev/null +++ b/tensorflow/python/ops/nn_grad.py @@ -0,0 +1,229 @@ +"""Gradients for operators defined in nn_ops.py.""" + +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn_ops +from tensorflow.python.ops import gen_nn_ops + + +@ops.RegisterGradient("Conv2DBackpropInput") +def _DeConv2DGrad(op, grad): + """The derivatives for deconvolution. + + Args: + op: the Deconvolution op. + grad: the tensor representing the gradient w.r.t. the output + + Returns: + the gradients w.r.t. the input and the filter + """ + return [None, + nn_ops.conv2d_backprop_filter(grad, + array_ops.shape(op.inputs[1]), + op.inputs[2], + op.get_attr("strides"), + op.get_attr("padding")), + nn_ops.conv2d(grad, + op.inputs[1], + op.get_attr("strides"), + op.get_attr("padding"))] + + +@ops.RegisterGradient("Softmax") +def _SoftmaxGrad(op, grad_softmax): + """The derivative of the softmax nonlinearity. + + We assume that probs is of shape [batch_size * dim] + The formula for dsoftmax / dx = (diag(softmax) - softmax * softmax'). + This matrix is diagonal minus a rank one matrix, so it is easy to implement + as follows: + + grad_x = grad_softmax * softmax - sum(grad_softmax * softmax) * softmax + + Args: + op: the Softmax op. + grad_softmax: the tensor representing the gradient w.r.t. the + softmax output. 
+ + Returns: + gradient w.r.t the input to the softmax + + """ + # TODO(ilyasu): assert that the tensor has two dimensions at + # graph-construction time? Alternatively: do different things + # depending on the dimensionality of the input tensors. + softmax = op.outputs[0] + grad_x = ((grad_softmax - + array_ops.reshape(math_ops.reduce_sum(grad_softmax * softmax, [1]), + [-1, 1])) + * softmax) + return grad_x + + +@ops.RegisterGradient("BiasAdd") +def _BiasAddGrad(unused_bias_op, received_grad): + """Return the gradients for the 2 inputs of bias_op. + + The first input of unused_bias_op is the tensor t, and its gradient is + just the gradient the unused_bias_op received. + + The second input of unused_bias_op is the bias vector which has one fewer + dimension than "received_grad" (the batch dimension.) Its gradient is the + received gradient Summed on the batch dimension, which is the first dimension. + + Args: + unused_bias_op: The BiasOp for which we need to generate gradients. + received_grad: Tensor. The gradients passed to the BiasOp. + + Returns: + Two tensors, the first one for the "tensor" input of the BiasOp, + the second one for the "bias" input of the BiasOp. + """ + reduction_dim_tensor = math_ops.range(0, array_ops.rank(received_grad) - 1) + return (received_grad, math_ops.reduce_sum(received_grad, reduction_dim_tensor)) + + +def _VerifyTensor(t, name, msg): + """Assert that the tensor does not contain any NaN's. + + Args: + t: Tensor + name: name + msg: message to log + Returns: + Tensor, but verified + """ + with ops.name_scope(name): + with ops.device(t.device or ops.get_default_graph().get_default_device()): + verify_input = array_ops.check_numerics(t, message=msg) + out = control_flow_ops.with_dependencies([verify_input], t) + return out + + +@ops.RegisterGradient("Relu") +def _ReluGrad(op, grad): + t = _VerifyTensor(op.inputs[0], op.name, "ReluGrad input is not finite.") + return gen_nn_ops._relu_grad(grad, t) + + +@ops.RegisterGradient("Relu6") +def _Relu6Grad(op, grad): + return gen_nn_ops._relu6_grad(grad, op.inputs[0]) + + +@ops.RegisterGradient("Softplus") +def _SoftplusGrad(op, grad): + return gen_nn_ops._softplus_grad(grad, op.inputs[0]) + + +@ops.RegisterGradient("ReluGrad") +def _ReluGradGrad(op, grad): + x = op.inputs[1] + return (gen_nn_ops._relu_grad(grad, x), + array_ops.zeros(shape=array_ops.shape(x), dtype=x.dtype)) + + +def _BroadcastMul(vec, mat): + """Multiply after broadcasting vec to match dimensions of mat. 
+ + Args: + vec: A 1-D tensor of dimension [D0] + mat: A 2-D tensor of dimension [D0, D1] + + Returns: + A tensor of dimension [D0, D1], the result of vec * mat + """ + # Reshape vec to [D0, 1] + vec = array_ops.expand_dims(vec, -1) + return vec * mat + + +@ops.RegisterGradient("SoftmaxCrossEntropyWithLogits") +def _SoftmaxCrossEntropyWithLogitsGrad(op, grad_0, _): + # grad_0 is the backprop for cost, and we multiply it with the gradients + # (which is output[1]) + # There is no gradient for the labels + return _BroadcastMul(grad_0, op.outputs[1]), None + + +@ops.RegisterGradient("Conv2D") +def _Conv2DGrad(op, grad): + return [nn_ops.conv2d_backprop_input(array_ops.shape(op.inputs[0]), + op.inputs[1], + grad, + op.get_attr("strides"), + op.get_attr("padding")), + nn_ops.conv2d_backprop_filter(op.inputs[0], + array_ops.shape(op.inputs[1]), + grad, + op.get_attr("strides"), + op.get_attr("padding"))] + + +@ops.RegisterGradient("LRN") +def _LRNGrad(op, grad): + depth_radius = op.get_attr("depth_radius") + bias = op.get_attr("bias") + alpha = op.get_attr("alpha") + beta = op.get_attr("beta") + return [gen_nn_ops._lrn_grad(grad, op.inputs[0], op.outputs[0], + depth_radius, bias, alpha, beta)] + + +@ops.RegisterGradient("AvgPool") +def _AvgPoolGrad(op, grad): + return gen_nn_ops._avg_pool_grad(array_ops.shape(op.inputs[0]), grad, + op.get_attr("ksize"), + op.get_attr("strides"), + op.get_attr("padding")) + + +@ops.RegisterGradient("MaxPool") +def _MaxPoolGrad(op, grad): + return gen_nn_ops._max_pool_grad(op.inputs[0], op.outputs[0], grad, + op.get_attr("ksize"), + op.get_attr("strides"), + padding=op.get_attr("padding")) + + +@ops.RegisterGradient("BatchNormWithGlobalNormalization") +def _BatchNormWithGlobalNormalizationGrad(op, grad): + """Return the gradients for the 5 inputs of BatchNormWithGlobalNormalization. + + We do not backprop anything for the mean and var intentionally as they are + not being trained with backprop in the operation. + + Args: + op: The BatchNormOp for which we need to generate gradients. + grad: Tensor. The gradients passed to the BatchNormOp. + + Returns: + dx: Backprop for input, which is (grad * (g * rsqrt(v + epsilon))) + dm: Backprop for mean, which is + sum_over_rest(grad * g) * (-1 / rsqrt(v + epsilon)) + dv: Backprop for variance, which is + sum_over_rest(grad * g * (x - m)) * (-1/2) * (v + epsilon) ^ (-3/2) + db: Backprop for beta, which is grad reduced in all except the + last dimension. + dg: Backprop for gamma, which is (grad * ((x - m) * rsqrt(v + epsilon))) + """ + dx, dm, dv, db, dg = gen_nn_ops._batch_norm_with_global_normalization_grad( + op.inputs[0], op.inputs[1], op.inputs[2], op.inputs[4], grad, + op.get_attr("variance_epsilon"), op.get_attr("scale_after_normalization")) + return dx, dm, dv, db, dg + + +@ops.RegisterGradient("L2Loss") +def _L2LossGrad(op, grad): + """Return the gradients for L2Loss. + + Args: + op: The L2LossOp for which we need to generate gradients. + grad: Tensor containing a single number. + + Returns: + The gradient, which is (x * grad). 
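    Since L2Loss computes sum(x ** 2) / 2, its elementwise derivative with
    respect to x is simply x, so the incoming scalar gradient just scales
    the input.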
+ """ + return op.inputs[0] * grad diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py new file mode 100644 index 0000000000..0ffe95de2b --- /dev/null +++ b/tensorflow/python/ops/nn_ops.py @@ -0,0 +1,365 @@ +"""Wrappers for primitive Neural Net (NN) Operations.""" + +import tensorflow.python.platform +import numpy as np + +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.framework import tensor_util +from tensorflow.python.framework import types +from tensorflow.python.ops import common_shapes +from tensorflow.python.ops import gen_nn_ops +# pylint: disable=wildcard-import +from tensorflow.python.ops.gen_nn_ops import * + + +# Aliases for some automatically-generated names. +local_response_normalization = gen_nn_ops.lrn + + +def deconv2d(value, filter, output_shape, strides, padding="SAME", + name=None): + """The transpose of `conv2d`. + + This used to be called "deconvolution", but it is actually the transpose + (gradient) of `conv2d`, not an actual deconvolution. + + Args: + value: A 4-D `Tensor` of type `float` and shape + `[batch, height, width, in_channels]`. + filter: A 4-D `Tensor` with the same type as `value` and shape + `[height, width, output_channels, in_channels]`. `filter`'s + `in_channels` dimension must match that of `value`. + output_shape: A 1-D `Tensor` representing the output shape of the + deconvolution op. + strides: A list of ints. The stride of the sliding window for each + dimension of the input tensor. + padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. + name: Optional name for the returned tensor. + + Returns: + A `Tensor` with the same type as `value`. + + Raises: + ValueError: If input/output depth does not match `filter`'s shape, or if + padding is other than `'VALID'` or `'SAME'`. + """ + with ops.op_scope([value, filter, output_shape], name, "DeConv2D") as name: + value = ops.convert_to_tensor(value, name="value") + filter = ops.convert_to_tensor(filter, name="filter") + if not value.get_shape()[3].is_compatible_with(filter.get_shape()[3]): + raise ValueError( + "input channels does not match filter's input channels, " + "{} != {}".format(value.get_shape()[3], filter.get_shape()[3])) + + output_shape_ = ops.convert_to_tensor(output_shape, name="output_shape") + if not output_shape_.get_shape().is_compatible_with(tensor_shape.vector(4)): + raise ValueError("output_shape must have shape (4,), got {}" + .format(output_shape_.get_shape())) + + if isinstance(output_shape, (list, np.ndarray)): + # output_shape's shape should be == [4] if reached this point. + if not filter.get_shape()[2].is_compatible_with(output_shape[3]): + raise ValueError( + "output_shape does not match filter's output channels, " + "{} != {}".format(output_shape[3], filter.get_shape()[2])) + + if padding != "VALID" and padding != "SAME": + raise ValueError("padding must be either VALID or SAME:" + " {}".format(padding)) + + return gen_nn_ops.conv2d_backprop_input(input_sizes=output_shape_, + filter=filter, + out_backprop=value, + strides=strides, + padding=padding, + name=name) + +# pylint: disable=protected-access +def bias_add(value, bias, name=None): + """Adds `bias` to `value`. + + This is (mostly) a special case of `tf.add` where `bias` is restricted to 1-D. + Broadcasting is supported, so `value` may have any number of dimensions. + Unlike `tf.add`, the type of `bias` is allowed to differ from `value` in the + case where both types are quantized. 
+ + Args: + value: A `Tensor` with type `float`, `double`, `int64`, `int32`, `uint8`, + `int16`, `int8`, or `complex64`. + bias: A 1-D `Tensor` with size matching the last dimension of `value`. + Must be the same type as `value` unless `value` is a quantized type, + in which case a different quantized type may be used. + name: A name for the operation (optional). + + Returns: + A `Tensor` with the same type as `value`. + """ + with ops.op_scope([value, bias], name, "BiasAdd") as name: + value = ops.convert_to_tensor(value, name="input") + bias = ops.convert_to_tensor(bias, dtype=value.dtype, name="bias") + return gen_nn_ops._bias_add(value, bias, name=name) + + +ops.RegisterShape("BiasAdd")(common_shapes.bias_add_shape) + + + +def relu6(features, name=None): + """Computes Rectified Linear 6: `min(max(features, 0), 6)`. + + Args: + features: A `Tensor` with type `float`, `double`, `int32`, `int64`, `uint8`, + `int16`, or `int8`. + name: A name for the operation (optional). + + Returns: + A `Tensor` with the same type as `features`. + """ + with ops.op_scope([features], name, "Relu6") as name: + features = ops.convert_to_tensor(features, name="features") + return gen_nn_ops._relu6(features, name=name) + + +def softmax_cross_entropy_with_logits(logits, labels, name=None): + """Computes softmax cross entropy between `logits` and `labels`. + + Measures the probability error in discrete classification tasks in which the + classes are mutually exclusive (each entry is in exactly one class). For + example, each CIFAR-10 image is labeled with one and only one label: an image + can be a dog or a truck, but not both. + + **WARNING:** This op expects unscaled logits, since it performs a `softmax` + on `logits` internally for efficiency. Do not call this op with the + output of `softmax`, as it will produce incorrect results. + + `logits` and `labels` must have the same shape `[batch_size, num_classes]` + and the same dtype (either `float32` or `float64`). + + Args: + logits: Unscaled log probabilities. + labels: Each row `labels[i]` must be a valid probability distribution. + name: A name for the operation (optional). + + Returns: + A 1-D `Tensor` of length `batch_size` of the same type as `logits` with the + softmax cross entropy loss. + """ + # The second output tensor contains the gradients. We use it in + # _CrossEntropyGrad() in nn_grad but not here. + cost, unused_backprop = gen_nn_ops._softmax_cross_entropy_with_logits( + logits, labels, name=name) + return cost + + +@ops.RegisterShape("SoftmaxCrossEntropyWithLogits") +def _SoftmaxCrossEntropyWithLogitsShape(op): + """Shape function for SoftmaxCrossEntropyWithLogits op.""" + logits_shape = op.inputs[0].get_shape() + labels_shape = op.inputs[1].get_shape() + input_shape = logits_shape.merge_with(labels_shape).with_rank(2) + batch_size = input_shape[0] + return [tensor_shape.vector(batch_size.value), input_shape] + + +def avg_pool(value, ksize, strides, padding, name=None): + """Performs the average pooling on the input. + + Each entry in `output` is the mean of the corresponding size `ksize` + window in `value`. + + Args: + value: A 4-D `Tensor` of shape `[batch, height, width, channels]` and type + `float32`, `float64`, `qint8`, `quint8`, or `qint32`. + ksize: A list of ints that has length >= 4. + The size of the window for each dimension of the input tensor. + strides: A list of ints that has length >= 4. + The stride of the sliding window for each dimension of the + input tensor. + padding: A string, either `'VALID'` or `'SAME'`. 
The padding algorithm. + name: Optional name for the operation. + + Returns: + A `Tensor` with the same type as `value`. The average pooled output tensor. + """ + with ops.op_scope([value], name, "AvgPool") as name: + value = ops.convert_to_tensor(value, name="input") + return gen_nn_ops._avg_pool(value, ksize=ksize, strides=strides, + padding=padding, + name=name) + + +def max_pool(value, ksize, strides, padding, name=None): + """Performs the max pooling on the input. + + Args: + value: A 4-D `Tensor` with shape `[batch, height, width, channels]` and + type `float32`, `float64`, `qint8`, `quint8`, `qint32`. + ksize: A list of ints that has length >= 4. The size of the window for + each dimension of the input tensor. + strides: A list of ints that has length >= 4. The stride of the sliding + window for each dimension of the input tensor. + padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. + name: Optional name for the operation. + + Returns: + A `Tensor` with the same type as `value`. The max pooled output tensor. + """ + with ops.op_scope([value], name, "MaxPool") as name: + value = ops.convert_to_tensor(value, name="input") + return gen_nn_ops._max_pool(value, ksize=ksize, strides=strides, + padding=padding, + name=name) + + +ops.RegisterShape("Relu")(common_shapes.unchanged_shape) +ops.RegisterShape("Relu6")(common_shapes.unchanged_shape) +ops.RegisterShape("Softplus")(common_shapes.unchanged_shape) + + +@ops.RegisterShape("ReluGrad") +@ops.RegisterShape("Relu6Grad") +@ops.RegisterShape("SoftplusGrad") +def _BinaryElementwiseShape(op): + """Returns same shape as both inputs to op. + + Args: + op: Input operation. + + Returns: + Shape of both inputs to `op`. + """ + return [op.inputs[0].get_shape().merge_with(op.inputs[1].get_shape())] + + +ops.RegisterShape("L2Loss")(common_shapes.scalar_shape) + + +ops.RegisterShape("LRN")(common_shapes.unchanged_shape_with_rank(4)) + + +@ops.RegisterShape("LRNGrad") +def _LRNGradShape(op): + """Shape function for LRNGrad op.""" + in_grads_shape = op.inputs[0].get_shape().with_rank(4) + in_image_shape = op.inputs[1].get_shape().with_rank(4) + out_image_shape = op.inputs[2].get_shape().with_rank(4) + return [in_grads_shape.merge_with(in_image_shape).merge_with(out_image_shape)] + + +ops.RegisterShape("Softmax")( + common_shapes.unchanged_shape_with_rank(2)) + + +@ops.RegisterShape("InTopK") +def _InTopKShape(op): + """Shape function for InTopK op.""" + predictions_shape = op.inputs[0].get_shape().with_rank(2) + targets_shape = op.inputs[1].get_shape().with_rank(1) + batch_size = predictions_shape[0].merge_with(targets_shape[0]) + return [tensor_shape.vector(batch_size.value)] + + +@ops.RegisterShape("TopK") +def _TopKShape(op): + """Shape function for TopK op.""" + input_shape = op.inputs[0].get_shape().with_rank(2) + k = op.get_attr("k") + num_rows = input_shape[0] + num_cols = input_shape[1] + if num_cols.value is not None and num_cols.value < k: + raise ValueError("input must have at least k (%d) columns" % k) + return [tensor_shape.TensorShape([num_rows, k]), + tensor_shape.TensorShape([num_rows, k])] + + +@ops.RegisterShape("BatchNormWithGlobalNormalization") +def _BatchNormShape(op): + """Shape function for BatchNormWithGlobalNormalization op.""" + input_shape = op.inputs[0].get_shape().with_rank(4) + mean_shape = op.inputs[1].get_shape().with_rank(1) + var_shape = op.inputs[2].get_shape().with_rank(1) + beta_shape = op.inputs[3].get_shape().with_rank(1) + gamma_shape = op.inputs[4].get_shape().with_rank(1) + 
mean_shape[0].merge_with(input_shape[3]) + var_shape[0].merge_with(input_shape[3]) + beta_shape[0].merge_with(input_shape[3]) + gamma_shape[0].merge_with(input_shape[3]) + return [input_shape] + + +@ops.RegisterShape("BatchNormWithGlobalNormalizationGrad") +def _BatchNormGradShape(op): + """Shape function for BatchNormWithGlobalNormalizationGrad op.""" + input_shape = op.inputs[0].get_shape().with_rank(4) + mean_shape = op.inputs[1].get_shape().with_rank(1) + var_shape = op.inputs[2].get_shape().with_rank(1) + beta_shape = op.inputs[3].get_shape().with_rank(1) + out_backprop_shape = op.inputs[4].get_shape().with_rank(4) + input_shape = input_shape.merge_with(out_backprop_shape) + vector_dim = input_shape[3] + vector_dim = vector_dim.merge_with(mean_shape[0]) + vector_dim = vector_dim.merge_with(var_shape[0]) + vector_dim = vector_dim.merge_with(beta_shape[0]) + return [input_shape] + ([tensor_shape.vector(vector_dim)] * 4) + + +ops.RegisterShape("Conv2D")(common_shapes.conv2d_shape) +ops.RegisterShape("AvgPool")(common_shapes.avg_pool_shape) +ops.RegisterShape("MaxPool")(common_shapes.max_pool_shape) + + +@ops.RegisterShape("MaxPoolWithArgmax") +def _MaxPoolWithArgMaxShape(op): + """Shape function for MaxPoolWithArgmax op.""" + return common_shapes.max_pool_shape(op) * 2 + + +@ops.RegisterShape("AvgPoolGrad") +def _AvgPoolGradShape(op): + """Shape function for the AvgPoolGrad op.""" + orig_input_shape = tensor_util.ConstantValue(op.inputs[0]) + if orig_input_shape is not None: + return [tensor_shape.TensorShape(orig_input_shape.tolist())] + else: + # NOTE(mrry): We could in principle work out the shape from the + # gradients and the attrs, but if we do not know orig_input_shape + # statically, then we are unlikely to know the shape of the + # gradients either. + return [tensor_shape.unknown_shape(ndims=4)] + + +@ops.RegisterShape("Conv2DBackpropFilter") +def _Conv2DBackpropFilterShape(op): + """Shape function for the Conv2DBackpropFilter op.""" + filter_shape = tensor_util.ConstantValue(op.inputs[1]) + if filter_shape is not None: + return [tensor_shape.TensorShape(filter_shape.tolist())] + else: + # NOTE(mrry): We could in principle work out the shape from the + # gradients and the attrs, but if we do not know filter_shape + # statically, then we are unlikely to know the shape of the + # gradients either. + return [tensor_shape.unknown_shape(ndims=4)] + + +@ops.RegisterShape("Conv2DBackpropInput") +def _Conv2DBackpropInputShape(op): + """Shape function for the Conv2DBackpropInput op.""" + input_shape = tensor_util.ConstantValue(op.inputs[0]) + if input_shape is not None: + return [tensor_shape.TensorShape(input_shape.tolist())] + else: + # NOTE(mrry): We could in principle work out the shape from the + # gradients and the attrs, but if we do not know input_shape + # statically, then we are unlikely to know the shape of the + # gradients either. 
+ return [tensor_shape.unknown_shape(ndims=4)] + + +@ops.RegisterShape("MaxPoolGrad") +@ops.RegisterShape("MaxPoolGradWithArgmax") +def _MaxPoolGradShape(op): + """Shape function for the MaxPoolGrad op.""" + orig_input_shape = op.inputs[0].get_shape().with_rank(4) + return [orig_input_shape] diff --git a/tensorflow/python/ops/nn_test.py b/tensorflow/python/ops/nn_test.py new file mode 100644 index 0000000000..11ce56e359 --- /dev/null +++ b/tensorflow/python/ops/nn_test.py @@ -0,0 +1,882 @@ +"""Tests for tensorflow.ops.nn.""" +import math + +import tensorflow.python.platform + +import numpy as np + +from tensorflow.python.framework import test_util +from tensorflow.python.framework import types +from tensorflow.python.kernel_tests import gradient_checker as gc +from tensorflow.python.ops import constant_op +from tensorflow.python.ops import gen_nn_ops +from tensorflow.python.ops import gradients +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn +from tensorflow.python.ops import nn_grad +from tensorflow.python.platform import googletest + +exp = math.exp +log = math.log + + +class SigmoidCrossEntropyWithLogitsTest(test_util.TensorFlowTestCase): + + def _SigmoidCrossEntropyWithLogits(self, logits, targets): + assert len(logits) == len(targets) + pred = [1 / (1 + exp(-x)) for x in logits] + eps = 0.0001 + pred = [min(max(p, eps), 1 - eps) for p in pred] + return [-z * log(y) - (1 - z) * log(1 - y) for y, z in zip(pred, targets)] + + def _Inputs(self, x=None, y=None, dtype=types.float64, sizes=None): + x = [-100, -2, -2, 0, 2, 2, 2, 100] if x is None else x + y = [0, 0, 1, 0, 0, 1, 0.5, 1] if y is None else y + assert len(x) == len(y) + sizes = sizes if sizes else [len(x)] + logits = constant_op.constant(x, shape=sizes, dtype=dtype, name="logits") + targets = constant_op.constant(y, shape=sizes, dtype=dtype, name="targets") + losses = np.array(self._SigmoidCrossEntropyWithLogits(x, y)).reshape(*sizes) + return logits, targets, losses + + def testConstructionNamed(self): + with self.test_session(): + logits, targets, _ = self._Inputs() + loss = nn.sigmoid_cross_entropy_with_logits(logits, targets, + name="mylogistic") + self.assertEqual("mylogistic", loss.op.name) + + def testLogisticOutput(self): + for use_gpu in [True, False]: + with self.test_session(use_gpu=use_gpu): + logits, targets, losses = self._Inputs(dtype=types.float32) + loss = nn.sigmoid_cross_entropy_with_logits(logits, targets) + np_loss = np.array(losses).astype(np.float32) + tf_loss = loss.eval() + self.assertAllClose(np_loss, tf_loss, atol=0.001) + + def testLogisticOutputMultiDim(self): + for use_gpu in [True, False]: + with self.test_session(use_gpu=use_gpu): + logits, targets, losses = self._Inputs(dtype=types.float32, + sizes=[2, 2, 2]) + loss = nn.sigmoid_cross_entropy_with_logits(logits, targets) + np_loss = np.array(losses).astype(np.float32) + tf_loss = loss.eval() + self.assertAllClose(np_loss, tf_loss, atol=0.001) + + def testGradient(self): + sizes = [4, 2] + with self.test_session(): + logits, targets, _ = self._Inputs(sizes=sizes) + loss = nn.sigmoid_cross_entropy_with_logits(logits, targets) + err = gc.ComputeGradientError(logits, sizes, loss, sizes) + print "logistic loss gradient err = ", err + self.assertLess(err, 1e-7) + + +class ZeroFractionTest(test_util.TensorFlowTestCase): + + def _ZeroFraction(self, x): + assert x.shape + total_elements = float(np.prod(x.shape)) + nonzeros = float(np.count_nonzero(x.flatten())) + return 1.0 - (nonzeros / total_elements) + + def 
testZeroFraction(self): + x_shape = [5, 17] + x_np = np.random.randint(0, 2, size=x_shape).astype(np.float32) + y_np = self._ZeroFraction(x_np) + with self.test_session(): + x_tf = constant_op.constant(x_np) + x_tf.set_shape(x_shape) + y_tf = nn.zero_fraction(x_tf) + y_tf_np = y_tf.eval() + eps = 1e-8 + self.assertAllClose(y_tf_np, y_np, eps) + + def testZeroFractionEmpty(self): + with self.test_session(): + x = np.zeros(0) + y = nn.zero_fraction(x).eval() + self.assertTrue(np.isnan(y)) + + +class SoftmaxTest(test_util.TensorFlowTestCase): + + def _softmax(self, x): + assert len(x.shape) == 2 + m = x.max(1)[:, np.newaxis] + u = np.exp(x - m) + z = u.sum(1)[:, np.newaxis] + return u / z + + def testSoftmax(self): + x_shape = [5, 10] + x_np = np.random.randn(*x_shape).astype(np.float32) + y_np = self._softmax(x_np) + with self.test_session(): + x_tf = constant_op.constant(x_np) + y_tf = nn.softmax(x_tf) + y_tf_np = y_tf.eval() + eps = 1e-3 + self.assertAllClose(y_tf_np, y_np, eps) + + def testGradient(self): + x_shape = [5, 10] + x_np = np.random.randn(*x_shape).astype(np.float64) + with self.test_session(): + x_tf = constant_op.constant(x_np) + y_tf = nn.softmax(x_tf) + err = gc.ComputeGradientError(x_tf, x_shape, y_tf, x_shape) + eps = 1e-8 + self.assertLess(err, eps) + + +class DeConv2DTest(test_util.TensorFlowTestCase): + + def testDeConv2DSingleStride(self): + with self.test_session(): + strides = [1, 1, 1, 1] + + # Input, output: [batch, height, width, depth] + x_shape = [2, 6, 4, 3] + y_shape = [2, 6, 4, 2] + + # Filter: [kernel_height, kernel_width, output_depth, input_depth] + f_shape = [3, 3, 2, 3] + + x = constant_op.constant(1.0, shape=x_shape, name="x", + dtype=types.float32) + f = constant_op.constant(1.0, shape=f_shape, name="filter", + dtype=types.float32) + output = nn.deconv2d(x, f, y_shape, strides=strides, padding="SAME") + value = output.eval() + + # We count the number of cells being added at the locations in the output. + # At the center, #cells=kernel_height * kernel_width + # At the corners, #cells=ceil(kernel_height/2) * ceil(kernel_width/2) + # At the borders, #cells=ceil(kernel_height/2)*kernel_width or + # kernel_height * ceil(kernel_width/2) + + for n in xrange(x_shape[0]): + for k in xrange(f_shape[2]): + for w in xrange(y_shape[2]): + for h in xrange(y_shape[1]): + target = 4 * 3.0 + h_in = h > 0 and h < y_shape[1] - 1 + w_in = w > 0 and w < y_shape[2] - 1 + if h_in and w_in: + target += 5 * 3.0 + elif h_in or w_in: + target += 2 * 3.0 + self.assertAllClose(target, value[n, h, w, k]) + + def testDeConv2DSame(self): + with self.test_session(): + strides = [1, 2, 2, 1] + + # Input, output: [batch, height, width, depth] + x_shape = [2, 6, 4, 3] + y_shape = [2, 12, 8, 2] + + # Filter: [kernel_height, kernel_width, output_depth, input_depth] + f_shape = [3, 3, 2, 3] + + x = constant_op.constant(1.0, shape=x_shape, name="x", + dtype=types.float32) + f = constant_op.constant(1.0, shape=f_shape, name="filter", + dtype=types.float32) + output = nn.deconv2d(x, f, y_shape, strides=strides, padding="SAME") + value = output.eval() + + for n in xrange(x_shape[0]): + for k in xrange(f_shape[2]): + for w in xrange(y_shape[2]): + for h in xrange(y_shape[1]): + target = 3.0 + # We add a case for locations divisible by the stride. 
+ h_in = h % strides[1] == 0 and h > 0 and h < y_shape[1] - 1 + w_in = w % strides[2] == 0 and w > 0 and w < y_shape[2] - 1 + if h_in and w_in: + target += 9.0 + elif h_in or w_in: + target += 3.0 + self.assertAllClose(target, value[n, h, w, k]) + + def testDeConv2DValid(self): + with self.test_session(): + strides = [1, 2, 2, 1] + + # Input, output: [batch, height, width, depth] + x_shape = [2, 6, 4, 3] + y_shape = [2, 13, 9, 2] + + # Filter: [kernel_height, kernel_width, output_depth, input_depth] + f_shape = [3, 3, 2, 3] + + x = constant_op.constant(1.0, shape=x_shape, name="x", + dtype=types.float32) + f = constant_op.constant(1.0, shape=f_shape, name="filter", + dtype=types.float32) + output = nn.deconv2d(x, f, y_shape, strides=strides, padding="VALID") + value = output.eval() + + cache_values = np.zeros(y_shape, dtype=np.float32) + + # The amount of padding added + pad = 1 + + for n in xrange(x_shape[0]): + for k in xrange(f_shape[2]): + for w in xrange(pad, y_shape[2] - pad): + for h in xrange(pad, y_shape[1] - pad): + target = 3.0 + # We add a case for locations divisible by the stride. + h_in = h % strides[ + 1] == 0 and h > pad and h < y_shape[1] - 1 - pad + w_in = w % strides[ + 2] == 0 and w > pad and w < y_shape[2] - 1 - pad + if h_in and w_in: + target += 9.0 + elif h_in or w_in: + target += 3.0 + cache_values[n, h, w, k] = target + + # copy values in the border + cache_values[n, :, 0, k] = cache_values[n, :, 1, k] + cache_values[n, :, -1, k] = cache_values[n, :, -2, k] + cache_values[n, 0, :, k] = cache_values[n, 1, :, k] + cache_values[n, -1, :, k] = cache_values[n, -2, :, k] + + self.assertAllClose(cache_values, value) + + def testGradient(self): + x_shape = [2, 6, 4, 3] + f_shape = [3, 3, 2, 3] + y_shape = [2, 12, 8, 2] + strides = [1, 2, 2, 1] + np.random.seed(1) # Make it reproducible. + x_val = np.random.random_sample(x_shape).astype(np.float64) + f_val = np.random.random_sample(f_shape).astype(np.float64) + with self.test_session(): + x = constant_op.constant(x_val, name="x", dtype=types.float32) + f = constant_op.constant(f_val, name="f", dtype=types.float32) + output = nn.deconv2d(x, f, y_shape, strides=strides, padding="SAME") + err = gc.ComputeGradientError([x, f], [x_shape, f_shape], output, y_shape) + print "DeConv gradient err = %g " % err + err_tolerance = 0.0005 + self.assertLess(err, err_tolerance) + + +class L2LossTest(test_util.TensorFlowTestCase): + + def testL2Loss(self): + with self.test_session(): + x = constant_op.constant([1.0, 0.0, 3.0, 2.0], shape=[2, 2], name="x") + l2loss = nn.l2_loss(x) + value = l2loss.eval() + self.assertAllClose(7.0, value) + + def testGradient(self): + x_shape = [20, 7, 3] + np.random.seed(1) # Make it reproducible. 
+ x_val = np.random.random_sample(x_shape).astype(np.float64) + with self.test_session(): + x = constant_op.constant(x_val, name="x") + output = nn.l2_loss(x) + err = gc.ComputeGradientError(x, x_shape, output, [1]) + print "L2Loss gradient err = %g " % err + err_tolerance = 1e-11 + self.assertLess(err, err_tolerance) + + +class L2NormalizeTest(test_util.TensorFlowTestCase): + + def _l2Normalize(self, x, dim): + norm = np.apply_along_axis(np.linalg.norm, dim, x) + return x / np.expand_dims(norm, dim) + + def testL2Normalize(self): + x_shape = [20, 7, 3] + np.random.seed(1) + x_np = np.random.random_sample(x_shape).astype(np.float32) + for dim in range(len(x_shape)): + y_np = self._l2Normalize(x_np, dim) + with self.test_session(): + x_tf = constant_op.constant(x_np, name="x") + y_tf = nn.l2_normalize(x_tf, dim) + self.assertAllClose(y_np, y_tf.eval()) + + def testL2NormalizeGradient(self): + x_shape = [20, 7, 3] + np.random.seed(1) + x_np = np.random.random_sample(x_shape).astype(np.float64) + for dim in range(len(x_shape)): + with self.test_session(): + x_tf = constant_op.constant(x_np, name="x") + y_tf = nn.l2_normalize(x_tf, dim) + err = gc.ComputeGradientError(x_tf, x_shape, y_tf, x_shape) + print "L2Normalize gradient err = %g " % err + self.assertLess(err, 1e-4) + + +class DropoutTest(test_util.TensorFlowTestCase): + + def testDropout(self): + # Runs dropout with 0-1 tensor 10 times, sum the number of ones and validate + # that it is producing approximately the right number of ones over a large + # number of samples, based on the keep probability. + x_dim = 40 + y_dim = 30 + num_iter = 10 + for keep_prob in [0.1, 0.5, 0.8]: + with self.test_session(): + t = constant_op.constant(1.0, + shape=[x_dim, y_dim], + dtype=types.float32) + dropout = nn.dropout(t, keep_prob) + final_count = 0 + self.assertEqual([x_dim, y_dim], dropout.get_shape()) + for _ in xrange(0, num_iter): + value = dropout.eval() + final_count += np.count_nonzero(value) + # Verifies that there are only two values: 0 and 1/keep_prob. + sorted_value = np.unique(np.sort(value)) + self.assertEqual(0, sorted_value[0]) + self.assertAllClose(1 / keep_prob, sorted_value[1]) + # Check that we are in the 15% error range + expected_count = x_dim * y_dim * keep_prob * num_iter + rel_error = math.fabs(final_count - expected_count) / expected_count + print rel_error + self.assertTrue(rel_error < 0.15) + + def testShapedDropout(self): + # Runs dropout with 0-1 tensor 10 times, sum the number of ones and validate + # that it is producing approximately the right number of ones over a large + # number of samples, based on the keep probability. This time with shaped + # noise. + x_dim = 40 * 30 + y_dim = 3 + num_iter = 10 + for keep_prob in [0.1, 0.5, 0.8]: + with self.test_session(): + t = constant_op.constant(1.0, + shape=[x_dim, y_dim], + dtype=types.float32) + dropout = nn.dropout(t, keep_prob, noise_shape=[x_dim, 1]) + self.assertEqual([x_dim, y_dim], dropout.get_shape()) + final_count = 0 + for _ in xrange(0, num_iter): + value = dropout.eval() + final_count += np.count_nonzero(value) + # Verifies that there are only two values: 0 and 1/keep_prob. 
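# Rough NumPy sketch (not the TF implementation) of the inverted-dropout behaviour
# the dropout tests above rely on: kept entries of an all-ones input are scaled to
# 1/keep_prob, so the only values seen are 0 and 1/keep_prob, and roughly a
# keep_prob fraction of the entries survive.
import numpy as np

keep_prob = 0.5
x = np.ones((40, 30), dtype=np.float32)
mask = (np.random.uniform(size=x.shape) < keep_prob).astype(np.float32)
y = x * mask / keep_prob
print(np.unique(y))                           # [0., 2.]
print(np.count_nonzero(y) / float(y.size))    # close to keep_prob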
+ sorted_value = np.unique(np.sort(value)) + self.assertEqual(0, sorted_value[0]) + self.assertAllClose(1 / keep_prob, sorted_value[1]) + # Check that we are in the 15% error range + expected_count = x_dim * y_dim * keep_prob * num_iter + rel_error = math.fabs(final_count - expected_count) / expected_count + print rel_error + self.assertTrue(rel_error < 0.15) + + def testShapedDropoutCorrelation(self): + # Runs a shaped dropout and tests that the correlations are correct. + x_dim = 40 + y_dim = 30 + num_iter = 10 + for keep_prob in [0.1, 0.5, 0.8]: + with self.test_session(): + t = constant_op.constant(1.0, + shape=[x_dim, y_dim], + dtype=types.float32) + dropout = nn.dropout(t, keep_prob, noise_shape=[x_dim, 1]) + self.assertEqual([x_dim, y_dim], dropout.get_shape()) + for _ in xrange(0, num_iter): + value = dropout.eval() + # Verifies that each y column as only one type of activation. + for i in xrange(x_dim): + sorted_value = np.unique(np.sort(value[i, :])) + self.assertEqual(sorted_value.size, 1) + + def testShapedDropoutShapeError(self): + # Runs shaped dropout and verifies an error is thrown on misshapen noise. + x_dim = 40 + y_dim = 30 + keep_prob = 0.5 + with self.test_session(): + t = constant_op.constant(1.0, + shape=[x_dim, y_dim], + dtype=types.float32) + with self.assertRaises(ValueError): + _ = nn.dropout(t, keep_prob, noise_shape=[x_dim, y_dim + 10]) + with self.assertRaises(ValueError): + _ = nn.dropout(t, keep_prob, noise_shape=[x_dim, y_dim, 5]) + with self.assertRaises(ValueError): + _ = nn.dropout(t, keep_prob, noise_shape=[x_dim + 3]) + with self.assertRaises(ValueError): + _ = nn.dropout(t, keep_prob, noise_shape=[x_dim]) + # test that broadcasting proceeds + _ = nn.dropout(t, keep_prob, noise_shape=[y_dim]) + _ = nn.dropout(t, keep_prob, noise_shape=[1, y_dim]) + _ = nn.dropout(t, keep_prob, noise_shape=[x_dim, 1]) + _ = nn.dropout(t, keep_prob, noise_shape=[1, 1]) + + +class BatchNormWithGlobalNormalizationTest(test_util.TensorFlowTestCase): + + def _npBatchNorm(self, x, m, v, beta, gamma, epsilon, + scale_after_normalization): + y = (x - m) / np.sqrt(v + epsilon) + y = y * gamma if scale_after_normalization else y + y += beta + return y + + def _opsBatchNorm(self, x, m, v, beta, gamma, epsilon, + scale_after_normalization): + y = (x - m) * math_ops.rsqrt(v + epsilon) + if scale_after_normalization: + y = gamma * y + y += beta + return y + + def testBatchNorm(self): + x_shape = [3, 5, 4, 2] + param_shape = [2] + x_val = np.random.random_sample(x_shape).astype(np.float32) + m_val = np.random.random_sample(param_shape).astype(np.float32) + v_val = np.random.random_sample(param_shape).astype(np.float32) + beta_val = np.random.random_sample(param_shape).astype(np.float32) + gamma_val = np.random.random_sample(param_shape).astype(np.float32) + for use_gpu in [True, False]: + with self.test_session(use_gpu=use_gpu) as sess: + x = constant_op.constant(x_val, name="x") + m = constant_op.constant(m_val, name="m") + v = constant_op.constant(v_val, name="v") + beta = constant_op.constant(beta_val, name="beta") + gamma = constant_op.constant(gamma_val, name="gamma") + epsilon = 0.001 + for scale_after_normalization in [True, False]: + bn = nn.batch_norm_with_global_normalization( + x, m, v, beta, gamma, epsilon, scale_after_normalization) + on = self._opsBatchNorm( + x, m, v, beta, gamma, epsilon, scale_after_normalization) + np_batch_norm = self._npBatchNorm( + x_val, m_val, v_val, beta_val, gamma_val, epsilon, + scale_after_normalization) + tf_batch_norm, ops_batch_norm = 
sess.run([bn, on]) + self.assertAllClose(np_batch_norm, tf_batch_norm, atol=0.000001) + self.assertAllClose(np_batch_norm, ops_batch_norm, atol=0.000001) + self.assertAllClose(tf_batch_norm, ops_batch_norm, atol=0.000001) + + def _testBatchNormGradient(self, param_index, tag, scale_after_normalization, + err_tolerance=1e-11): + x_shape = [3, 5, 4, 5] + param_shape = [5] + np.random.seed(1) # Make it reproducible. + x_val = np.random.random_sample(x_shape).astype(np.float64) + m_val = np.random.random_sample(param_shape).astype(np.float64) + v_val = np.random.random_sample(param_shape).astype(np.float64) + beta_val = np.random.random_sample(param_shape).astype(np.float64) + gamma_val = np.random.random_sample(param_shape).astype(np.float64) + with self.test_session(): + x = constant_op.constant(x_val, name="x") + m = constant_op.constant(m_val, name="m") + v = constant_op.constant(v_val, name="v") + beta = constant_op.constant(beta_val, name="beta") + gamma = constant_op.constant(gamma_val, name="gamma") + epsilon = 0.001 + # If scale_after_normalization is False, backprop for gamma + # will be 0. gamma is unchanged. + output = nn.batch_norm_with_global_normalization( + x, m, v, beta, gamma, epsilon, scale_after_normalization) + all_params = [x, m, v, beta, gamma] + all_shapes = [x_shape, param_shape, param_shape, param_shape, param_shape] + err = gc.ComputeGradientError(all_params[param_index], + all_shapes[param_index], output, x_shape) + print "Batch normalization %s gradient %s scale err = " % ( + tag, "with" if scale_after_normalization else "without" + ), err + self.assertLess(err, err_tolerance) + + def testBatchNormInputGradient(self): + for scale_after_normalization in [True, False]: + self._testBatchNormGradient(0, "x", scale_after_normalization) + + def testBatchNormMeanGradient(self): + for scale_after_normalization in [True, False]: + self._testBatchNormGradient(1, "mean", scale_after_normalization) + + def testBatchNormVarianceGradient(self): + for scale_after_normalization in [True, False]: + self._testBatchNormGradient(2, "variance", scale_after_normalization, + err_tolerance=1e-03) + + def testBatchNormBetaGradient(self): + for scale_after_normalization in [True, False]: + self._testBatchNormGradient(3, "beta", scale_after_normalization) + + def testBatchNormGammaGradient(self): + for scale_after_normalization in [True, False]: + self._testBatchNormGradient(4, "gamma", scale_after_normalization) + + def testBatchNormGradImpl(self): + x_shape = [7, 5, 4, 6] + param_shape = [6] + np.random.seed(1) # Make it reproducible. 
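# Rough sketch (illustrative only, not the gradient_checker implementation) of
# what a "gradient error" such as gc.ComputeGradientError reports in the tests
# above: the worst-case gap between a central-difference Jacobian estimate and
# the analytic gradient.
import numpy as np

def _finite_diff_grad(f, x, delta=1e-6):
  """Central-difference estimate of df/dx for a scalar-valued f."""
  grad = np.zeros_like(x)
  for idx in np.ndindex(*x.shape):
    orig = x[idx]
    x[idx] = orig + delta
    plus = f(x)
    x[idx] = orig - delta
    minus = f(x)
    x[idx] = orig
    grad[idx] = (plus - minus) / (2 * delta)
  return grad

x = np.random.random_sample([4, 3])
analytic = x.copy()                            # d/dx of sum(x**2)/2 is x
numeric = _finite_diff_grad(lambda v: np.sum(v ** 2) / 2, x)
print(np.max(np.abs(analytic - numeric)))      # tiny; the tests assert this stays below a tolerance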
+ x_val = np.random.random_sample(x_shape).astype(np.float32) + m_val = np.random.random_sample(param_shape).astype(np.float32) + v_val = np.random.random_sample(param_shape).astype(np.float32) + beta_val = np.random.random_sample(param_shape).astype(np.float32) + gamma_val = np.random.random_sample(param_shape).astype(np.float32) + backprop_val = np.random.random_sample(x_shape).astype(np.float32) + for use_gpu in [False, True]: + with self.test_session(use_gpu=use_gpu) as sess: + x = constant_op.constant(x_val, name="x") + m = constant_op.constant(m_val, name="m") + v = constant_op.constant(v_val, name="v") + beta = constant_op.constant(beta_val, name="beta") + gamma = constant_op.constant(gamma_val, name="gamma") + backprop = constant_op.constant(backprop_val, name="backprop") + epsilon = 0.001 + for scale_after_normalization in [True, False]: + dx, dm, dv, db, dg = ( + gen_nn_ops._batch_norm_with_global_normalization_grad( + x, m, v, gamma, backprop, epsilon, scale_after_normalization)) + on = self._opsBatchNorm( + x, m, v, beta, gamma, epsilon, scale_after_normalization) + odx, odm, odv, odb, odg = gradients.gradients( + [on], [x, m, v, beta, gamma], [backprop]) + if scale_after_normalization: + all_grads = sess.run([dx, dm, dv, db, dg, odx, odm, odv, odb, odg]) + to_check = ["dx", "dm", "dv", "db", "dg"] + else: + all_grads = sess.run([dx, dm, dv, db, odx, odm, odv, odb]) + to_check = ["dx", "dm", "dv", "db"] + for i, n in enumerate(to_check): + print n + self.assertAllClose( + all_grads[i + len(to_check)], all_grads[i], atol=0.000001) + + +class MomentsTest(test_util.TensorFlowTestCase): + + def RunMomentTest(self, shape, global_norm): + with self.test_session(): + # shape = [batch, width, height, depth] + assert len(shape) == 4 + + x_numpy = np.random.normal(size=shape).astype(np.float32) + x = constant_op.constant(x_numpy) + x.set_shape(shape) + axes = [0, 1, 2] if global_norm else [0] + mean, var = nn.moments(x, axes) + + num_elements = np.prod([shape[i] for i in axes]) + + ax = (0, 1, 2) if global_norm else (0) + expected_mean = np.sum(x_numpy, axis=ax) / num_elements + expected_mean_squared = np.multiply(expected_mean, expected_mean) + expected_x_squared = np.sum( + np.multiply(x_numpy, x_numpy), axis=ax) / num_elements + expected_variance = expected_x_squared - expected_mean_squared + + # Check that the moments are correct. 
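# Quick NumPy check of the identity RunMomentTest uses above: the expected
# variance, E[x^2] - E[x]^2, matches the (biased) variance computed directly
# over the same axes.
import numpy as np

x = np.random.normal(size=(2, 3, 5, 4))
axes = (0, 1, 2)
mean = x.mean(axis=axes)
var_identity = (x ** 2).mean(axis=axes) - mean ** 2
print(np.allclose(var_identity, x.var(axis=axes)))  # True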
+ self.assertAllClose(expected_mean, mean.eval()) + self.assertAllClose(expected_variance, var.eval()) + + def testBasic(self): + self.RunMomentTest(shape=[2, 3, 5, 4], global_norm=False) + + def testGlobalNormalization(self): + self.RunMomentTest(shape=[2, 3, 5, 4], global_norm=True) + + def _testGlobalGradient(self, from_y="mean"): + with self.test_session(): + x_shape = [3, 5, 4, 2] + x_val = np.random.random_sample(x_shape).astype(np.float64) + x = constant_op.constant(x_val) + x.set_shape(x_shape) + + axes = [0, 1, 2] + y_shape = [2] # Depth of x + out_mean, out_var = nn.moments(x, axes) + if from_y == "mean": + y = out_mean + elif from_y == "var": + y = out_var + err = gc.ComputeGradientError(x, x_shape, y, y_shape) + print "Moments %s gradient err = %g" % (from_y, err) + self.assertLess(err, 1e-11) + + def testMeanGlobalGradient(self): + self._testGlobalGradient(from_y="mean") + + def testVarGlobalGradient(self): + self._testGlobalGradient(from_y="var") + + +class ComputeSampledLogitsTest(test_util.TensorFlowTestCase): + + def setUp(self): + self._num_classes = 5 + self._dim = 10 + self._batch_size = 3 + + def _GenerateTestInputs(self): + np.random.seed(0) + weights = np.random.randn(self._num_classes, self._dim).astype(np.float32) + biases = np.random.randn(self._num_classes).astype(np.float32) + hidden_acts = np.random.randn(self._batch_size, self._dim).astype( + np.float32) + + return weights, biases, hidden_acts + + def _ComputeSampledLogitsNP(self, true_w, true_b, sampled_w, sampled_b, + hidden_acts, + num_true=1, + true_expected=None, + sampled_expected=None): + + batch_size, dim = hidden_acts.shape + true_logits = np.sum( + hidden_acts.reshape((batch_size, 1, dim)) * true_w.reshape( + (batch_size, num_true, dim)), + axis=2) + true_b = true_b.reshape((batch_size, num_true)) + true_logits += true_b + sampled_logits = np.dot(hidden_acts, sampled_w.T) + sampled_b + + if true_expected is not None: + true_logits -= np.log(true_expected) + if sampled_expected is not None: + sampled_logits -= np.log(sampled_expected[np.newaxis, :]) + + out_logits = np.concatenate([true_logits, sampled_logits], axis=1) + out_labels = np.hstack((np.ones_like(true_logits) / num_true, + np.zeros_like(sampled_logits))) + + return out_logits, out_labels + + def _ComputeSampledLogitsTF(self, weights, biases, hidden_acts, labels, + num_sampled, num_classes, num_true, sampled_vals, + subtract_log_q, remove_accidental_hits, + name="sampled_loss_TF"): + # Should be called from within a `with test_session():` block + weights_tf = constant_op.constant(weights) + biases_tf = constant_op.constant(biases) + hidden_acts_tf = constant_op.constant(hidden_acts, + shape=(self._batch_size, self._dim)) + labels_tf = constant_op.constant(labels, dtype=types.int64, + shape=(self._batch_size, num_true)) + + pred_logits_tf, pred_labels_tf = nn._compute_sampled_logits( + weights_tf, biases_tf, hidden_acts_tf, labels_tf, num_sampled, + num_classes, num_true, sampled_vals, + subtract_log_q=subtract_log_q, + remove_accidental_hits=remove_accidental_hits, + name=name) + return pred_logits_tf, pred_labels_tf + + def testComputeSampledLogitsShapes(self): + # We just check that the shapes of the returned values are correct. + weights, biases, hidden_acts = self._GenerateTestInputs() + sampled = [1, 0, 2, 3] + num_sampled = len(sampled) + true_exp = sampled_exp = [1., 1., 1., 1.] 
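# Small NumPy sketch (hypothetical sizes) of the layout _ComputeSampledLogitsNP
# produces above: each row holds the logits for that example's num_true true
# classes followed by the num_sampled shared sampled classes, giving an output
# of shape [batch_size, num_true + num_sampled].
import numpy as np

batch, dim, num_true, num_sampled = 3, 10, 2, 4
hidden = np.random.randn(batch, dim)
true_w = np.random.randn(batch * num_true, dim)
sampled_w = np.random.randn(num_sampled, dim)
true_logits = np.sum(
    hidden[:, np.newaxis, :] * true_w.reshape(batch, num_true, dim), axis=2)
sampled_logits = hidden.dot(sampled_w.T)
out_logits = np.concatenate([true_logits, sampled_logits], axis=1)
print(out_logits.shape)  # (3, 6)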
+ test_sampled_vals = (sampled, true_exp, sampled_exp) + sampled_w, sampled_b = weights[sampled], biases[sampled] + + with self.test_session() as sess: + for num_true_test in range(1, 5): + labels = np.random.randint(low=0, high=self._num_classes, + size=self._batch_size * num_true_test) + true_w, true_b = weights[labels], biases[labels] + + logits_np, labels_np = self._ComputeSampledLogitsNP( + true_w, true_b, sampled_w, sampled_b, hidden_acts, + num_true=num_true_test) + + logits_tf, labels_tf = self._ComputeSampledLogitsTF( + weights, biases, hidden_acts, labels, num_sampled, + self._num_classes, + num_true=num_true_test, + sampled_vals=test_sampled_vals, + remove_accidental_hits=True, + subtract_log_q=False) + + logits_tf_val, labels_tf_val = sess.run([logits_tf, labels_tf]) + self.assertEqual(logits_np.shape, logits_tf_val.shape) + self.assertEqual(labels_np.shape, labels_tf_val.shape) + + def testComputeSampledLogitsValues(self): + # Here we check the actual numerics. + weights, biases, hidden_acts = self._GenerateTestInputs() + eps = 1e-3 + sampled = [1, 0, 2, 3] + num_sampled = len(sampled) + true_exp = np.empty([self._batch_size, 1], dtype=np.float32) + true_exp.fill(0.5) + sampled_exp = np.empty([num_sampled], dtype=np.float32) + sampled_exp.fill(0.5) + sampled_w, sampled_b = weights[sampled], biases[sampled] + test_sampled_vals = (sampled, true_exp, sampled_exp) + + with self.test_session() as sess: + for num_true_test in range(1, 5): + # Generate test data for this run + labels = np.random.randint(low=0, high=self._num_classes, + size=self._batch_size * num_true_test) + true_w, true_b = weights[labels], biases[labels] + + # Test 1: Without accidental hit removal or subtract_log_q + logits_np, labels_np = self._ComputeSampledLogitsNP( + true_w, true_b, sampled_w, sampled_b, hidden_acts, + num_true=num_true_test) + logits_tf, labels_tf = self._ComputeSampledLogitsTF( + weights, biases, hidden_acts, labels, num_sampled, + self._num_classes, + num_true=num_true_test, + sampled_vals=test_sampled_vals, + subtract_log_q=False, + remove_accidental_hits=False, + name="sampled_loss_test1_num_true%d" % num_true_test) + + logits_tf_val, labels_tf_val = sess.run([logits_tf, labels_tf]) + self.assertAllClose(logits_np, logits_tf_val, eps) + self.assertAllClose(labels_np, labels_tf_val, eps) + + # Test 2: With accidental hit removal, no subtract_log_q + logits_tf, labels_tf = self._ComputeSampledLogitsTF( + weights, biases, hidden_acts, labels, num_sampled, + self._num_classes, + num_true=num_true_test, + sampled_vals=test_sampled_vals, + subtract_log_q=False, + remove_accidental_hits=True, + name="sampled_loss_test2_num_true%d" % num_true_test) + + # Test that the exponentiated logits of accidental hits are near 0. 
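# Rough sketch (illustrative values) of the accidental-hit property checked just
# below: a sampled class that collides with one of a row's true labels gets a
# huge negative logit, so exponentiating it yields essentially zero probability
# mass for that class.
import numpy as np

sampled = np.array([1, 0, 2, 3])
row_true_labels = np.array([2])                 # class 2 happens to be sampled too
sampled_logits = np.array([0.3, -0.1, 0.7, 0.2])
sampled_logits[np.isin(sampled, row_true_labels)] += -1e9
print(np.exp(sampled_logits))                   # third entry is ~0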
+ # First we need to find the hits in this random test run: + labels_reshape = labels.reshape((self._batch_size, num_true_test)) + logits_tf_np = logits_tf.eval() + for row in xrange(self._batch_size): + row_labels = labels_reshape[row, :] + for col in xrange(num_sampled): + if sampled[col] in row_labels: + # We need to add the num_true_test offset into logits_* + self.assertNear( + np.exp(logits_tf_np[row, col + num_true_test]), 0., eps) + + # Test 3: With subtract_log_q, no accidental hit removal + logits_np, labels_np = self._ComputeSampledLogitsNP( + true_w, true_b, sampled_w, sampled_b, hidden_acts, + num_true=num_true_test, + true_expected=true_exp, + sampled_expected=sampled_exp) + logits_tf, labels_tf = self._ComputeSampledLogitsTF( + weights, biases, hidden_acts, labels, num_sampled, + self._num_classes, + num_true=num_true_test, + sampled_vals=test_sampled_vals, + subtract_log_q=True, + remove_accidental_hits=False, + name="sampled_loss_test3_num_true%d" % num_true_test) + + logits_tf_val, labels_tf_val = sess.run([logits_tf, labels_tf]) + self.assertAllClose(logits_np, logits_tf_val, eps) + self.assertAllClose(labels_np, labels_tf_val, eps) + + def testNCELoss(self): + # A simple test to verify the numerics. + + def _SigmoidCrossEntropyWithLogits(logits, targets): + # logits, targets: float arrays of the same shape. + assert logits.shape == targets.shape + pred = 1. / (1. + np.exp(-logits)) + eps = 0.0001 + pred = np.minimum(np.maximum(pred, eps), 1 - eps) + return -targets * np.log(pred) - (1. - targets) * np.log(1. - pred) + + weights, biases, hidden_acts = self._GenerateTestInputs() + labels = [0, 1, 2] + true_w, true_b = weights[labels], biases[labels] + sampled = [1, 0, 2, 3] + num_sampled = len(sampled) + true_exp = np.empty([self._batch_size, 1], dtype=np.float32) + true_exp.fill(0.5) + sampled_exp = np.empty([num_sampled], dtype=np.float32) + sampled_exp.fill(0.5) + sampled_w, sampled_b = weights[sampled], biases[sampled] + test_sampled_vals = (sampled, true_exp, sampled_exp) + + with self.test_session(): + logits_np, labels_np = self._ComputeSampledLogitsNP( + true_w, true_b, sampled_w, sampled_b, hidden_acts, + true_expected=true_exp, + sampled_expected=sampled_exp) + nce_loss_np = np.sum( + _SigmoidCrossEntropyWithLogits(logits_np, labels_np), 1) + + labels_tf = constant_op.constant(labels, shape=(self._batch_size, 1)) + weights_tf = constant_op.constant(weights) + biases_tf = constant_op.constant(biases) + inputs_tf = constant_op.constant(hidden_acts) + + nce_loss_tf = nn.nce_loss( + weights_tf, biases_tf, inputs_tf, labels_tf, + num_sampled=1, + num_classes=self._num_classes, + num_true=1, + sampled_values=test_sampled_vals) + + self.assertAllClose(nce_loss_np, nce_loss_tf.eval(), 1e-4) + + def testSampledSoftmaxLoss(self): + # A simple test to verify the numerics. + + def _SoftmaxCrossEntropyWithLogits(logits, targets): + # logits, targets: float arrays of the same shape. 
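# Side note (not part of the test): up to the clipping epsilon, the reference
# _SigmoidCrossEntropyWithLogits above agrees with the usual numerically stable
# form max(x, 0) - x*z + log(1 + exp(-|x|)).
import numpy as np

x = np.array([-5.0, -0.5, 0.0, 2.0])   # logits
z = np.array([0.0, 1.0, 1.0, 0.0])     # targets
pred = 1.0 / (1.0 + np.exp(-x))
naive = -z * np.log(pred) - (1.0 - z) * np.log(1.0 - pred)
stable = np.maximum(x, 0) - x * z + np.log1p(np.exp(-np.abs(x)))
print(np.allclose(naive, stable))       # True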
+ assert logits.shape == targets.shape + stable_exp_logits = np.exp(logits - np.amax( + logits, axis=1, keepdims=True)) + pred = stable_exp_logits / np.sum(stable_exp_logits, 1, keepdims=True) + return -np.sum(targets * np.log(pred + 1.0e-20), axis=1) + + weights, biases, hidden_acts = self._GenerateTestInputs() + labels = [0, 1, 2] + true_w, true_b = weights[labels], biases[labels] + sampled = [1, 0, 2, 3] + num_sampled = len(sampled) + true_exp = np.full([self._batch_size, 1], fill_value=0.5, dtype=np.float32) + sampled_exp = np.full([num_sampled], fill_value=0.5, dtype=np.float32) + sampled_w, sampled_b = weights[sampled], biases[sampled] + test_sampled_vals = (sampled, true_exp, sampled_exp) + + with self.test_session(): + logits_np, labels_np = self._ComputeSampledLogitsNP( + true_w, true_b, sampled_w, sampled_b, hidden_acts, + true_expected=true_exp, + sampled_expected=sampled_exp) + sampled_softmax_loss_np = _SoftmaxCrossEntropyWithLogits(logits_np, + labels_np) + + labels_tf = constant_op.constant(labels, shape=(self._batch_size, 1)) + weights_tf = constant_op.constant(weights) + biases_tf = constant_op.constant(biases) + inputs_tf = constant_op.constant(hidden_acts) + + sampled_softmax_loss_tf = nn.sampled_softmax_loss( + weights_tf, biases_tf, inputs_tf, labels_tf, + num_sampled=1, + num_classes=self._num_classes, + num_true=1, + sampled_values=test_sampled_vals, + remove_accidental_hits=False) + + self.assertAllClose( + sampled_softmax_loss_np, sampled_softmax_loss_tf.eval(), 1e-4) + + +if __name__ == "__main__": + googletest.main() diff --git a/tensorflow/python/ops/numerics.py b/tensorflow/python/ops/numerics.py new file mode 100644 index 0000000000..93f5d5db20 --- /dev/null +++ b/tensorflow/python/ops/numerics.py @@ -0,0 +1,50 @@ +"""Connects all float and double tensors to CheckNumericsOp.""" + +from tensorflow.python.framework import ops +from tensorflow.python.framework import types +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops + + +def verify_tensor_all_finite(t, msg, name=None): + """Assert that the tensor does not contain any NaN's or Inf's. + + Args: + t: Tensor to check. + msg: Message to log on failure. + name: A name for this operation (optional). + + Returns: + Same tensor as `t`. + """ + with ops.op_scope([t], name, "VerifyFinite") as name: + t = ops.convert_to_tensor(t, name="t") + with ops.device(t.device or t.graph.get_default_device()): + verify_input = array_ops.check_numerics(t, message=msg) + out = control_flow_ops.with_dependencies([verify_input], t) + return out + + +def add_check_numerics_ops(): + """Connect a check_numerics to every floating point tensor. + + `check_numerics` operations themselves are added for each `float` or `double` + tensor in the graph. For all ops in the graph, the `check_numerics` op for + all of its (`float` or `double`) inputs is guaranteed to run before the + `check_numerics` op on any of its outputs. + + Returns: + A `group` op depending on all `check_numerics` ops added. + """ + check_op = [] + # This code relies on the ordering of ops in get_operations(). + # The consumer of a tensor always comes before that tensor's producer in + # this list. This is true because get_operations() returns ops in the order + # added, and ops can only be added once its inputs are added. 
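# Hedged usage sketch (not from this file; assumes the usual top-level TensorFlow
# bindings for the two helpers defined above): evaluation fails with an
# InvalidArgumentError naming the given message as soon as a checked float tensor
# contains a NaN or Inf.
import tensorflow as tf

x = tf.constant([1.0, 2.0, 3.0])
y = tf.verify_tensor_all_finite(tf.log(x - 1.0), msg="y has NaN or Inf")  # log(0) = -inf
checks = tf.add_check_numerics_ops()          # one check_numerics per float tensor
with tf.Session() as sess:
  sess.run([checks, y])                       # raises InvalidArgumentError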
+ for op in ops.get_default_graph().get_operations(): + for output in op.outputs: + if output.dtype in [types.float32, types.float64]: + message = op.name + ":" + str(output.value_index) + with ops.control_dependencies(check_op): + check_op = [array_ops.check_numerics(output, message=message)] + return control_flow_ops.group(*check_op) diff --git a/tensorflow/python/ops/op_def_library.py b/tensorflow/python/ops/op_def_library.py new file mode 100644 index 0000000000..5947b6df89 --- /dev/null +++ b/tensorflow/python/ops/op_def_library.py @@ -0,0 +1,640 @@ +"""Class to hold a library of OpDefs and use it to create Brain operations.""" + +import numbers + +from tensorflow.core.framework import attr_value_pb2 +from tensorflow.core.framework import op_def_pb2 +from tensorflow.core.framework import tensor_pb2 +from tensorflow.core.framework import tensor_shape_pb2 +from tensorflow.core.framework import types_pb2 +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.framework import types as types_lib +from tensorflow.python.ops import constant_op +from tensorflow.python.platform import logging + + +def _Attr(op_def, name): + for attr in op_def.attr: + if attr.name == name: + return attr + raise TypeError("Inconsistent OpDef for '%s', missing attr '%s'" % + (op_def.name, name)) + + +def _AttrValue(attr_protos, name): + if name in attr_protos: + return attr_protos[name] + raise TypeError("Inconsistent OpDef, missing attr '%s' from '%s'." % + (name, attr_protos)) + + +def _SatisfiesTypeConstraint(dtype, attr_def): + if attr_def.HasField("allowed_values"): + allowed_list = attr_def.allowed_values.list.type + if dtype not in allowed_list: + raise TypeError( + "DataType %s for attr '%s' not in list of allowed values: %s" % + (types_lib.as_dtype(dtype).name, attr_def.name, + ", ".join(types_lib.as_dtype(x).name for x in allowed_list))) + + +def _IsListParameter(arg): + if arg.number_attr: + return True + elif arg.type_list_attr: + return True + return False + + +def _NumTypeFields(arg): + num = 0 + if arg.type != types_pb2.DT_INVALID: num += 1 + if arg.type_attr: num += 1 + if arg.type_list_attr: num += 1 + return num + + +def _IsListValue(v): + return isinstance(v, (list, tuple)) + + +def _Flatten(l): + """Converts [1, 2, [3, 4], [5]] to [1, 2, 3, 4, 5].""" + # [1, 2, [3, 4], [5]] -> [[1], [2], [3, 4], [5]] + l_of_l = [x if _IsListValue(x) else [x] for x in l] + # [[1], [2], [3, 4], [5]] -> [1, 2, 3, 4, 5] + return [item for sublist in l_of_l for item in sublist] + + +def _Restructure(l, structure): + """Returns the elements of list l structured according to the given structure. + + A structure is represented by a list whose elements are either + `None` or a non-negative integer. `None` corresponds to a single + element in the output list, and an integer N corresponds to a nested + list of length N. + + The function returns a data structure whose shape is given by + `structure`, and whose elements are taken from `l`. If `structure` + is a singleton, the function returns the single data structure + implied by the 0th element of `structure`. For example: + + _Restructure(["foo", "bar", "baz", "qux"], [None, 2, None]) + -> ["foo", ["bar", "baz"], "qux"] + + _Restructure(["foo"], [None]) -> "foo" + + _Restructure(["foo"], [1]) -> ["foo"] + + _Restructure([], [0]) -> [] + + Args: + l: A list. + structure: A list whose elements are either `None` or a non-negative + integer. 
+ + Returns: + The elements of `l`, restructured according to `structure`. If + `structure` is a list of length 1, this function returns the + single data structure implied by `structure[0]`. + + """ + result = [] + current_index = 0 + for element in structure: + if element is None: + result.append(l[current_index]) + current_index += 1 + else: + result.append(l[current_index:current_index+element]) + current_index += element + + if len(result) == 1: + return result[0] + else: + return tuple(result) + + +def _MakeFloat(v, arg_name): + if not isinstance(v, numbers.Real): + raise TypeError("Expected float for argument '%s' not %s." % + (arg_name, repr(v))) + return float(v) + + +def _MakeInt(v, arg_name): + if isinstance(v, basestring): + raise TypeError("Expected int for argument '%s' not %s." % + (arg_name, repr(v))) + try: + return int(v) + except (ValueError, TypeError): + raise TypeError("Expected int for argument '%s' not %s." % + (arg_name, repr(v))) + + +def _MakeStr(v, arg_name): + if not isinstance(v, basestring): + raise TypeError("Expected string for argument '%s' not %s." % + (arg_name, repr(v))) + return str(v) # Convert unicode strings to bytes. + + +def _MakeBool(v, arg_name): + if not isinstance(v, bool): + raise TypeError("Expected bool for argument '%s' not %s." % + (arg_name, repr(v))) + return v + + +def _MakeType(v, attr_def): + try: + v = types_lib.as_dtype(v) + except TypeError: + raise TypeError("Expected DataType for argument '%s' not %s." % + (attr_def.name, repr(v))) + i = v.as_datatype_enum + _SatisfiesTypeConstraint(i, attr_def) + return i + + +def _MakeShape(v, arg_name): + """Convert v into a TensorShapeProto.""" + # Args: + # v: A TensorShapeProto, a list of ints, or a tensor_shape.TensorShape. + # arg_name: String, for error messages. + + # Returns: + # A TensorShapeProto. + if isinstance(v, tensor_shape_pb2.TensorShapeProto): + for d in v.dim: + if d.name: + logging.warning("Warning: TensorShapeProto with a named dimension: %s", + str(v)) + break + return v + s = tensor_shape.as_shape(v) + ret = tensor_shape_pb2.TensorShapeProto() + for i in s.as_dimension_list(): + ret.dim.add(size = i) + return ret + + +def _MakeTensor(v, arg_name): + """Ensure v is a TensorProto.""" + if isinstance(v, tensor_pb2.TensorProto): + return v + raise TypeError( + "Don't know how to convert %s to a TensorProto for argument '%s'" % + (repr(v), arg_name)) + + +class _OpInfo(object): + """All per-Op state we would like to precompute/validate.""" + + def __init__(self, op_def): + self.op_def = op_def + # TODO(josh11b): SWIG the ValidateOpDef() function from C++ and call it + # here, instead of these checks. 
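# Pure-Python sketch of how _Flatten and _Restructure above act as near-inverses
# for a given structure: inputs are flattened for graph construction, and the
# flat list of output tensors is later regrouped according to the OpDef's output
# structure.
def _flatten(l):
  return [item for x in l for item in (x if isinstance(x, (list, tuple)) else [x])]

flat = _flatten([1, 2, [3, 4], [5]])
print(flat)                                # [1, 2, 3, 4, 5]

structure = [None, None, 2, 1]             # None -> one element, int N -> list of N
result = []
i = 0
for s in structure:
  if s is None:
    result.append(flat[i])
    i += 1
  else:
    result.append(flat[i:i + s])
    i += s
print(result)                              # [1, 2, [3, 4], [5]]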
+ for arg in list(op_def.input_arg) + list(op_def.output_arg): + num_type_fields = _NumTypeFields(arg) + if num_type_fields != 1: + raise TypeError("Arg '%s' of '%s' must have one type field not %d" % + (arg.name, op_def.name, num_type_fields)) + if arg.type_attr: + attr_type = _Attr(op_def, arg.type_attr).type + if attr_type != "type": + raise TypeError("Attr '%s' of '%s' used as a type_attr " + "but has type %s" % + (arg.type_attr, op_def.name, attr_type)) + if arg.type_list_attr: + attr_type = _Attr(op_def, arg.type_list_attr).type + if attr_type != "list(type)": + raise TypeError( + "Attr '%s' of '%s' used as a type_list_attr but has type %s" % + (arg.type_attr, op_def.name, attr_type)) + if arg.number_attr: + attr_type = _Attr(op_def, arg.number_attr).type + if attr_type != "int": + raise TypeError( + "Attr '%s' of '%s' used as a number_attr but has type %s" % + (arg.number_attr, op_def.name, attr_type)) + + +class OpDefLibrary(object): + """Holds a collection of OpDefs, can add the corresponding Ops to a graph.""" + + def __init__(self): + self._ops = {} + + def add_op(self, op_def): + """Register an OpDef. May call apply_op with the name afterwards.""" + if not isinstance(op_def, op_def_pb2.OpDef): + raise TypeError("%s is %s, not an op_def_pb2.OpDef" % + (op_def, type(op_def))) + if not op_def.name: + raise ValueError("%s missing name." % op_def) + if op_def.name in self._ops: + raise RuntimeError("Op name %s registered twice." % op_def.name) + self._ops[op_def.name] = _OpInfo(op_def) + + def add_op_list(self, op_list): + """Register the OpDefs from an OpList.""" + if not isinstance(op_list, op_def_pb2.OpList): + raise TypeError("%s is %s, not an op_def_pb2.OpList" % + (op_list, type(op_list))) + for op_def in op_list.op: + self.add_op(op_def) + + def apply_op(self, op_type_name, g=None, name=None, **keywords): + # pylint: disable=g-doc-args + """Add a node invoking a registered Op to a graph. + + Config proto extensions must be provided via the 'ext' keyword argument. + Example usage: + # input1 and input2 can be Tensors or anything ops.convert_to_tensor() + # will convert to a Tensor. + op_def_library.apply_op("op", input1=input1, input2=input2) + # If none of the inputs are Tensors and your session doesn't have a + # default graph, you will have to specify the graph. + op_def_library.apply_op("op", input1=input1, g=g) + # Can specify a node name. + op_def_library.apply_op("op", input1=input1, name="node_name") + # Must use keyword arguments, with the names specified in the OpDef. + op_def_library.apply_op("op", input_name=input, attr_name=attr) + + All attrs must either be inferred from an input or specified. + (If inferred, the attr must not be specified.) If an attr has a default + value specified in the Op's OpDef, then you may pass None as the value + of that attr to get the default. + + Args: + op_type_name: string. Must match the name field of a registered Op. + g: The graph context (optional) + name: string. Optional name of the created op. + **keywords: input Tensor and attr arguments specified by name, + and optional parameters to pass when constructing the Operation. + + Returns: + The Tensor(s) representing the output of the operation, or the Operation + itself if there are no outputs. + + Raises: + RuntimeError: On some errors. + TypeError: On some errors. + ValueError: On some errors. 
+ """ + op_info = self._ops.get(op_type_name, None) + if op_info is None: + raise RuntimeError("Unrecognized Op name " + op_type_name) + op_def = op_info.op_def + + # Determine the graph context. + try: + # Need to flatten all the arguments into a list. + # pylint: disable=protected-access + g = ops._get_graph_from_inputs(_Flatten(keywords.values()), graph=g) + # pyline: enable=protected-access + except AssertionError as e: + raise RuntimeError( + "Need to specify g=graph to Op '%s' (could not determine graph due " + "to: %s)" % (op_type_name, e.message)) + + # Default name if not specified. + if name is None: + name = op_type_name + + # Requires that op_def has passed validation (using the C++ + # ValidateOpDef() from ../framework/op_def_util.h). + attrs = {} + inputs = [] + input_types = [] + with g.as_default(), ops.name_scope(name) as scope: + + # Perform input type inference + inferred_from = {} + for input_arg in op_def.input_arg: + input_name = input_arg.name + if input_name in keywords: + values = keywords.pop(input_name) + elif input_name + "_" in keywords: + # Handle the case where the name is a keyword or built-in + # for Python so we use the name + _ instead. + input_name += "_" + values = keywords.pop(input_name) + else: + raise TypeError("No argument for input " + input_name) + + # Goals: + # * Convert values to Tensors if it contains constants. + # * Verify that values is a list if that matches the input_arg's + # type. + # * If the input_arg's type is determined by attrs, either set + # those attrs and validate those attr values are legal (if + # they have not yet been set) or validate the input matches + # the type indicated by the attrs (if they have already been + # inferred via an earlier input). + # * If the input_arg has an explicit type, make sure the input + # conforms. + + if _IsListParameter(input_arg): + if not _IsListValue(values): + raise TypeError( + "Expected list for '%s' argument to '%s' Op, not %s." % + (input_name, op_type_name, values)) + # In cases where we expect all elements of the list to have the + # same dtype, try to cast non-Tensor elements to that type. + dtype = None + if input_arg.type != types_pb2.DT_INVALID: + dtype = input_arg.type + elif input_arg.number_attr: + if input_arg.type_attr in attrs: + dtype = attrs[input_arg.type_attr] + else: + for t in values: + if isinstance(t, ops.Tensor): + dtype = t.dtype + break + + try: + values = ops.convert_n_to_tensor_or_indexed_slices( + values, name=input_arg.name, + dtype=types_lib.as_dtype(dtype).base_dtype if dtype else None) + except (TypeError, ValueError): + assert dtype is not None, "Should not fail if dtype is None" + assert input_arg.number_attr, "Should be number_attr case" + # What types does the conversion function think values have? + values = ops.convert_n_to_tensor_or_indexed_slices(values) + observed = ", ".join(v.dtype.base_dtype.name for v in values) + + prefix = ( + "Tensors in list passed to '%s' of '%s' Op have types [%s]" % + (input_name, op_type_name, observed)) + if input_arg.type != types_pb2.DT_INVALID: + raise TypeError("%s that do not match expected type %s." % + (prefix, types_lib.as_dtype(dtype).name)) + elif input_arg.type_attr in attrs: + raise TypeError("%s that do not match type %s inferred from " + "earlier arguments." % + (prefix, types_lib.as_dtype(dtype).name)) + else: + raise TypeError("%s that don't all match." 
% prefix) + + types = [x.dtype for x in values] + inputs.extend(values) + else: + # In cases where we have an expected type, try to convert non-Tensor + # arguments to that type. + dtype = None + if input_arg.type != types_pb2.DT_INVALID: + dtype = input_arg.type + elif input_arg.type_attr in attrs: + dtype = attrs[input_arg.type_attr] + + try: + values = ops.convert_to_tensor( + values, name=input_arg.name, dtype=dtype) + except ValueError: + # What type does convert_to_tensor think it has? + observed = ops.convert_to_tensor(values).dtype.name + prefix = ("Input '%s' of '%s' Op has type %s that does not match" % + (input_name, op_type_name, observed)) + if input_arg.type != types_pb2.DT_INVALID: + raise TypeError("%s expected type of %s." % + (prefix, types_lib.as_dtype(input_arg.type).name)) + else: + raise TypeError( + "%s type %s of argument '%s'." % + (prefix, types_lib.as_dtype(attrs[input_arg.type_attr]).name, + inferred_from[input_arg.type_attr])) + + types = [values.dtype] + inputs.append(values) + base_types = [x.base_dtype for x in types] + + if input_arg.number_attr: + # <number-attr> * <type> or <number-attr> * <type-attr> + if input_arg.number_attr in attrs: + if len(values) != attrs[input_arg.number_attr]: + raise ValueError( + "List argument '%s' to '%s' Op with length %d must match " + "length %d of argument '%s'." % + (input_name, op_type_name, len(values), + attrs[input_arg.number_attr], + inferred_from[input_arg.number_attr])) + else: + attrs[input_arg.number_attr] = len(values) + inferred_from[input_arg.number_attr] = input_name + num_attr = _Attr(op_def, input_arg.number_attr) + if num_attr.has_minimum and len(values) < num_attr.minimum: + raise ValueError( + "List argument '%s' to '%s' Op with length %d shorter " + "than minimum length %d." % + (input_name, op_type_name, len(values), num_attr.minimum)) + # All tensors must have the same base type. + if any([bt != base_types[0] for bt in base_types]): + raise TypeError( + "All tensors passed to '%s' of '%s' Op " + "must have the same type." % + (input_name, op_type_name)) + if input_arg.type != types_pb2.DT_INVALID: + # <number-attr> * <type> case + if base_types and base_types[0] != input_arg.type: + assert False, "Unreachable" + elif input_arg.type_attr in attrs: + # <number-attr> * <type-attr> case, where <type-attr> already + # has an inferred value. + if base_types and base_types[0] != attrs[input_arg.type_attr]: + assert False, "Unreachable" + else: + # <number-attr> * <type-attr> case, where we are now setting + # the <type-attr> based on this input + if not base_types: + raise TypeError( + "Don't know how to infer type variable from empty input " + "list passed to input '%s' of '%s' Op." 
% + (input_name, op_type_name)) + attrs[input_arg.type_attr] = base_types[0] + inferred_from[input_arg.type_attr] = input_name + type_attr = _Attr(op_def, input_arg.type_attr) + _SatisfiesTypeConstraint(base_types[0], type_attr) + elif input_arg.type_attr: + # <type-attr> + attr_value = base_types[0] + if input_arg.type_attr in attrs: + if attrs[input_arg.type_attr] != attr_value: + assert False, "Unreachable" + else: + for base_type in base_types: + _SatisfiesTypeConstraint(base_type, + _Attr(op_def, input_arg.type_attr)) + attrs[input_arg.type_attr] = attr_value + inferred_from[input_arg.type_attr] = input_name + elif input_arg.type_list_attr: + # <type-list-attr> + attr_value = base_types + if input_arg.type_list_attr in attrs: + if attrs[input_arg.type_list_attr] != attr_value: + raise TypeError( + "Input '%s' of '%s' Op has type list of %s that does not " + "match type list %s of argument '%s'." % + (input_name, op_type_name, + ", ".join(types_lib.as_dtype(x).name for x in attr_value), + ", ".join(types_lib.as_dtype(x).name + for x in attrs[input_arg.type_list_attr]), + inferred_from[input_arg.type_list_attr])) + else: + for base_type in base_types: + _SatisfiesTypeConstraint(base_type, + _Attr(op_def, input_arg.type_list_attr)) + attrs[input_arg.type_list_attr] = attr_value + inferred_from[input_arg.type_list_attr] = input_name + else: + # single Tensor with specified type + if base_types[0] != input_arg.type: + assert False, "Unreachable" + + if input_arg.is_ref: + if not all(x.is_ref_dtype for x in types): + raise TypeError( + "Input '%s' of '%s' Op requires l-value input" % + (input_name, op_type_name)) + input_types.extend(types) + else: + input_types.extend(base_types) + + # Process remaining attrs + for attr in op_def.attr: + # Skip attrs that have already had their values inferred + if attr.name in attrs: + if attr.name in keywords: + raise TypeError( + "Should not specify value for inferred attr '%s'." % attr.name) + continue + if attr.name in keywords: + attrs[attr.name] = keywords.pop(attr.name) + elif attr.name + "_" in keywords: + # Attrs whose names match Python keywords have an extra '_' + # appended, so we must check for that as well. + attrs[attr.name] = keywords.pop(attr.name + "_") + else: + raise TypeError("No argument for attr " + attr.name) + + # Convert attr values to AttrValue protos. + attr_protos = {} + for attr_def in op_def.attr: + key = attr_def.name + value = attrs[key] + attr_value = attr_value_pb2.AttrValue() + if attr_def.HasField("default_value") and value is None: + attr_value.CopyFrom(attr_def.default_value) + attr_protos[key] = attr_value + continue + if attr_def.type.startswith("list("): + if not _IsListValue(value): + raise TypeError("Expected list for attr " + key) + if attr_def.has_minimum: + if len(value) < attr_def.minimum: + raise ValueError("Attr '%s' of '%s' Op passed list of length %d " + "less than minimum %d." % + (key, op_type_name, len(value), + attr_def.minimum)) + if attr_def.type == "string": + attr_value.s = _MakeStr(value, key) + if attr_def.HasField("allowed_values"): + if attr_value.s not in attr_def.allowed_values.list.s: + raise ValueError( + "Attr '%s' of '%s' Op passed string '%s' not in: \"%s\"." 
% + (key, op_type_name, attr_value.s, + '", "'.join(attr_def.allowed_values.list.s))) + elif attr_def.type == "list(string)": + attr_value.list.s.extend([_MakeStr(x, key) for x in value]) + if attr_def.HasField("allowed_values"): + for x in attr_value.list.s: + if x not in attr_def.allowed_values.list.s: + raise ValueError( + "Attr '%s' of '%s' Op passed string '%s' not in: \"%s\"." % + (key, op_type_name, x, + '", "'.join(attr_def.allowed_values.list.s))) + elif attr_def.type == "int": + attr_value.i = _MakeInt(value, key) + if attr_def.has_minimum: + if attr_value.i < attr_def.minimum: + raise ValueError( + "Attr '%s' of '%s' Op passed %d less than minimum %d." % + (key, op_type_name, attr_value.i, attr_def.minimum)) + elif attr_def.type == "list(int)": + attr_value.list.i.extend([_MakeInt(x, key) for x in value]) + elif attr_def.type == "float": + attr_value.f = _MakeFloat(value, key) + elif attr_def.type == "list(float)": + attr_value.list.f.extend([_MakeFloat(x, key) for x in value]) + elif attr_def.type == "bool": + attr_value.b = _MakeBool(value, key) + elif attr_def.type == "list(bool)": + attr_value.list.b.extend([_MakeBool(x, key) for x in value]) + elif attr_def.type == "type": + attr_value.type = _MakeType(value, attr_def) + elif attr_def.type == "list(type)": + attr_value.list.type.extend( + [_MakeType(x, attr_def) for x in value]) + elif attr_def.type == "shape": + attr_value.shape.CopyFrom(_MakeShape(value, key)) + elif attr_def.type == "list(shape)": + attr_value.list.shape.extend( + [_MakeShape(x, key) for x in value]) + elif attr_def.type == "tensor": + attr_value.tensor.CopyFrom(_MakeTensor(value, key)) + elif attr_def.type == "list(tensor)": + attr_value.list.tensor.extend( + [_MakeTensor(x, key) for x in value]) + else: + raise TypeError("Unrecognized Attr type " + attr_def.type) + + attr_protos[key] = attr_value + del attrs # attrs is no longer authoritative, use attr_protos instead + + # Determine output types (possibly using attrs) + output_types = [] + output_structure = [] + for arg in op_def.output_arg: + types = [] + if arg.number_attr: + n = _AttrValue(attr_protos, arg.number_attr).i + if arg.type_attr: + types = [_AttrValue(attr_protos, arg.type_attr).type] * n + else: + types = [arg.type] * n + output_structure.append(n) + elif arg.type_attr: + t = _AttrValue(attr_protos, arg.type_attr) + types = [t.type] + output_structure.append(None) + elif arg.type_list_attr: + t = _AttrValue(attr_protos, arg.type_list_attr) + types = t.list.type + output_structure.append(len(t.list.type)) + else: + types = [arg.type] + output_structure.append(None) + if arg.is_ref: + types = [types_lib.as_dtype(x).as_ref for x in types] + output_types.extend(types) + + if keywords: + raise TypeError("apply_op() got unexpected keyword arguments: " + + ", ".join(sorted(keywords.keys()))) + + # Add Op to graph + if output_structure: + op = g.create_op(op_type_name, inputs, output_types, name=scope, + input_types=input_types, attrs=attr_protos, + op_def=op_def) + outputs = op.outputs + return _Restructure(ops.convert_n_to_tensor_or_indexed_slices(outputs), + output_structure) + else: + return g.create_op(op_type_name, inputs, output_types, name=scope, + input_types=input_types, attrs=attr_protos, + op_def=op_def) diff --git a/tensorflow/python/ops/op_def_library_test.py b/tensorflow/python/ops/op_def_library_test.py new file mode 100644 index 0000000000..72de4586a3 --- /dev/null +++ b/tensorflow/python/ops/op_def_library_test.py @@ -0,0 +1,1402 @@ +"""Tests for 
tensorflow.python.ops.op_def_library.""" + +from google.protobuf import text_format + +from tensorflow.core.framework import op_def_pb2 +from tensorflow.core.framework import tensor_shape_pb2 +from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util +from tensorflow.python.framework import tensor_shape +from tensorflow.python.framework import types +from tensorflow.python.ops.op_def_library import OpDefLibrary +from tensorflow.python.platform import googletest + + +# NOTE(mrry): Dummy shape registrations for ops used in the tests. +ops.RegisterShape("Attr")(None) +ops.RegisterShape("AttrBool")(None) +ops.RegisterShape("AttrBoolList")(None) +ops.RegisterShape("AttrDefault")(None) +ops.RegisterShape("AttrEmptyListDefault")(None) +ops.RegisterShape("AttrEnum")(None) +ops.RegisterShape("AttrEnumList")(None) +ops.RegisterShape("AttrFloat")(None) +ops.RegisterShape("AttrListDefault")(None) +ops.RegisterShape("AttrListMin")(None) +ops.RegisterShape("AttrMin")(None) +ops.RegisterShape("AttrShape")(None) +ops.RegisterShape("AttrShapeList")(None) +ops.RegisterShape("Binary")(None) +ops.RegisterShape("ComplexStruct")(None) +ops.RegisterShape("InPolymorphicTwice")(None) +ops.RegisterShape("MixedStruct")(None) +ops.RegisterShape("NInPolymorphicTwice")(None) +ops.RegisterShape("NInTwice")(None) +ops.RegisterShape("NInTwoTypeVariables")(None) +ops.RegisterShape("NIntsIn")(None) +ops.RegisterShape("NIntsOut")(None) +ops.RegisterShape("NIntsOutDefault")(None) +ops.RegisterShape("NPolymorphicIn")(None) +ops.RegisterShape("NPolymorphicOut")(None) +ops.RegisterShape("NPolymorphicOutDefault")(None) +ops.RegisterShape("NPolymorphicRestrictIn")(None) +ops.RegisterShape("NPolymorphicRestrictOut")(None) +ops.RegisterShape("OutT")(None) +ops.RegisterShape("OutTypeList")(None) +ops.RegisterShape("OutTypeListRestrict")(None) +ops.RegisterShape("Polymorphic")(None) +ops.RegisterShape("PolymorphicDefaultOut")(None) +ops.RegisterShape("PolymorphicOut")(None) +ops.RegisterShape("RefIn")(None) +ops.RegisterShape("RefOut")(None) +ops.RegisterShape("ReservedAttr")(None) +ops.RegisterShape("ReservedInput")(None) +ops.RegisterShape("Restrict")(None) +ops.RegisterShape("Simple")(None) +ops.RegisterShape("SimpleStruct")(None) +ops.RegisterShape("TypeList")(None) +ops.RegisterShape("TypeListRestrict")(None) +ops.RegisterShape("TypeListTwice")(None) + + +class OpDefLibraryTest(test_util.TensorFlowTestCase): + + def setUp(self): + self._lib = OpDefLibrary() + self._g = ops.Graph() + self._default_graph_controller = self._g.as_default() + self._default_graph_controller.__enter__() + self._add_op("name: 'Simple' input_arg { name: 'a' type: DT_INT32 } " + "output_arg { name: 'out' type: DT_FLOAT }") + self._add_op("name: 'OutT' output_arg { name: 'a' type_attr: 'T' } " + "attr { name: 'T' type: 'type' }") + + def tearDown(self): + self._default_graph_controller.__exit__(None, None, None) + + def _add_op(self, ascii): + op_def = op_def_pb2.OpDef() + text_format.Merge(ascii, op_def) + self._lib.add_op(op_def) + + def Tensor(self, t, name="in"): + return self._lib.apply_op("OutT", T=t, name=name) + + def testNoRegisteredOpFails(self): + with self.assertRaises(RuntimeError) as cm: + self._lib.apply_op("unknown", g=self._g) + self.assertEqual(cm.exception.message, "Unrecognized Op name unknown") + + def testAddOpValidation(self): + with self.assertRaises(TypeError) as cm: + self._add_op("name: 'MissingTypeAttr' " + "input_arg { name: 'a' type_attr: 'T' } ") + self.assertEqual(cm.exception.message, + 
"Inconsistent OpDef for 'MissingTypeAttr', " + "missing attr 'T'") + + with self.assertRaises(TypeError) as cm: + self._add_op("name: 'BadTypeAttr' " + "output_arg { name: 'a' type_attr: 'T' } " + "attr { name: 'T' type: 'int' }") + self.assertEqual( + cm.exception.message, + "Attr 'T' of 'BadTypeAttr' used as a type_attr but has type int") + + with self.assertRaises(TypeError) as cm: + self._add_op("name: 'MissingNumberAttr' " + "input_arg { name: 'a' type: DT_INT32 number_attr: 'N' } ") + self.assertEqual(cm.exception.message, + "Inconsistent OpDef for 'MissingNumberAttr', " + "missing attr 'N'") + + with self.assertRaises(TypeError) as cm: + self._add_op("name: 'BadNumberAttr' " + "output_arg { name: 'a' type: DT_INT32 number_attr: 'N' } " + "attr { name: 'N' type: 'type' }") + self.assertEqual( + cm.exception.message, + "Attr 'N' of 'BadNumberAttr' used as a number_attr but has type type") + + with self.assertRaises(TypeError) as cm: + self._add_op("name: 'TwoTypesA' " + "input_arg { name: 'a' type: DT_INT32 type_attr: 'T' } " + "attr { name: 'T' type: 'type' }") + self.assertEqual(cm.exception.message, + "Arg 'a' of 'TwoTypesA' must have one type field not 2") + + with self.assertRaises(TypeError) as cm: + self._add_op("name: 'TwoTypesB' " + "input_arg { name: 'a' type: DT_INT32 type_list_attr: 'T' } " + "attr { name: 'T' type: 'list(type)' }") + self.assertEqual(cm.exception.message, + "Arg 'a' of 'TwoTypesB' must have one type field not 2") + + with self.assertRaises(TypeError) as cm: + self._add_op("name: 'ThreeTypes' " + "input_arg { name: 'a' type: DT_INT32 type_attr: 'T' " + "type_list_attr: 'U' } " + "attr { name: 'T' type: 'type' } " + "attr { name: 'U' type: 'list(type)' }") + self.assertEqual(cm.exception.message, + "Arg 'a' of 'ThreeTypes' must have one type field not 3") + + with self.assertRaises(TypeError) as cm: + self._add_op("name: 'NoTypes' output_arg { name: 'a' } ") + self.assertEqual(cm.exception.message, + "Arg 'a' of 'NoTypes' must have one type field not 0") + + def testSimple(self): + out = self._lib.apply_op("Simple", a=3) + self.assertEquals(types.float32, out.dtype) + self.assertProtoEquals(""" + name: 'Simple' op: 'Simple' input: 'Simple/a' + """, out.op.node_def) + + out = self._lib.apply_op("Simple", a=4) + self.assertProtoEquals(""" + name: 'Simple_1' op: 'Simple' input: 'Simple_1/a' + """, out.op.node_def) + + out = self._lib.apply_op("Simple", a=5, name="named") + self.assertProtoEquals(""" + name: 'named' op: 'Simple' input: 'named/a' + """, out.op.node_def) + + out = self._lib.apply_op("Simple", a=[[1, 2, 3], [4, 5, 6]], name="two_d") + self.assertProtoEquals(""" + name: 'two_d' op: 'Simple' input: 'two_d/a' + """, out.op.node_def) + + def testSimpleFailures(self): + with self.assertRaises(TypeError) as cm: + self._lib.apply_op("Simple", a="Bad string") + self.assertEqual(cm.exception.message, + "Expected int32, got 'Bad string' instead.") + + with self.assertRaises(TypeError) as cm: + self._lib.apply_op("Simple", a=self.Tensor(types.string)) + self.assertEqual(cm.exception.message, + "Input 'a' of 'Simple' Op has type string " + "that does not match expected type of int32.") + + with self.assertRaises(TypeError) as cm: + self._lib.apply_op("Simple", a=6, extra="bogus") + self.assertEqual(cm.exception.message, + "apply_op() got unexpected keyword arguments: extra") + + with self.assertRaises(TypeError) as cm: + self._lib.apply_op("Simple", a=6, extra1="bogus", extra2="also_bogus") + self.assertEqual(cm.exception.message, + "apply_op() got unexpected 
keyword arguments: extra1, " + "extra2") + + with self.assertRaises(TypeError) as cm: + self._lib.apply_op("Simple") + self.assertEqual(cm.exception.message, "No argument for input a") + + with self.assertRaises(TypeError) as cm: + self._lib.apply_op("Simple", wrong=7) + self.assertEqual(cm.exception.message, "No argument for input a") + + with self.assertRaises(TypeError) as cm: + self._lib.apply_op("Simple", a=[self.Tensor(types.int32)]) + self.assertStartsWith(cm.exception.message, "Expected int32, got") + + def testReservedInput(self): + self._add_op("name: 'ReservedInput' " + "input_arg { name: 'input' type: DT_INT32 } ") + op = self._lib.apply_op("ReservedInput", input_=7, name="x") + self.assertProtoEquals(""" + name: 'x' op: 'ReservedInput' input: 'x/input' + """, op.node_def) + + def testPolymorphic(self): + self._add_op("name: 'Polymorphic' " + "input_arg { name: 'a' type_attr: 'T' } " + "output_arg { name: 'out' type_attr: 'T' } " + "attr { name: 'T' type: 'type' }") + + out = self._lib.apply_op("Polymorphic", a=7, name="p") + self.assertEquals(types.int32, out.dtype) + self.assertProtoEquals(""" + name: 'p' op: 'Polymorphic' input: 'p/a' + attr { key: 'T' value { type: DT_INT32 } } + """, out.op.node_def) + + out = self._lib.apply_op("Polymorphic", a="s", name="q") + self.assertEquals(types.string, out.dtype) + self.assertProtoEquals(""" + name: 'q' op: 'Polymorphic' input: 'q/a' + attr { key: 'T' value { type: DT_STRING } } + """, out.op.node_def) + + out = self._lib.apply_op("Polymorphic", a=["s", "t", "u"], name="r") + self.assertEquals(types.string, out.dtype) + self.assertProtoEquals(""" + name: 'r' op: 'Polymorphic' input: 'r/a' + attr { key: 'T' value { type: DT_STRING } } + """, out.op.node_def) + + with self.assertRaises(TypeError) as cm: + self._lib.apply_op("Polymorphic", a="s", T=types.string) + self.assertEqual(cm.exception.message, + "Should not specify value for inferred attr 'T'.") + + with self.assertRaises(TypeError) as cm: + self._lib.apply_op("Polymorphic", a=[self.Tensor(types.bool)]) + self.assertEqual(cm.exception.message, + "List of Tensors when single Tensor expected") + + def testPolymorphicOut(self): + self._add_op("name: 'PolymorphicOut' " + "output_arg { name: 'out' type_attr: 'T' } " + "attr { name: 'T' type: 'type' }") + + out = self._lib.apply_op("PolymorphicOut", T=types.int32, name="p") + self.assertEquals(types.int32, out.dtype) + self.assertProtoEquals(""" + name: 'p' op: 'PolymorphicOut' + attr { key: 'T' value { type: DT_INT32 } } + """, out.op.node_def) + + out = self._lib.apply_op("PolymorphicOut", T=types.bool, name="q") + self.assertEquals(types.bool, out.dtype) + self.assertProtoEquals(""" + name: 'q' op: 'PolymorphicOut' + attr { key: 'T' value { type: DT_BOOL } } + """, out.op.node_def) + + with self.assertRaises(TypeError) as cm: + self._lib.apply_op("PolymorphicOut") + self.assertEqual(cm.exception.message, + "No argument for attr T") + + with self.assertRaises(TypeError) as cm: + self._lib.apply_op("PolymorphicOut", T=None) + self.assertEqual(cm.exception.message, + "Expected DataType for argument 'T' not None.") + + def testPolymorphicDefaultOut(self): + self._add_op("name: 'PolymorphicDefaultOut' " + "output_arg { name: 'out' type_attr: 'T' } " + "attr { name: 'T' type: 'type' " + " default_value { type: DT_STRING } }") + + out = self._lib.apply_op("PolymorphicDefaultOut", T=None, name="p") + self.assertEquals(types.string, out.dtype) + self.assertProtoEquals(""" + name: 'p' op: 'PolymorphicDefaultOut' + attr { key: 'T' value { 
type: DT_STRING } } + """, out.op.node_def) + + out = self._lib.apply_op("PolymorphicDefaultOut", T=types.bool, + name="q") + self.assertEquals(types.bool, out.dtype) + self.assertProtoEquals(""" + name: 'q' op: 'PolymorphicDefaultOut' + attr { key: 'T' value { type: DT_BOOL } } + """, out.op.node_def) + + def testBinary(self): + self._add_op("name: 'Binary' " + "input_arg { name: 'a' type_attr: 'T' } " + "input_arg { name: 'b' type_attr: 'T' } " + "output_arg { name: 'out' type_attr: 'T' } " + "attr { name: 'T' type: 'type' }") + + out = self._lib.apply_op("Binary", a=8, b=9, name="b") + self.assertEquals(types.int32, out.dtype) + self.assertProtoEquals(""" + name: 'b' op: 'Binary' input: 'b/a' input: 'b/b' + attr { key: 'T' value { type: DT_INT32 } } + """, out.op.node_def) + + out = self._lib.apply_op("Binary", a="left", b="right", name="c") + self.assertEquals(types.string, out.dtype) + self.assertProtoEquals(""" + name: 'c' op: 'Binary' input: 'c/a' input: 'c/b' + attr { key: 'T' value { type: DT_STRING } } + """, out.op.node_def) + + with self.assertRaises(TypeError) as cm: + self._lib.apply_op("Binary", a="left", b=12) + self.assertEqual(cm.exception.message, + "Expected string, got 12 instead.") + + with self.assertRaises(TypeError) as cm: + self._lib.apply_op("Binary", a=self.Tensor(types.string), + b=self.Tensor(types.int32)) + self.assertEqual(cm.exception.message, + "Input 'b' of 'Binary' Op has type int32 " + "that does not match type string of argument 'a'.") + + def testRestrict(self): + self._add_op("name: 'Restrict' " + "input_arg { name: 'a' type_attr: 'T' } " + "output_arg { name: 'out' type_attr: 'T' } " + "attr { name: 'T' type: 'type' allowed_values { list { " + " type: DT_STRING type: DT_BOOL } } }") + + out = self._lib.apply_op("Restrict", a="foo", name="g") + self.assertEquals(types.string, out.dtype) + self.assertProtoEquals(""" + name: 'g' op: 'Restrict' input: 'g/a' + attr { key: 'T' value { type: DT_STRING } } + """, out.op.node_def) + + out = self._lib.apply_op("Restrict", a=True, name="h") + self.assertEquals(types.bool, out.dtype) + self.assertProtoEquals(""" + name: 'h' op: 'Restrict' input: 'h/a' + attr { key: 'T' value { type: DT_BOOL } } + """, out.op.node_def) + + with self.assertRaises(TypeError) as cm: + self._lib.apply_op("Restrict", a=17) + self.assertEqual(cm.exception.message, + "DataType int32 for attr 'T' " + "not in list of allowed values: " + "string, bool") + + def testTypeList(self): + self._add_op("name: 'TypeList' " + "input_arg { name: 'a' type_list_attr: 'T' } " + "attr { name: 'T' type: 'list(type)' }") + + op = self._lib.apply_op("TypeList", a=["foo"], name="z") + self.assertProtoEquals(""" + name: 'z' op: 'TypeList' input: 'z/a_0' + attr { key: 'T' value { list { type: DT_STRING } } } + """, op.node_def) + + op = self._lib.apply_op("TypeList", a=[True, 12], name="y") + self.assertProtoEquals(""" + name: 'y' op: 'TypeList' input: 'y/a_0' input: 'y/a_1' + attr { key: 'T' value { list { type: DT_BOOL type: DT_INT32 } } } + """, op.node_def) + + op = self._lib.apply_op("TypeList", a=[], name="empty") + self.assertProtoEquals(""" + name: 'empty' op: 'TypeList' attr { key: 'T' value { list { } } } + """, op.node_def) + + with self.assertRaises(TypeError) as cm: + self._lib.apply_op("TypeList", a=17) + self.assertStartsWith(cm.exception.message, + "Expected list for 'a' " + "argument to 'TypeList' Op, not ") + + def testTypeListTwice(self): + self._add_op("name: 'TypeListTwice' " + "input_arg { name: 'a' type_list_attr: 'T' } " + "input_arg 
{ name: 'b' type_list_attr: 'T' } " + "attr { name: 'T' type: 'list(type)' }") + + op = self._lib.apply_op("TypeListTwice", a=["foo", True], b=["bar", False], + name="z") + self.assertProtoEquals(""" + name: 'z' op: 'TypeListTwice' + input: 'z/a_0' input: 'z/a_1' input: 'z/b_0' input: 'z/b_1' + attr { key: 'T' value { list { type: DT_STRING type: DT_BOOL } } } + """, op.node_def) + + op = self._lib.apply_op("TypeListTwice", a=[], b=[], name="empty") + self.assertProtoEquals(""" + name: 'empty' op: 'TypeListTwice' attr { key: 'T' value { list { } } } + """, op.node_def) + + with self.assertRaises(TypeError) as cm: + self._lib.apply_op("TypeListTwice", a=["foo", True], b=["bar", 6]) + self.assertEqual(cm.exception.message, + "Input 'b' of 'TypeListTwice' Op has type list of " + "string, int32 that does not match type list " + "string, bool of argument 'a'.") + + def testOutTypeList(self): + self._add_op("name: 'OutTypeList' " + "output_arg { name: 'out' type_list_attr: 'T' } " + "attr { name: 'T' type: 'list(type)' }") + + out, = self._lib.apply_op("OutTypeList", T=[types.float32], name="x") + self.assertEquals(types.float32, out.dtype) + self.assertProtoEquals(""" + name: 'x' op: 'OutTypeList' + attr { key: 'T' value { list { type: DT_FLOAT } } } + """, out.op.node_def) + + out1, out2 = self._lib.apply_op("OutTypeList", + T=[types.int32, types.bool], + name="w") + self.assertEquals(types.int32, out1.dtype) + self.assertEquals(types.bool, out2.dtype) + self.assertProtoEquals(""" + name: 'w' op: 'OutTypeList' + attr { key: 'T' value { list { type: DT_INT32 type: DT_BOOL } } } + """, out1.op.node_def) + + out = self._lib.apply_op("OutTypeList", T=[], name="empty") + self.assertEqual([], out) + + with self.assertRaises(TypeError) as cm: + self._lib.apply_op("OutTypeList", T=types.int32) + self.assertEqual(cm.exception.message, "Expected list for attr T") + + def testTypeListRestrict(self): + self._add_op("name: 'TypeListRestrict' " + "input_arg { name: 'a' type_list_attr: 'T' } " + "attr { name: 'T' type: 'list(type)' allowed_values { list { " + " type: DT_STRING type: DT_BOOL } } }") + + op = self._lib.apply_op("TypeListRestrict", a=["foo", False], name="v") + self.assertProtoEquals(""" + name: 'v' op: 'TypeListRestrict' input: 'v/a_0' input: 'v/a_1' + attr { key: 'T' value { list { type: DT_STRING type: DT_BOOL } } } + """, op.node_def) + + with self.assertRaises(TypeError) as cm: + self._lib.apply_op("TypeListRestrict", a=[True, 12]) + self.assertEqual(cm.exception.message, + "DataType int32 for attr 'T' " + "not in list of allowed values: string, bool") + + def testOutTypeListRestrict(self): + self._add_op("name: 'OutTypeListRestrict' " + "output_arg { name: 'out' type_list_attr: 't' } " + "attr { name: 't' type: 'list(type)' allowed_values { list { " + " type: DT_STRING type: DT_BOOL } } }") + + out1, out2 = self._lib.apply_op("OutTypeListRestrict", + t=[types.bool, types.string], + name="u") + self.assertEquals(types.bool, out1.dtype) + self.assertEquals(types.string, out2.dtype) + self.assertProtoEquals(""" + name: 'u' op: 'OutTypeListRestrict' + attr { key: 't' value { list { type: DT_BOOL type: DT_STRING } } } + """, out1.op.node_def) + + with self.assertRaises(TypeError) as cm: + self._lib.apply_op("OutTypeListRestrict", + t=[types.string, types.int32]) + self.assertEqual(cm.exception.message, + "DataType int32 for attr 't' " + "not in list of allowed values: string, bool") + + def testAttr(self): + self._add_op("name: 'Attr' attr { name: 'a' type: 'int' }") + op = 
self._lib.apply_op("Attr", a=12, name="t") + self.assertProtoEquals(""" + name: 't' op: 'Attr' attr { key: 'a' value { i: 12 } } + """, op.node_def) + + op = self._lib.apply_op("Attr", a=tensor_shape.Dimension(13), name="u") + self.assertProtoEquals(""" + name: 'u' op: 'Attr' attr { key: 'a' value { i: 13 } } + """, op.node_def) + + with self.assertRaises(TypeError) as cm: + self._lib.apply_op("Attr", a="bad") + self.assertEqual(cm.exception.message, + "Expected int for argument 'a' not 'bad'.") + + with self.assertRaises(TypeError) as cm: + self._lib.apply_op("Attr", a=[12]) + self.assertEqual(cm.exception.message, + "Expected int for argument 'a' not [12].") + + with self.assertRaises(TypeError) as cm: + self._lib.apply_op("Attr", a=None) + self.assertEqual(cm.exception.message, + "Expected int for argument 'a' not None.") + + with self.assertRaises(TypeError) as cm: + self._lib.apply_op("Attr") + self.assertEqual(cm.exception.message, "No argument for attr a") + + def testAttrFloat(self): + self._add_op("name: 'AttrFloat' attr { name: 'a' type: 'float' }") + + op = self._lib.apply_op("AttrFloat", a=1.2, name="t") + self.assertProtoEquals(""" + name: 't' op: 'AttrFloat' attr { key: 'a' value { f: 1.2 } } + """, op.node_def) + + op = self._lib.apply_op("AttrFloat", a=12, name="u") + self.assertProtoEquals(""" + name: 'u' op: 'AttrFloat' attr { key: 'a' value { f: 12 } } + """, op.node_def) + + with self.assertRaises(TypeError) as cm: + self._lib.apply_op("AttrFloat", a="bad") + self.assertEqual(cm.exception.message, + "Expected float for argument 'a' not 'bad'.") + + def testAttrBool(self): + self._add_op("name: 'AttrBool' attr { name: 'a' type: 'bool' }") + + op = self._lib.apply_op("AttrBool", a=True, name="t") + self.assertProtoEquals(""" + name: 't' op: 'AttrBool' attr { key: 'a' value { b: true } } + """, op.node_def) + + op = self._lib.apply_op("AttrBool", a=False, name="u") + self.assertProtoEquals(""" + name: 'u' op: 'AttrBool' attr { key: 'a' value { b: false } } + """, op.node_def) + + with self.assertRaises(TypeError) as cm: + self._lib.apply_op("AttrBool", a=0) + self.assertEqual(cm.exception.message, + "Expected bool for argument 'a' not 0.") + + with self.assertRaises(TypeError) as cm: + self._lib.apply_op("AttrBool", a=1) + self.assertEqual(cm.exception.message, + "Expected bool for argument 'a' not 1.") + + with self.assertRaises(TypeError) as cm: + self._lib.apply_op("AttrBool", a=[]) + self.assertEqual(cm.exception.message, + "Expected bool for argument 'a' not [].") + + def testAttrBoolList(self): + self._add_op("name: 'AttrBoolList' attr { name: 'a' type: 'list(bool)' }") + + op = self._lib.apply_op("AttrBoolList", a=[True, False, True], name="t") + self.assertProtoEquals(""" + name: 't' op: 'AttrBoolList' + attr { key: 'a' value { list { b: true b: false b:true } } } + """, op.node_def) + + op = self._lib.apply_op("AttrBoolList", a=[], name="u") + self.assertProtoEquals(""" + name: 'u' op: 'AttrBoolList' attr { key: 'a' value { list { } } } + """, op.node_def) + + with self.assertRaises(TypeError) as cm: + self._lib.apply_op("AttrBoolList", a=[0]) + self.assertEqual(cm.exception.message, + "Expected bool for argument 'a' not 0.") + + def testAttrMin(self): + self._add_op("name: 'AttrMin' attr { name: 'a' type: 'int' " + "has_minimum: true minimum: 5 }") + op = self._lib.apply_op("AttrMin", a=12, name="s") + self.assertProtoEquals(""" + name: 's' op: 'AttrMin' attr { key: 'a' value { i: 12 } } + """, op.node_def) + + with self.assertRaises(ValueError) as cm: + 
self._lib.apply_op("AttrMin", a=2) + self.assertEqual(cm.exception.message, + "Attr 'a' of 'AttrMin' Op passed 2 less than minimum 5.") + + def testAttrListMin(self): + self._add_op("name: 'AttrListMin' attr { name: 'a' type: 'list(int)' " + "has_minimum: true minimum: 2 }") + + op = self._lib.apply_op("AttrListMin", a=[1, 2], name="r") + self.assertProtoEquals(""" + name: 'r' op: 'AttrListMin' + attr { key: 'a' value { list { i: 1 i: 2 } } } + """, op.node_def) + + with self.assertRaises(ValueError) as cm: + self._lib.apply_op("AttrListMin", a=[17]) + self.assertEqual(cm.exception.message, + "Attr 'a' of 'AttrListMin' Op " + "passed list of length 1 less than minimum 2.") + + def testAttrEnum(self): + self._add_op("name: 'AttrEnum' " + "attr { name: 'a' type: 'string' " + " allowed_values { list { s: 'apples' s: 'oranges' } } }") + + op = self._lib.apply_op("AttrEnum", a="oranges", name="e") + self.assertProtoEquals(""" + name: 'e' op: 'AttrEnum' attr { key: 'a' value { s: 'oranges' } } + """, op.node_def) + + with self.assertRaises(ValueError) as cm: + self._lib.apply_op("AttrEnum", a="invalid") + self.assertEqual(cm.exception.message, + 'Attr \'a\' of \'AttrEnum\' Op ' + 'passed string \'invalid\' not in: ' + '"apples", "oranges".') + + def testAttrEnumList(self): + self._add_op("name: 'AttrEnumList' " + "attr { name: 'a' type: 'list(string)' " + " allowed_values { list { s: 'apples' s: 'oranges' } } }") + + op = self._lib.apply_op("AttrEnumList", a=["oranges", "apples"], name="f") + self.assertProtoEquals(""" + name: 'f' op: 'AttrEnumList' + attr { key: 'a' value { list { s: 'oranges' s: 'apples' } } } + """, op.node_def) + + with self.assertRaises(ValueError) as cm: + self._lib.apply_op("AttrEnumList", a=["apples", "invalid", "oranges"]) + self.assertEqual(cm.exception.message, + 'Attr \'a\' of \'AttrEnumList\' Op ' + 'passed string \'invalid\' not ' + 'in: "apples", "oranges".') + + def testAttrShape(self): + self._add_op("name: 'AttrShape' attr { name: 'a' type: 'shape' }") + + op = self._lib.apply_op("AttrShape", a=[5], name="s1") + self.assertProtoEquals(""" + name: 's1' op: 'AttrShape' + attr { key: 'a' value { shape { dim { size: 5 } } } } + """, op.node_def) + + op = self._lib.apply_op("AttrShape", a=(4, 3, 2), name="s2") + self.assertProtoEquals(""" + name: 's2' op: 'AttrShape' + attr { key: 'a' value { + shape { dim { size: 4 } dim { size: 3 } dim { size: 2 } } } } + """, op.node_def) + + op = self._lib.apply_op( + "AttrShape", a=tensor_shape.TensorShape([3, 2]), name="s3") + self.assertProtoEquals(""" + name: 's3' op: 'AttrShape' + attr { key: 'a' value { + shape { dim { size: 3 } dim { size: 2 } } } } + """, op.node_def) + + op = self._lib.apply_op("AttrShape", a=[], name="s4") + self.assertProtoEquals(""" + name: 's4' op: 'AttrShape' attr { key: 'a' value { shape { } } } + """, op.node_def) + + shape = tensor_shape_pb2.TensorShapeProto() + shape.dim.add().size = 6 + shape.dim.add().size = 3 + op = self._lib.apply_op("AttrShape", a=shape, name="s5") + self.assertProtoEquals(""" + name: 's5' op: 'AttrShape' + attr { key: 'a' value { shape { dim { size: 6 } dim { size: 3 } } } } + """, op.node_def) + + # TODO(josh11b): Re-enable this test once we stop promoting scalars to shapes. 
+ # with self.assertRaises(TypeError) as cm: + # self._lib.apply_op("AttrShape", a=5) + # self.assertEqual(cm.exception.message, + # "Don't know how to convert 5 to a TensorShapeProto for " + # "argument 'a'") + + with self.assertRaises(ValueError) as cm: + self._lib.apply_op("AttrShape", a="ABC") + + def testAttrShapeList(self): + self._add_op("name: 'AttrShapeList' attr { name: 'a' type: 'list(shape)' }") + + op = self._lib.apply_op("AttrShapeList", a=[[3, 2], [6, 5, 4]], name="sl") + self.assertProtoEquals(""" + name: 'sl' op: 'AttrShapeList' + attr { key: 'a' value { list { + shape { dim { size: 3 } dim { size: 2 } } + shape { dim { size: 6 } dim { size: 5 } dim { size: 4 } } } } } + """, op.node_def) + + op = self._lib.apply_op("AttrShapeList", a=[], name="esl") + self.assertProtoEquals(""" + name: 'esl' op: 'AttrShapeList' attr { key: 'a' value { list { } } } + """, op.node_def) + + def testAttrDefault(self): + self._add_op("name: 'AttrDefault' " + "attr { name: 'a' type: 'string' " + " default_value { s: 'banana' } }") + + op = self._lib.apply_op("AttrDefault", a=None, name="d") + self.assertProtoEquals(""" + name: 'd' op: 'AttrDefault' attr { key: 'a' value { s: 'banana' } } + """, op.node_def) + + op = self._lib.apply_op("AttrDefault", a="kiwi", name="c") + self.assertProtoEquals(""" + name: 'c' op: 'AttrDefault' attr { key: 'a' value { s: 'kiwi' } } + """, op.node_def) + + def testAttrListDefault(self): + self._add_op("name: 'AttrListDefault' " + "attr { name: 'a' type: 'list(int)' " + " default_value { list { i: 5 i: 15 } } }") + + op = self._lib.apply_op("AttrListDefault", a=None, name="b") + self.assertProtoEquals(""" + name: 'b' op: 'AttrListDefault' + attr { key: 'a' value { list { i: 5 i: 15 } } } + """, op.node_def) + + op = self._lib.apply_op("AttrListDefault", a=[3], name="a") + self.assertProtoEquals(""" + name: 'a' op: 'AttrListDefault' + attr { key: 'a' value { list { i: 3 } } } + """, op.node_def) + + op = self._lib.apply_op("AttrListDefault", a=[], name="empty") + self.assertProtoEquals(""" + name: 'empty' op: 'AttrListDefault' + attr { key: 'a' value { list { } } } + """, op.node_def) + + def testAttrEmptyListDefault(self): + self._add_op("name: 'AttrEmptyListDefault' " + "attr { name: 'a' type: 'list(float)' " + " default_value { list { } } }") + + op = self._lib.apply_op("AttrEmptyListDefault", a=None, name="b") + self.assertProtoEquals(""" + name: 'b' op: 'AttrEmptyListDefault' + attr { key: 'a' value { list { } } } + """, op.node_def) + + op = self._lib.apply_op("AttrEmptyListDefault", a=[3], name="a") + self.assertProtoEquals(""" + name: 'a' op: 'AttrEmptyListDefault' + attr { key: 'a' value { list { f: 3 } } } + """, op.node_def) + + op = self._lib.apply_op("AttrEmptyListDefault", a=[], name="empty") + self.assertProtoEquals(""" + name: 'empty' op: 'AttrEmptyListDefault' + attr { key: 'a' value { list { } } } + """, op.node_def) + + def testReservedAttr(self): + self._add_op("name: 'ReservedAttr' " + "attr { name: 'range' type: 'int' } ") + op = self._lib.apply_op("ReservedAttr", range_=7, name="x") + self.assertProtoEquals(""" + name: 'x' op: 'ReservedAttr' attr { key: 'range' value { i: 7 } } + """, op.node_def) + + def testNIntsIn(self): + self._add_op("name: 'NIntsIn' " + "input_arg { name: 'a' type: DT_INT32 number_attr: 'N' } " + "attr { name: 'N' type: 'int' has_minimum: true minimum: 2 }") + + op = self._lib.apply_op("NIntsIn", a=[1, 2], name="n") + self.assertProtoEquals(""" + name: 'n' op: 'NIntsIn' input: 'n/a_0' input: 'n/a_1' + attr { key: 'N' 
value { i: 2 } } + """, op.node_def) + + op = self._lib.apply_op("NIntsIn", a=[5, 4, 3, 2, 1], name="o") + self.assertProtoEquals(""" + name: 'o' op: 'NIntsIn' + input: 'o/a_0' input: 'o/a_1' input: 'o/a_2' input: 'o/a_3' input: 'o/a_4' + attr { key: 'N' value { i: 5 } } + """, op.node_def) + + with self.assertRaises(TypeError) as cm: + self._lib.apply_op("NIntsIn", a=["foo", "bar"]) + self.assertEqual(cm.exception.message, + "Tensors in list passed to 'a' of 'NIntsIn' Op have types " + "[string, string] that do not match expected type int32.") + + with self.assertRaises(TypeError) as cm: + self._lib.apply_op("NIntsIn", a=[self.Tensor(types.string), + self.Tensor(types.string)]) + self.assertEqual(cm.exception.message, + "Tensors in list passed to 'a' of 'NIntsIn' Op have " + "types [string, string] that do not match expected type " + "int32.") + + with self.assertRaises(ValueError) as cm: + self._lib.apply_op("NIntsIn", a=[99]) + self.assertEqual(cm.exception.message, + "List argument 'a' to 'NIntsIn' Op " + "with length 1 shorter than " + "minimum length 2.") + + with self.assertRaises(TypeError) as cm: + self._lib.apply_op("NIntsIn", a=[38, "bar"]) + self.assertEqual(cm.exception.message, + "Tensors in list passed to 'a' of 'NIntsIn' Op have types " + "[int32, string] that do not match expected type int32.") + + with self.assertRaises(TypeError) as cm: + self._lib.apply_op("NIntsIn", a=[self.Tensor(types.int32), + self.Tensor(types.string)]) + self.assertEqual(cm.exception.message, + "Tensors in list passed to 'a' of 'NIntsIn' Op " + "have types [int32, string] that do not match expected " + "type int32.") + + with self.assertRaises(TypeError) as cm: + self._lib.apply_op("NIntsIn", a=17) + self.assertStartsWith(cm.exception.message, + "Expected list for 'a' argument " + "to 'NIntsIn' Op, not ") + + def testNPolymorphicIn(self): + self._add_op("name: 'NPolymorphicIn' " + "input_arg { name: 'a' type_attr: 'T' number_attr: 'N' } " + "attr { name: 'T' type: 'type' } " + "attr { name: 'N' type: 'int' has_minimum: true minimum: 2 }") + + op = self._lib.apply_op("NPolymorphicIn", a=[1, 2], name="n") + self.assertProtoEquals(""" + name: 'n' op: 'NPolymorphicIn' input: 'n/a_0' input: 'n/a_1' + attr { key: 'T' value { type: DT_INT32 } } + attr { key: 'N' value { i: 2 } } + """, op.node_def) + + op = self._lib.apply_op("NPolymorphicIn", a=[5, 4, 3, 2, 1], name="o") + self.assertProtoEquals(""" + name: 'o' op: 'NPolymorphicIn' + input: 'o/a_0' input: 'o/a_1' input: 'o/a_2' input: 'o/a_3' input: 'o/a_4' + attr { key: 'T' value { type: DT_INT32 } } + attr { key: 'N' value { i: 5 } } + """, op.node_def) + + op = self._lib.apply_op("NPolymorphicIn", a=["foo", "bar"], name="p") + self.assertProtoEquals(""" + name: 'p' op: 'NPolymorphicIn' input: 'p/a_0' input: 'p/a_1' + attr { key: 'T' value { type: DT_STRING } } + attr { key: 'N' value { i: 2 } } + """, op.node_def) + + op = self._lib.apply_op("NPolymorphicIn", + a=[1, self.Tensor(types.float32, name="x")], + name="q") + self.assertProtoEquals(""" + name: 'q' op: 'NPolymorphicIn' input: 'q/a_0' input: 'x' + attr { key: 'T' value { type: DT_FLOAT } } + attr { key: 'N' value { i: 2 } } + """, op.node_def) + + with self.assertRaises(ValueError) as cm: + self._lib.apply_op("NPolymorphicIn", a=[99]) + self.assertEqual(cm.exception.message, + "List argument 'a' to 'NPolymorphicIn' Op with length 1 " + "shorter than minimum length 2.") + + with self.assertRaises(TypeError) as cm: + self._lib.apply_op("NPolymorphicIn", a=[38, "bar"]) + 
self.assertEqual(cm.exception.message, + "All tensors passed to 'a' of 'NPolymorphicIn' " + "Op must have the same type.") + + with self.assertRaises(TypeError) as cm: + self._lib.apply_op("NPolymorphicIn", + a=[38, self.Tensor(types.string)]) + self.assertEqual(cm.exception.message, + "Tensors in list passed to 'a' of 'NPolymorphicIn' Op " + "have types [int32, string] that don't all match.") + + with self.assertRaises(TypeError) as cm: + self._lib.apply_op("NPolymorphicIn", + a=["abcd", self.Tensor(types.int32)]) + self.assertEqual(cm.exception.message, + "Tensors in list passed to 'a' of 'NPolymorphicIn' Op " + "have types [string, int32] that don't all match.") + + with self.assertRaises(TypeError) as cm: + self._lib.apply_op("NPolymorphicIn", a=17) + self.assertStartsWith(cm.exception.message, + "Expected list for 'a' argument " + "to 'NPolymorphicIn' Op, not ") + + def testNPolymorphicRestrictIn(self): + self._add_op("name: 'NPolymorphicRestrictIn' " + "input_arg { name: 'a' type_attr: 'T' number_attr: 'N' } " + "attr { name: 'T' type: 'type' allowed_values { " + " list { type: DT_STRING type: DT_BOOL } } } " + "attr { name: 'N' type: 'int' has_minimum: true minimum: 2 }") + + op = self._lib.apply_op("NPolymorphicRestrictIn", a=["foo", "bar"], + name="p") + self.assertProtoEquals(""" + name: 'p' op: 'NPolymorphicRestrictIn' input: 'p/a_0' input: 'p/a_1' + attr { key: 'T' value { type: DT_STRING } } + attr { key: 'N' value { i: 2 } } + """, op.node_def) + + op = self._lib.apply_op("NPolymorphicRestrictIn", a=[False, True, False], + name="b") + self.assertProtoEquals(""" + name: 'b' op: 'NPolymorphicRestrictIn' + input: 'b/a_0' input: 'b/a_1' input: 'b/a_2' + attr { key: 'T' value { type: DT_BOOL } } + attr { key: 'N' value { i: 3 } } + """, op.node_def) + + with self.assertRaises(TypeError) as cm: + self._lib.apply_op("NPolymorphicRestrictIn", a=[1, 2]) + self.assertEqual(cm.exception.message, + "DataType int32 for attr 'T' " + "not in list of allowed values: string, bool") + + def testNInTwice(self): + self._add_op("name: 'NInTwice' " + "input_arg { name: 'a' type: DT_INT32 number_attr: 'N' } " + "input_arg { name: 'b' type: DT_STRING number_attr: 'N' } " + "attr { name: 'N' type: 'int' has_minimum: true minimum: 0 }") + + op = self._lib.apply_op("NInTwice", a=[1, 2], b=["one", "two"], name="n") + self.assertProtoEquals(""" + name: 'n' op: 'NInTwice' + input: 'n/a_0' input: 'n/a_1' input: 'n/b_0' input: 'n/b_1' + attr { key: 'N' value { i: 2 } } + """, op.node_def) + + op = self._lib.apply_op("NInTwice", a=[], b=[], name="o") + self.assertProtoEquals(""" + name: 'o' op: 'NInTwice' attr { key: 'N' value { i: 0 } } + """, op.node_def) + + with self.assertRaises(ValueError) as cm: + self._lib.apply_op("NInTwice", a=[1, 2, 3], b=["too short"]) + self.assertEqual(cm.exception.message, + "List argument 'b' to 'NInTwice' Op " + "with length 1 must match " + "length 3 of argument 'a'.") + + def testNInPolymorphicTwice(self): + self._add_op("name: 'NInPolymorphicTwice' " + "input_arg { name: 'a' type_attr: 'T' number_attr: 'N' } " + "input_arg { name: 'b' type_attr: 'T' number_attr: 'N' } " + "attr { name: 'T' type: 'type' } " + "attr { name: 'N' type: 'int' has_minimum: true minimum: 0 }") + + op = self._lib.apply_op("NInPolymorphicTwice", a=[1, 2], b=[3, 4], name="n") + self.assertProtoEquals(""" + name: 'n' op: 'NInPolymorphicTwice' + input: 'n/a_0' input: 'n/a_1' input: 'n/b_0' input: 'n/b_1' + attr { key: 'T' value { type: DT_INT32 } } + attr { key: 'N' value { i: 2 } } + """, 
op.node_def) + + with self.assertRaises(ValueError) as cm: + self._lib.apply_op("NInPolymorphicTwice", a=[1, 2, 3], b=[5]) + self.assertEqual(cm.exception.message, + "List argument 'b' to 'NInPolymorphicTwice' Op " + "with length 1 " + "must match length 3 of argument 'a'.") + + with self.assertRaises(TypeError) as cm: + self._lib.apply_op("NInPolymorphicTwice", a=[1, 2], b=["one", "two"]) + self.assertEqual(cm.exception.message, + "Tensors in list passed to 'b' of 'NInPolymorphicTwice' " + "Op have types [string, string] that do not match type " + "int32 inferred from earlier arguments.") + + with self.assertRaises(TypeError) as cm: + self._lib.apply_op("NInPolymorphicTwice", + a=[self.Tensor(types.int32)], + b=[self.Tensor(types.string)]) + self.assertEqual(cm.exception.message, + "Tensors in list passed to 'b' of " + "'NInPolymorphicTwice' Op have types [string] that do not " + "match type int32 inferred from earlier arguments.") + + def testNInTwoTypeVariables(self): + self._add_op("name: 'NInTwoTypeVariables' " + "input_arg { name: 'a' type_attr: 'S' number_attr: 'N' } " + "input_arg { name: 'b' type_attr: 'T' number_attr: 'N' } " + "attr { name: 'S' type: 'type' } " + "attr { name: 'T' type: 'type' } " + "attr { name: 'N' type: 'int' has_minimum: true minimum: 0 }") + + op = self._lib.apply_op("NInTwoTypeVariables", a=[1, 2], b=[True, False], + name="n") + self.assertProtoEquals(""" + name: 'n' op: 'NInTwoTypeVariables' + input: 'n/a_0' input: 'n/a_1' input: 'n/b_0' input: 'n/b_1' + attr { key: 'S' value { type: DT_INT32 } } + attr { key: 'T' value { type: DT_BOOL } } + attr { key: 'N' value { i: 2 } } + """, op.node_def) + + op = self._lib.apply_op("NInTwoTypeVariables", a=[1, 2], b=[3, 4], name="o") + self.assertProtoEquals(""" + name: 'o' op: 'NInTwoTypeVariables' + input: 'o/a_0' input: 'o/a_1' input: 'o/b_0' input: 'o/b_1' + attr { key: 'S' value { type: DT_INT32 } } + attr { key: 'T' value { type: DT_INT32 } } + attr { key: 'N' value { i: 2 } } + """, op.node_def) + + op = self._lib.apply_op("NInTwoTypeVariables", + a=[self.Tensor(types.int32, name="q")], + b=[self.Tensor(types.string, name="r")], + name="p") + self.assertProtoEquals(""" + name: 'p' op: 'NInTwoTypeVariables' input: 'q' input: 'r' + attr { key: 'S' value { type: DT_INT32 } } + attr { key: 'T' value { type: DT_STRING } } + attr { key: 'N' value { i: 1 } } + """, op.node_def) + + with self.assertRaises(ValueError) as cm: + self._lib.apply_op("NInTwoTypeVariables", a=[1, 2, 3], b=["5"]) + self.assertEqual(cm.exception.message, + "List argument 'b' to 'NInTwoTypeVariables' Op " + "with length 1 " + "must match length 3 of argument 'a'.") + + def testInPolymorphicTwice(self): + self._add_op("name: 'InPolymorphicTwice' " + "input_arg { name: 'a' type_attr: 'T' number_attr: 'N' } " + "input_arg { name: 'b' type_attr: 'T' number_attr: 'M' } " + "attr { name: 'T' type: 'type' } " + "attr { name: 'N' type: 'int' has_minimum: true minimum: 0 } " + "attr { name: 'M' type: 'int' has_minimum: true minimum: 0 } ") + + op = self._lib.apply_op("InPolymorphicTwice", a=[8], b=[3, 4, 5], name="n") + self.assertProtoEquals(""" + name: 'n' op: 'InPolymorphicTwice' + input: 'n/a_0' input: 'n/b_0' input: 'n/b_1' input: 'n/b_2' + attr { key: 'T' value { type: DT_INT32 } } + attr { key: 'N' value { i: 1 } } + attr { key: 'M' value { i: 3 } } + """, op.node_def) + + op = self._lib.apply_op("InPolymorphicTwice", a=[8], b=[], name="o") + self.assertProtoEquals(""" + name: 'o' op: 'InPolymorphicTwice' input: 'o/a_0' + attr { key: 'T' value { 
type: DT_INT32 } } + attr { key: 'N' value { i: 1 } } + attr { key: 'M' value { i: 0 } } + """, op.node_def) + + with self.assertRaises(TypeError) as cm: + self._lib.apply_op("InPolymorphicTwice", a=[], b=[3, 4, 5]) + self.assertEqual(cm.exception.message, + "Don't know how to infer type variable from empty input " + "list passed to input 'a' of 'InPolymorphicTwice' Op.") + + with self.assertRaises(TypeError) as cm: + self._lib.apply_op("InPolymorphicTwice", a=[1, 2], b=["one", "two"]) + self.assertEqual(cm.exception.message, + "Tensors in list passed to 'b' of 'InPolymorphicTwice' Op " + "have types [string, string] that do not match type int32 " + "inferred from earlier arguments.") + + with self.assertRaises(TypeError) as cm: + self._lib.apply_op("InPolymorphicTwice", + a=[self.Tensor(types.int32)], + b=[self.Tensor(types.string)]) + self.assertEqual(cm.exception.message, + "Tensors in list passed to 'b' of 'InPolymorphicTwice' " + "Op have types [string] that do not match type int32 " + "inferred from earlier arguments.") + + def testNIntsOut(self): + self._add_op("name: 'NIntsOut' " + "output_arg { name: 'a' type: DT_INT32 number_attr: 'N' } " + "attr { name: 'N' type: 'int' has_minimum: true minimum: 2 }") + + out1, out2 = self._lib.apply_op("NIntsOut", N=2, name="n") + self.assertEquals(types.int32, out1.dtype) + self.assertEquals(types.int32, out2.dtype) + self.assertProtoEquals(""" + name: 'n' op: 'NIntsOut' attr { key: 'N' value { i: 2 } } + """, out1.op.node_def) + + out1, out2, out3, out4, out5 = self._lib.apply_op( + "NIntsOut", N=5, name="o") + self.assertEquals(types.int32, out1.dtype) + self.assertEquals(types.int32, out2.dtype) + self.assertEquals(types.int32, out3.dtype) + self.assertEquals(types.int32, out4.dtype) + self.assertEquals(types.int32, out5.dtype) + self.assertProtoEquals(""" + name: 'o' op: 'NIntsOut' attr { key: 'N' value { i: 5 } } + """, out5.op.node_def) + + with self.assertRaises(ValueError) as cm: + self._lib.apply_op("NIntsOut", N=1) + self.assertEqual(cm.exception.message, + "Attr 'N' of 'NIntsOut' Op passed 1 less than minimum 2.") + + with self.assertRaises(TypeError) as cm: + self._lib.apply_op("NIntsOut", N=[3]) + self.assertEqual(cm.exception.message, + "Expected int for argument 'N' not [3].") + + def testNIntsOutDefault(self): + self._add_op("name: 'NIntsOutDefault' " + "output_arg { name: 'a' type: DT_INT32 number_attr: 'N' } " + "attr { name: 'N' type: 'int' has_minimum: true minimum: 2" + " default_value { i:3 } }") + + out1, out2, out3 = self._lib.apply_op( + "NIntsOutDefault", N=None, name="z") + self.assertEquals(types.int32, out1.dtype) + self.assertEquals(types.int32, out2.dtype) + self.assertEquals(types.int32, out3.dtype) + self.assertProtoEquals(""" + name: 'z' op: 'NIntsOutDefault' attr { key: 'N' value { i: 3 } } + """, out1.op.node_def) + + out1, out2 = self._lib.apply_op("NIntsOutDefault", N=2, name="y") + self.assertEquals(types.int32, out1.dtype) + self.assertEquals(types.int32, out2.dtype) + self.assertProtoEquals(""" + name: 'y' op: 'NIntsOutDefault' attr { key: 'N' value { i: 2 } } + """, out2.op.node_def) + + def testNPolymorphicOut(self): + self._add_op("name: 'NPolymorphicOut' " + "output_arg { name: 'a' type_attr: 'T' number_attr: 'N' } " + "attr { name: 'T' type: 'type' } " + "attr { name: 'N' type: 'int' has_minimum: true minimum: 2 }") + + out1, out2 = self._lib.apply_op("NPolymorphicOut", N=2, + T=types.int32, name="n") + self.assertEquals(types.int32, out1.dtype) + self.assertEquals(types.int32, out2.dtype) + 
self.assertProtoEquals(""" + name: 'n' op: 'NPolymorphicOut' + attr { key: 'T' value { type: DT_INT32 } } + attr { key: 'N' value { i: 2 } } + """, out1.op.node_def) + + out1, out2, out3 = self._lib.apply_op( + "NPolymorphicOut", T=types.string, N=3, name="o") + self.assertEquals(types.string, out1.dtype) + self.assertEquals(types.string, out2.dtype) + self.assertEquals(types.string, out3.dtype) + self.assertProtoEquals(""" + name: 'o' op: 'NPolymorphicOut' + attr { key: 'T' value { type: DT_STRING } } + attr { key: 'N' value { i: 3 } } + """, out3.op.node_def) + + with self.assertRaises(ValueError) as cm: + self._lib.apply_op("NPolymorphicOut", N=1, T=types.string) + self.assertEqual(cm.exception.message, + "Attr 'N' of 'NPolymorphicOut' Op " + "passed 1 less than minimum 2.") + + with self.assertRaises(TypeError) as cm: + self._lib.apply_op("NPolymorphicOut", N=3, T=[types.string]) + self.assertEqual( + cm.exception.message, + "Expected DataType for argument 'T' not [tf.string].") + + def testNPolymorphicOutDefault(self): + self._add_op("name: 'NPolymorphicOutDefault' " + "output_arg { name: 'a' type_attr: 'T' number_attr: 'N' } " + "attr { name: 'T' type: 'type'" + " default_value { type: DT_BOOL } } " + "attr { name: 'N' type: 'int' has_minimum: true minimum: 2 " + " default_value { i: 2 } }") + + out1, out2 = self._lib.apply_op( + "NPolymorphicOutDefault", N=None, T=None, name="r") + self.assertEquals(types.bool, out1.dtype) + self.assertEquals(types.bool, out2.dtype) + self.assertProtoEquals(""" + name: 'r' op: 'NPolymorphicOutDefault' + attr { key: 'T' value { type: DT_BOOL } } + attr { key: 'N' value { i: 2 } } + """, out1.op.node_def) + + out1, out2, out3 = self._lib.apply_op( + "NPolymorphicOutDefault", N=3, T=None, name="s") + self.assertEquals(types.bool, out1.dtype) + self.assertEquals(types.bool, out2.dtype) + self.assertEquals(types.bool, out3.dtype) + self.assertProtoEquals(""" + name: 's' op: 'NPolymorphicOutDefault' + attr { key: 'T' value { type: DT_BOOL } } + attr { key: 'N' value { i: 3 } } + """, out1.op.node_def) + + out1, out2 = self._lib.apply_op( + "NPolymorphicOutDefault", N=None, T=types.int32, name="t") + self.assertEquals(types.int32, out1.dtype) + self.assertEquals(types.int32, out2.dtype) + self.assertProtoEquals(""" + name: 't' op: 'NPolymorphicOutDefault' + attr { key: 'T' value { type: DT_INT32 } } + attr { key: 'N' value { i: 2 } } + """, out1.op.node_def) + + out1, out2, out3 = self._lib.apply_op( + "NPolymorphicOutDefault", N=3, T=types.int32, name="u") + self.assertEquals(types.int32, out1.dtype) + self.assertEquals(types.int32, out2.dtype) + self.assertEquals(types.int32, out3.dtype) + self.assertProtoEquals(""" + name: 'u' op: 'NPolymorphicOutDefault' + attr { key: 'T' value { type: DT_INT32 } } + attr { key: 'N' value { i: 3 } } + """, out1.op.node_def) + + def testNPolymorphicRestrictOut(self): + self._add_op("name: 'NPolymorphicRestrictOut' " + "output_arg { name: 'a' type_attr: 'T' number_attr: 'N' } " + "attr { name: 'T' type: 'type' allowed_values { " + " list { type: DT_STRING type: DT_BOOL } } } " + "attr { name: 'N' type: 'int' has_minimum: true minimum: 2 }") + + out1, out2, out3 = self._lib.apply_op( + "NPolymorphicRestrictOut", N=3, T=types.bool, name="u") + self.assertEquals(types.bool, out1.dtype) + self.assertEquals(types.bool, out2.dtype) + self.assertEquals(types.bool, out3.dtype) + self.assertProtoEquals(""" + name: 'u' op: 'NPolymorphicRestrictOut' + attr { key: 'T' value { type: DT_BOOL } } + attr { key: 'N' value { i: 3 } } + 
""", out1.op.node_def) + + with self.assertRaises(TypeError) as cm: + self._lib.apply_op("NPolymorphicRestrictOut", N=2, T=types.int32) + self.assertEqual(cm.exception.message, + "DataType int32 for attr 'T' " + "not in list of allowed values: string, bool") + + def testRef(self): + self._add_op("name: 'RefIn' " + "input_arg { name: 'a' type_attr: 'T' is_ref: true } " + "attr { name: 'T' type: 'type' } ") + self._add_op("name: 'RefOut' " + "output_arg { name: 'a' type_attr: 'T' is_ref: true } " + "attr { name: 'T' type: 'type' } ") + + out = self._lib.apply_op("RefOut", T=types.bool, name="o") + self.assertEquals(types.bool_ref, out.dtype) + self.assertProtoEquals(""" + name: 'o' op: 'RefOut' + attr { key: 'T' value { type: DT_BOOL } } + """, out.op.node_def) + + op = self._lib.apply_op("RefIn", a=out, name="i") + self.assertProtoEquals(""" + name: 'i' op: 'RefIn' input: 'o' + attr { key: 'T' value { type: DT_BOOL } } + """, op.node_def) + + # Can pass ref to non-ref input. + out = self._lib.apply_op("RefOut", T=types.int32, name="r") + out = self._lib.apply_op("Simple", a=out, name="s") + self.assertProtoEquals(""" + name: 's' op: 'Simple' input: 'r' + """, out.op.node_def) + + # Can't pass non-ref to ref input. + with self.assertRaises(TypeError) as cm: + self._lib.apply_op("RefIn", a=2) + self.assertEqual(cm.exception.message, + "Input 'a' of 'RefIn' Op requires l-value input") + + def testSpecifyDevice(self): + with self._g.device("ADevice"): + self._lib.apply_op("Simple", a=3) + # We look at the whole graph here to make sure the Const op is also given + # the specified device. + graph_def = self._g.as_graph_def() + self.assertEqual(len(graph_def.node), 2) + for node in graph_def.node: + self.assertEqual(node.device, "ADevice") + + def testStructuredOutputSingleList(self): + self._add_op("name: 'SimpleStruct' " + "output_arg { name: 'a' type: DT_INT32 number_attr: 'n_a' } " + "attr { name: 'n_a' type: 'int' }") + for n_a in [0, 1, 3]: + a = self._lib.apply_op("SimpleStruct", n_a=n_a) + self.assertTrue(isinstance(a, list)) + self.assertEqual(n_a, len(a)) + + def testStructuredOutputListAndSingle(self): + self._add_op("name: 'MixedStruct' " + "output_arg { name: 'a' type: DT_INT32 number_attr: 'n_a' } " + "output_arg { name: 'b' type: DT_FLOAT } " + "attr { name: 'n_a' type: 'int' }") + for n_a in [0, 1, 3]: + a, b = self._lib.apply_op("MixedStruct", n_a=n_a) + self.assertTrue(isinstance(a, list)) + self.assertEqual(n_a, len(a)) + self.assertTrue(all(x.dtype == types.int32 for x in a)) + self.assertTrue(isinstance(b, ops.Tensor)) + self.assertEqual(types.float32, b.dtype) + + def testStructuredOutputMultipleLists(self): + self._add_op("name: 'ComplexStruct' " + "output_arg { name: 'a' type: DT_INT32 number_attr: 'n_a' } " + "output_arg { name: 'b' type: DT_INT64 number_attr: 'n_b' } " + "output_arg { name: 'c' type_list_attr: 't_c' } " + "attr { name: 'n_a' type: 'int' } " + "attr { name: 'n_b' type: 'int' } " + "attr { name: 't_c' type: 'list(type)' }") + for n_a in [0, 1, 3]: + for n_b in [0, 1, 3]: + for t_c in [[], + [types.int32], + [types.int32, types.float32]]: + a, b, c = self._lib.apply_op("ComplexStruct", + n_a=n_a, n_b=n_b, t_c=t_c) + + self.assertEqual(n_a, len(a)) + self.assertTrue(all(x.dtype == types.int32 for x in a)) + self.assertEqual(n_b, len(b)) + self.assertTrue(all(x.dtype == types.int64 for x in b)) + self.assertEqual(t_c, [x.dtype for x in c]) + + +class OpDefLibraryGraphTest(test_util.TensorFlowTestCase): + + def setUp(self): + self._lib = OpDefLibrary() + 
self._g = ops.Graph() + self._add_op("name: 'Simple' input_arg { name: 'a' type: DT_INT32 } " + "output_arg { name: 'out' type: DT_FLOAT }") + self._add_op("name: 'Binary' " + "input_arg { name: 'a' type_attr: 'T' } " + "input_arg { name: 'b' type_attr: 'T' } " + "output_arg { name: 'out' type_attr: 'T' } " + "attr { name: 'T' type: 'type' }") + + def _add_op(self, ascii): + op_def = op_def_pb2.OpDef() + text_format.Merge(ascii, op_def) + self._lib.add_op(op_def) + + def testNoGraph(self): + out = self._lib.apply_op("Simple", a=3) + self.assertEquals(out.graph, ops.get_default_graph()) + + def testDefaultGraph(self): + with self._g.as_default(): + out = self._lib.apply_op("Simple", a=3) + self.assertEquals(out.graph, self._g) + + def testIgnoreDefaultGraphWithGraphArgument(self): + default_g = ops.Graph() + with default_g.as_default(): + out = self._lib.apply_op("Simple", a=3, g=self._g) + self.assertEquals(ops.get_default_graph(), default_g) + self.assertEquals(out.graph, self._g) + + def testDifferentGraphFails(self): + a = self._lib.apply_op("Simple", a=3, g=self._g) + other_g = ops.Graph() + b = self._lib.apply_op("Simple", a=4, g=other_g) + with self.assertRaises(ValueError) as cm: + self._lib.apply_op("Binary", a=a, b=b) + self.assertTrue("must be from the same graph" in cm.exception.message) + + def testDifferentGraphFailsWithGraphArgument(self): + other_g = ops.Graph() + a = self._lib.apply_op("Simple", a=3, g=other_g) + b = self._lib.apply_op("Simple", a=4, g=other_g) + with self.assertRaises(ValueError) as cm: + self._lib.apply_op("Binary", a=a, b=b, g=self._g) + self.assertTrue( + "not from the passed-in graph" in cm.exception.message) + + +if __name__ == "__main__": + googletest.main() diff --git a/tensorflow/python/ops/parsing_ops.py b/tensorflow/python/ops/parsing_ops.py new file mode 100644 index 0000000000..dc954a3776 --- /dev/null +++ b/tensorflow/python/ops/parsing_ops.py @@ -0,0 +1,390 @@ +"""Parsing Ops.""" + +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.framework import tensor_util +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import common_shapes +from tensorflow.python.ops import constant_op +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import gen_parsing_ops +from tensorflow.python.ops import logging_ops +from tensorflow.python.ops import math_ops +# pylint: disable=wildcard-import,undefined-variable +from tensorflow.python.ops.gen_parsing_ops import * + + +ops.NoGradient("DecodeRaw") +ops.NoGradient("StringToNumber") + + +# pylint: disable=protected-access +def parse_example(serialized, + names=None, + sparse_keys=None, + sparse_types=None, + dense_keys=None, + dense_types=None, + dense_defaults=None, + dense_shapes=None, + name="ParseExample"): + """Parse Example protos. + + Args: + serialized: string vector, a batch of binary serialized Example protos. + names: A string vector, the names of the serialized protos. + "names" may contain, e.g., table key (descriptive) names for the + corresponding serialized protos. These are purely useful for debugging + purposes, and the presence of values here has no effect on the output. + "names" may be an empty vector, if no names are available. + If non-empty, this vector must be the same length as "serialized". + sparse_keys: A string list of keys in the Examples' features. + These keys are associated with sparse values. + sparse_types: A list of DTypes. 
+ This list's length must match that of sparse_keys. Currently + parse_example supports tf.float32 (FloatList), tf.int64 (Int64List), + and tf.string (BytesList). + dense_keys: A string list of keys in the Examples' features. + These keys are associated with dense values. + dense_types: A list of DTypes. + This list's length must match that of dense_keys. Currently + parse_example supports tf.float32 (FloatList), tf.int64 (Int64List), + and tf.string (BytesList). + dense_defaults: A dict of {key:Tensor} (some may be missing). + The keys of the dict must match the dense_keys of the feature. + If a key is not present in this dictionary, the corresponding dense + Feature is required in all elements of serialized. + dense_shapes: A list of tuples. + Entries provide the shape of data in each dense Feature in features. + The length of dense_shapes must be the same as the length of dense_keys. + The number of elements in the Feature corresponding to dense_key[j] + must always have np.prod(dense_shapes[j]) entries. + If dense_shapes[j] == (D0, D1, ..., DN) then the the shape of output + Tensor dense_values[j] will be (|serialized|, D0, D1, ..., DN): + The dense outputs are just the inputs row-stacked by batch. + name: (Optional) Name of Op in the graph. + + Returns: + A dictionary mapping keys to Tensors and SparseTensors. + + The key dense_keys[j] is mapped to a tensor of type dense_types[j] and + of shape (serialized.size(),) + dense_shapes[j] (i.e., the dense outputs are + inputs, reshaped in row-major format and then row-stacked by batch). + + The key sparse_keys[j] is mapped to a SparseTensor of type sparse_types[j]. + The SparseTensor represents a ragged matrix. Its indices are [batch, index] + where "batch" is is the batch entry the value is from, and "index" is the + value's index in the list of values associated with that feature + and example. For example, if one expects a tf.float32 sparse feature "ft" + and three serialized examples are provided: + + serialized = [ + features: + { feature: [ key: { "ft" value: float_list: { value: [1.0, 2.0] } } ] }, + features: + { feature: [] }, + features: + { feature: [ key: { "ft" value: float_list: { value: [3.0] } } ] } + ] + + then the output will look like: + + {"ft": SparseTensor(indices=[[0, 0], [0, 1], [2, 0]], + values=[1.0, 2.0, 3.0], + shape=(3, 2)) } + + Raises: + ValueError: If sparse and dense keys intersect, or input lengths do not + match up for sparse_* (similarly for dense_*). + TypeError: If an input is malformed. 
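+
+  A minimal calling sketch (the key names, dtypes, and the `tf` alias below
+  are illustrative assumptions, not requirements of the op):
+
+    parsed = parse_example(
+        serialized,                        # 1-D string Tensor of Examples
+        sparse_keys=["ft"], sparse_types=[tf.float32],
+        dense_keys=["age"], dense_types=[tf.int64],
+        dense_defaults={"age": -1}, dense_shapes=[(1,)])
+    ft = parsed["ft"]    # SparseTensor, as described above
+    age = parsed["age"]  # dense Tensor of shape (batch_size, 1)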
+ + Example input, format, and output: Just Sparse Inputs + ================================================ + + Given two brain.Example input protos: + + serialized: // serialized versions of the protos below + [features: { + feature: { key: "kw" value: { bytes_list: { value: [ "knit", "big" ] } } } + feature: { key: "gps" value: { float_list: { value: [] } } } + }, + features: { + feature: { key: "kw" value: { bytes_list: { value: [ "emmy" ] } } } + feature: { key: "dank" value: { int64_list: { value: [ 42 ] } } } + feature: { key: "gps" value: { } } + }] + names: ["input0", "input1"], + sparse_keys: ["kw", "dank", "gps"] + sparse_types: [DT_STRING, DT_INT64, DT_FLOAT] + + Then the expected output is a dictionary: + { + "kw": SparseTensor( + indices=[[0, 0], [0, 1], [1, 0]], + values=["knit", "big", "emmy"] + shape=[2, 2]), + "dank": SparseTensor( + indices=[[1, 0]], + values=[42], + shape=[2, 1]), + "gps": SparseTensor( + indices=[], + values=[], + shape=[2, 0]), + } + + + Example input, format, and output: Dense Inputs (without defaults) + ================================================================== + + Given two brain.Example input protos: + + serialized: // serialized versions of the protos below + [features: { + feature: { key: "age" value: { int64_list: { value: [ 0 ] } } } + feature: { key: "gender" value: { bytes_list: { value: [ "f" ] } } } + }, + features: { + feature: { key: "age" value: { int64_list: { value: [] } } } + feature: { key: "gender" value: { bytes_list: { value: [ "f" ] } } } + }] + names: ["input0", "input1"], + dense_keys: np.array(["age", "gender"]) + dense_types: [tf.int64, tf.string] + dense_defaults: { + "age": -1 # defaults to -1 if missing + # "gender" has no specified default so it's required + } + dense_shapes: [(1,), (1,)] # age, gender, label, weight + + Then the expected output is a dictionary: + { + "age": [[0], [-1]], + "gender": [["f"], ["f"]], + } + + + Example input, format, and output: Dense Inputs (with defaults) + =============================================================== + + Given two brain.Example input protos: + + serialized: // serialized versions of the protos below + [features: { + feature: { key: "weight" value: { float_list: { value: [ 1.0 ] } } } + }, + features: { + feature: { key: "label" value: { float_list: { value: [ -1.0, 0.0 ] } } } + }] + names: ["input0", "input1"], + dense_keys: np.array(["label", "weight"]) + dense_defaults: { + "label": [1.0, 2.0], # float (default: vector) + "weight": 5.0 # float (default: scalar, 5.0) + } + dense_shapes: [(2,), (1,)] # age, gender, label, weight + + Then the expected output is a dictionary: + { + "label": [[1.0, 2.0], [-1.0, 0.0]], + "weight": [[1.0], [5.0]], + } + """ + names = [] if names is None else names + dense_defaults = {} if dense_defaults is None else dense_defaults + sparse_keys = [] if sparse_keys is None else sparse_keys + sparse_types = [] if sparse_types is None else sparse_types + dense_keys = [] if dense_keys is None else dense_keys + dense_types = [] if dense_types is None else dense_types + dense_shapes = [ + []] * len(dense_keys) if dense_shapes is None else dense_shapes + + num_dense = len(dense_keys) + num_sparse = len(sparse_keys) + + if len(dense_shapes) != num_dense: + raise ValueError("len(dense_shapes) != len(dense_keys): %d vs. %d" + % (len(dense_shapes), num_dense)) + if len(dense_types) != num_dense: + raise ValueError("len(dense_types) != len(num_dense): %d vs. 
%d" + % (len(dense_types), num_dense)) + if len(sparse_types) != num_sparse: + raise ValueError("len(sparse_types) != len(sparse_keys): %d vs. %d" + % (len(sparse_types), num_sparse)) + if num_dense + num_sparse == 0: + raise ValueError("Must provide at least one sparse key or dense key") + if not set(dense_keys).isdisjoint(set(sparse_keys)): + raise ValueError( + "Dense and sparse keys must not intersect; intersection: %s" % + set(dense_keys).intersection(set(sparse_keys))) + + dense_defaults_vec = [] + for i, key in enumerate(dense_keys): + default_value = dense_defaults.get(key) + if default_value is None: + default_value = constant_op.constant([], dtype=dense_types[i]) + elif not isinstance(default_value, ops.Tensor): + default_value = ops.convert_to_tensor( + default_value, dtype=dense_types[i], name=key) + default_value = array_ops.reshape(default_value, dense_shapes[i]) + + dense_defaults_vec.append(default_value) + + dense_shapes = [tensor_util.MakeTensorShapeProto(shape) + if isinstance(shape, (list, tuple)) else shape + for shape in dense_shapes] + + outputs = gen_parsing_ops._parse_example( + serialized=serialized, + names=names, + dense_defaults=dense_defaults_vec, + sparse_keys=sparse_keys, + sparse_types=sparse_types, + dense_keys=dense_keys, + dense_shapes=dense_shapes, + name=name) + + (sparse_indices, sparse_values, sparse_shapes, dense_values) = outputs + + sparse_tensors = [ops.SparseTensor(ix, val, shape) for (ix, val, shape) + in zip(sparse_indices, sparse_values, sparse_shapes)] + + return dict( + zip(sparse_keys + dense_keys, sparse_tensors + dense_values)) + + +def parse_single_example(serialized, # pylint: disable=invalid-name + names=None, + sparse_keys=None, + sparse_types=None, + dense_keys=None, + dense_types=None, + dense_defaults=None, + dense_shapes=None, + name="ParseSingleExample"): + """Identical to parse_example but for scalar serialized and names. + + Args: + serialized: A scalar string, a single serialized Example. + See parse_example documentation for more details. + names: (Optional) A scalar string, the associated name. + See parse_example documentation for more details. + sparse_keys: See parse_example documentation for more details. + sparse_types: See parse_example documentation for more details. + dense_keys: See parse_example documentation for more details. + dense_types: See parse_example documentation for more details. + dense_defaults: See parse_example documentation for more details. + dense_shapes: See parse_example documentation for more details. + name: Optional op name. + + Returns: + A dictionary mapping keys to Tensors and SparseTensors. + + For dense tensors, the Tensor is identical to the output of parse_example, + except it is one less dimension (the first, batch, dimension is removed). + + For SparseTensors: + The first (batch) column of the indices matrix is removed + (it is now a column vector). + The values vector is unchanged. + The first (batch_size) entry of the shape vector is removed + (it is now a single element vector). + + Raises: + ValueError: if "scalar" or "names" have known shapes, and are not scalars. 
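+
+  A usage sketch (key names, dtypes, and the `tf` alias are illustrative
+  assumptions):
+
+    features = parse_single_example(
+        one_serialized_example,            # scalar string Tensor
+        sparse_keys=["ft"], sparse_types=[tf.float32],
+        dense_keys=["age"], dense_types=[tf.int64],
+        dense_defaults={"age": -1}, dense_shapes=[(1,)])
+    # features["age"] has shape (1,): the batch dimension has been removed.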
+ """ + with ops.op_scope([serialized], name, "parse_single_example"): + serialized = ops.convert_to_tensor(serialized) + serialized_shape = serialized.get_shape() + if serialized_shape.ndims is not None: + if serialized_shape.ndims != 0: + raise ValueError("Input serialized must be a scalar") + else: + serialized = control_flow_ops.with_dependencies( + [logging_ops.Assert( + math_ops.equal(array_ops.rank(serialized), 0), + ["Input serialized must be a scalar"], + name="SerializedIsScalar")], + serialized, + name="SerializedDependencies") + serialized = array_ops.expand_dims(serialized, 0) + if names is not None: + names = ops.convert_to_tensor(names) + names_shape = names.get_shape() + if names_shape.ndims is not None: + if names_shape.ndims != 0: + raise ValueError("Input names must be a scalar") + else: + names = control_flow_ops.with_dependencies( + [logging_ops.Assert( + math_ops.equal(array_ops.rank(names), 0), + ["Input names must be a scalar"], + name="NamesIsScalar")], + names, + name="NamesDependencies") + names = array_ops.expand_dims(names, 0) + + outputs = parse_example(serialized, + names=names, + sparse_keys=sparse_keys, + sparse_types=sparse_types, + dense_keys=dense_keys, + dense_types=dense_types, + dense_defaults=dense_defaults, + dense_shapes=dense_shapes, + name=name) + if dense_keys is not None: + for d in dense_keys: + outputs[d] = array_ops.squeeze(outputs[d], [0], name="Squeeze_%s" % d) + if sparse_keys is not None: + for s in sparse_keys: + outputs[s] = ops.SparseTensor( + array_ops.slice(outputs[s].indices, + [0, 1], [-1, -1], name="Slice_Indices_%s" % s), + outputs[s].values, + array_ops.slice(outputs[s].shape, + [1], [-1], name="Squeeze_Shape_%s" % s)) + return outputs + + +@ops.RegisterShape("ParseExample") +def _ParseExampleShape(op): + """Shape function for the ParseExample op.""" + input_shape = op.inputs[0].get_shape().with_rank(1) + num_sparse = op.get_attr("Nsparse") + num_dense = op.get_attr("Ndense") + dense_shapes = op.get_attr("dense_shapes") + sparse_index_shapes = [ + tensor_shape.matrix(None, 2) for _ in range(num_sparse)] + sparse_value_shapes = [tensor_shape.vector(None) for _ in range(num_sparse)] + sparse_shape_shapes = [tensor_shape.vector(2) for _ in range(num_sparse)] + assert num_dense == len(dense_shapes) + dense_shapes = [ + input_shape.concatenate((d.size for d in dense_shape.dim)) + for dense_shape in dense_shapes] + return (sparse_index_shapes + sparse_value_shapes + sparse_shape_shapes + + dense_shapes) + + +ops.RegisterShape("StringToNumber")( + common_shapes.unchanged_shape) + + +@ops.RegisterShape("DecodeRaw") +def _DecodeRawShape(op): + """Shape function for the DecodeRaw op.""" + # NOTE(mrry): Last dimension is data-dependent. + return [op.inputs[0].get_shape().concatenate([None])] + + +@ops.RegisterShape("DecodeCSV") +def _DecodeCSVShape(op): + """Shape function for the DecodeCSV op.""" + input_shape = op.inputs[0].get_shape() + # Optionally check that all of other inputs are scalar or empty. 
+ for default_input in op.inputs[1:]: + default_input_shape = default_input.get_shape().with_rank(1) + if default_input_shape[0] > 1: + raise ValueError( + "Shape of a default must be a length-0 or length-1 vector.") + return [input_shape] * len(op.outputs) diff --git a/tensorflow/python/ops/random_ops.py b/tensorflow/python/ops/random_ops.py new file mode 100644 index 0000000000..6bd8dd9e3d --- /dev/null +++ b/tensorflow/python/ops/random_ops.py @@ -0,0 +1,181 @@ +"""Operations for generating random numbers.""" + +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.framework import tensor_util +from tensorflow.python.framework import types +from tensorflow.python.framework import random_seed +from tensorflow.python.ops import common_shapes +from tensorflow.python.ops import gen_random_ops +from tensorflow.python.ops import math_ops +# pylint: disable=wildcard-import +from tensorflow.python.ops.gen_random_ops import * +# pylint: enable=wildcard-import + + +def _ShapeTensor(shape): + """Convert to an int32 or int64 tensor, defaulting to int32 if empty.""" + if isinstance(shape, (tuple, list)) and not shape: + dtype = types.int32 + else: + dtype = None + return ops.convert_to_tensor(shape, dtype=dtype, name="shape") + +# pylint: disable=protected-access +def random_normal(shape, mean=0.0, stddev=1.0, dtype=types.float32, + seed=None, name=None): + """Outputs random values from a normal distribution. + + Args: + shape: A 1-D integer Tensor or Python array. The shape of the output tensor. + mean: A 0-D Tensor or Python value of type `dtype`. The mean of the normal + distribution. + stddev: A 0-D Tensor or Python value of type `dtype`. The standard deviation + of the normal distribution. + dtype: The type of the output. + seed: A Python integer. Used to create a random seed for the distribution. + See [`set_random_seed`](constant_op.md#set_random_seed) for behavior. + name: A name for the operation (optional). + + Returns: + A tensor of the specified shape filled with random normal values. + """ + with ops.op_scope([shape, mean, stddev], name, "random_normal") as name: + shape_tensor = _ShapeTensor(shape) + mean_tensor = ops.convert_to_tensor( + mean, dtype=dtype, name="mean") + stddev_tensor = ops.convert_to_tensor( + stddev, dtype=dtype, name="stddev") + seed1, seed2 = random_seed.get_seed(seed) + rnd = gen_random_ops._random_standard_normal(shape_tensor, dtype, + seed=seed1, + seed2=seed2) + mul = rnd * stddev_tensor + value = math_ops.add(mul, mean_tensor, name=name) + return value + + +ops.NoGradient("RandomStandardNormal") + + +def truncated_normal(shape, mean=0.0, stddev=1.0, dtype=types.float32, + seed=None, name=None): + """Outputs random values from a truncated normal distribution. + + The generated values follow a normal distribution with specified mean and + standard deviation, except that values whose magnitude is more than 2 standard + deviations from the mean are dropped and re-picked. + + Args: + shape: A 1-D integer Tensor or Python array. The shape of the output tensor. + mean: A 0-D Tensor or Python value of type `dtype`. The mean of the + truncated normal distribution. + stddev: A 0-D Tensor or Python value of type `dtype`. The standard deviation + of the truncated normal distribution. + dtype: The type of the output. + seed: A Python integer. Used to create a random seed for the distribution. + See [`set_random_seed`](constant_op.md#set_random_seed) for behavior. 
+ name: A name for the operation (optional). + + Returns: + A tensor of the specified shape filled with random truncated normal values. + """ + with ops.op_scope([shape, mean, stddev], name, "truncated_normal") as name: + shape_tensor = _ShapeTensor(shape) + mean_tensor = ops.convert_to_tensor( + mean, dtype=dtype, name="mean") + stddev_tensor = ops.convert_to_tensor( + stddev, dtype=dtype, name="stddev") + seed1, seed2 = random_seed.get_seed(seed) + rnd = gen_random_ops._truncated_normal(shape_tensor, dtype, + seed=seed1, + seed2=seed2) + mul = rnd * stddev_tensor + value = math_ops.add(mul, mean_tensor, name=name) + return value + + +ops.NoGradient("TruncatedNormal") + + +def random_uniform(shape, minval=0.0, maxval=1.0, + dtype=types.float32, seed=None, + name=None): + """Outputs random values from a uniform distribution. + + The generated values follow a uniform distribution in the range + `[minval, maxval)`. The lower bound `minval` is included in the range, while + the upper bound `maxval` is excluded. + + Args: + shape: A 1-D integer Tensor or Python array. The shape of the output tensor. + minval: A 0-D Tensor or Python value of type `dtype`. The lower bound on the + range of random values to generate. + maxval: A 0-D Tensor or Python value of type `dtype`. The upper bound on + the range of random values to generate. + dtype: The type of the output. + seed: A Python integer. Used to create a random seed for the distribution. + See [`set_random_seed`](constant_op.md#set_random_seed) for behavior. + name: A name for the operation (optional). + + Returns: + A tensor of the specified shape filled with random uniform values. + """ + with ops.op_scope([shape, minval, maxval], name, "random_uniform") as name: + shape_tensor = _ShapeTensor(shape) + min_tensor = ops.convert_to_tensor(minval, dtype=dtype, name="min") + range_tensor = ops.convert_to_tensor( + maxval - minval, dtype=dtype, name="range") + seed1, seed2 = random_seed.get_seed(seed) + rnd = gen_random_ops._random_uniform(shape_tensor, dtype, + seed=seed1, + seed2=seed2) + mul = rnd * range_tensor + value = math_ops.add(mul, min_tensor, name=name) + return value + + +def random_shuffle(value, seed=None, name=None): + """Randomly shuffles a tensor along its first dimension. + + The tensor is shuffled along dimension 0, such that each `value[j]` is mapped + to one and only one `output[i]`. For example, a mapping that might occur for a + 3x2 tensor is: + + ```python + [[1, 2], [[5, 6], + [3, 4], ==> [1, 2], + [5, 6]] [3, 4]] + ``` + + Args: + value: A Tensor to be shuffled. + seed: A Python integer. Used to create a random seed for the distribution. + See [`set_random_seed`](constant_op.md#set_random_seed) for behavior. + name: A name for the operation (optional). + + Returns: + A tensor of same shape and type as `value`, shuffled along its first + dimension. 
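+
+  As an illustration (a minimal sketch; the tensor values and the seed below
+  are only examples, not part of the op's contract):
+
+  ```python
+  import tensorflow as tf
+
+  elems = tf.constant([[1, 2], [3, 4], [5, 6]])
+  shuffled = tf.random_shuffle(elems, seed=42)
+  with tf.Session() as sess:
+    print sess.run(shuffled)  # The rows of `elems` in some random order.
+  ```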
+ """ + seed1, seed2 = random_seed.get_seed(seed) + return gen_random_ops._random_shuffle(value, seed=seed1, seed2=seed2, + name=name) + + +ops.NoGradient("RandomUniform") + + +@ops.RegisterShape("TruncatedNormal") +@ops.RegisterShape("RandomStandardNormal") +@ops.RegisterShape("RandomUniform") +def _RandomShape(op): + shape_val = tensor_util.ConstantValue(op.inputs[0]) + if shape_val is not None: + return [tensor_shape.TensorShape(shape_val.tolist())] + else: + shape_shape = op.inputs[0].get_shape().with_rank_at_most(1) + return [tensor_shape.unknown_shape(ndims=shape_shape.num_elements())] + + +ops.RegisterShape("RandomShuffle")(common_shapes.unchanged_shape) diff --git a/tensorflow/python/ops/sparse_grad.py b/tensorflow/python/ops/sparse_grad.py new file mode 100644 index 0000000000..3685b671b7 --- /dev/null +++ b/tensorflow/python/ops/sparse_grad.py @@ -0,0 +1,12 @@ +"""Gradients for operators defined in sparse_ops.py.""" +from tensorflow.python.framework import ops +from tensorflow.python.ops import sparse_ops + + +ops.NoGradient("SparseToDense") + + +ops.NoGradient("SparseConcat") + + +ops.NoGradient("SparseReorder") diff --git a/tensorflow/python/ops/sparse_ops.py b/tensorflow/python/ops/sparse_ops.py new file mode 100644 index 0000000000..c0dca6156d --- /dev/null +++ b/tensorflow/python/ops/sparse_ops.py @@ -0,0 +1,458 @@ +"""## Sparse Tensor Representation. + +Tensorflow supports a `SparseTensor` representation for data that is sparse +in multiple dimensions. Contrast this representation with `IndexedSlices`, +which is efficient for representing tensors that are sparse in their first +dimension, and dense along all other dimensions. + +@@SparseTensor +@@SparseTensorValue + +## Sparse to Dense Conversion. + +@@sparse_to_dense +@@sparse_tensor_to_dense +@@sparse_to_indicator + +## Manipulation. + +@@sparse_concat +@@sparse_reorder +@@sparse_retain +@@sparse_fill_empty_rows +""" +import tensorflow.python.platform + +import numpy as np + +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.framework import tensor_util +from tensorflow.python.framework import types +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import constant_op +from tensorflow.python.ops import gen_sparse_ops +from tensorflow.python.ops import math_ops +# pylint: disable=wildcard-import +from tensorflow.python.ops.gen_sparse_ops import * +# pylint: enable=wildcard-import +# pylint: disable=protected-access + + +def sparse_concat(concat_dim, sp_inputs, name=None): + """Concatenates a list of `SparseTensor` along the specified dimension. + + Concatenation is with respect to the dense versions of each sparse input. + It is assumed that each inputs is a `SparseTensor` whose elements are ordered + along increasing dimension number. + + All inputs' shapes must match, except for the concat dimension. The + `indices`, `values`, and `shapes` lists must have the same length. + + The output shape is identical to the inputs', except along the concat + dimension, where it is the sum of the inputs' sizes along that dimension. + + The output elements will be resorted to preserve the sort order along + increasing dimension number. + + This op runs in `O(M log M)` time, where `M` is the total number of non-empty + values across all inputs. This is due to the need for an internal sort in + order to concatenate efficiently across an arbitrary dimension. 
+ + For example, if `concat_dim = 1` and the inputs are + + sp_inputs[0]: shape = [2, 3] + [0, 2]: "a" + [1, 0]: "b" + [1, 1]: "c" + + sp_inputs[1]: shape = [2, 4] + [0, 1]: "d" + [0, 2]: "e" + + then the output will be + + shape = [2, 7] + [0, 2]: "a" + [0, 4]: "d" + [0, 5]: "e" + [1, 0]: "b" + [1, 1]: "c" + + Graphically this is equivalent to doing + + [ a] concat [ d e ] = [ a d e ] + [b c ] [ ] [b c ] + + Args: + concat_dim: Dimension to concatenate along. + sp_inputs: List of `SparseTensor` to concatenate. + name: A name prefix for the returned tensors (optional). + + Returns: + A `SparseTensor` with the concatenated output. + + Raises: + TypeError: If `sp_inputs` is not a list of `SparseTensor`. + """ + if not isinstance(sp_inputs, list): + raise TypeError("Inputs must be a list") + if not all(isinstance(sp_input, ops.SparseTensor) for sp_input in sp_inputs): + raise TypeError("All inputs must be SparseTensors") + + if len(sp_inputs) == 1: # Degenerate case of one tensor. + return sp_inputs[0] + + inds = [sp_input.indices for sp_input in sp_inputs] + vals = [sp_input.values for sp_input in sp_inputs] + shapes = [sp_input.shape for sp_input in sp_inputs] + + output_ind, output_val, output_shape = ( + gen_sparse_ops._sparse_concat( + inds, + vals, + shapes, + concat_dim, + name=name)) + + return ops.SparseTensor(output_ind, output_val, output_shape) + + +@ops.RegisterShape("SparseConcat") +def _SparseConcatShape(op): + """Shape function for SparseConcat op.""" + num_inputs = int(op.get_attr("N")) + + # TF flattens and concatenates all list inputs, so reconstruct the lists here. + ind_shapes = [ind.get_shape().with_rank(2) for ind in op.inputs[0:num_inputs]] + val_shapes = [val.get_shape().with_rank(1) + for val in op.inputs[num_inputs:2 * num_inputs]] + shape_shapes = [shape.get_shape().with_rank(1) + for shape in op.inputs[2 * num_inputs:]] + + output_ind_rows = tensor_shape.Dimension(0) + output_ind_cols = tensor_shape.Dimension(None) + output_val_elems = tensor_shape.Dimension(0) + output_shape_shape = tensor_shape.TensorShape(None) + + for i in range(num_inputs): + num_elems_i = ind_shapes[i][0].merge_with(val_shapes[i][0]) + output_ind_rows += num_elems_i + output_ind_cols = output_ind_cols.merge_with(ind_shapes[i][1]) + output_val_elems += num_elems_i + output_shape_shape = output_shape_shape.merge_with(shape_shapes[i]) + + output_ind_shape = tensor_shape.matrix(output_ind_rows, output_ind_cols) + output_val_shape = tensor_shape.vector(output_val_elems) + + return [output_ind_shape, output_val_shape, output_shape_shape] + + +def sparse_reorder(sp_input, name=None): + """Reorders a `SparseTensor` into the canonical, row-major ordering. + + Note that by convention, all sparse ops preserve the canonical ordering + along increasing dimension number. The only time ordering can be violated + is during manual manipulation of the indices and values to add entries. + + Reordering does not affect the shape of the `SparseTensor`. + + For example, if sp_input has shape `[4, 5]` and `indices` / `values`: + + [0, 3]: b + [0, 1]: a + [3, 1]: d + [2, 0]: c + + then the output will be a `SparseTensor` of shape `[4, 5]` and + `indices` / `values`: + + [0, 1]: a + [0, 3]: b + [2, 0]: c + [3, 1]: d + + Args: + sp_input: The input `SparseTensor`. + name: A name prefix for the returned tensors (optional) + + Returns: + A `SparseTensor` with the same shape and non-empty values, but in + canonical ordering. + + Raises: + TypeError: If `sp_input` is not a `SparseTensor`. 
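+
+  A minimal sketch of the example above (the indices and values are purely
+  illustrative, mirroring the 4x5 tensor described in this docstring):
+
+  ```python
+  import tensorflow as tf
+
+  ind = tf.constant([[0, 3], [0, 1], [3, 1], [2, 0]], tf.int64)
+  val = tf.constant(["b", "a", "d", "c"])
+  shape = tf.constant([4, 5], tf.int64)
+  sp_ordered = tf.sparse_reorder(tf.SparseTensor(ind, val, shape))
+  # sp_ordered.indices evaluates to [[0, 1], [0, 3], [2, 0], [3, 1]] and
+  # sp_ordered.values to ["a", "b", "c", "d"].
+  ```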
+ """ + if not isinstance(sp_input, ops.SparseTensor): + raise TypeError("Input must be a SparseTensor") + + reordered_ind, reordered_val = ( + gen_sparse_ops._sparse_reorder( + sp_input.indices, + sp_input.values, + sp_input.shape, + name=name)) + + return ops.SparseTensor( + reordered_ind, reordered_val, array_ops.identity(sp_input.shape)) + + +@ops.RegisterShape("SparseReorder") +def _SparseReorderShape(op): + """Shape function for SparseReorder op.""" + input_indices_shape = op.inputs[0].get_shape().with_rank(2) + input_values_shape = op.inputs[1].get_shape().with_rank(1) + unused_shape_shape = op.inputs[2].get_shape().with_rank(1) + + return [input_indices_shape, input_values_shape] + + +@ops.RegisterShape("SparseToDense") +def _SparseToDenseShape(op): + input_shape = tensor_util.ConstantValue(op.inputs[1]) + if input_shape is not None: + if np.ndim(input_shape) > 1: + raise ValueError("Input shape should be a vector") + return [tensor_shape.TensorShape(input_shape.tolist())] + else: + input_shape_shape = op.inputs[1].get_shape().with_rank_at_most(1) + return [tensor_shape.unknown_shape(ndims=input_shape_shape.num_elements())] + + +def sparse_tensor_to_dense(sp_input, default_value, name=None): + """Converts a `SparseTensor` into a dense tensor. + + This op is a convenience wrapper around `sparse_to_dense` for `SparseTensor`s. + + For example, if `sp_input` has shape `[3, 5]` and non-empty string values: + + [0, 1]: a + [0, 3]: b + [2, 0]: c + + and `default_value` is `x`, then the output will be a dense `[3, 5]` + string tensor with values: + + [[x a x b x] + [x x x x x] + [c x x x x]] + + Args: + sp_input: The input `SparseTensor`. + default_value: Scalar value to set for indices not specified in + `sp_input`. + name: A name prefix for the returned tensors (optional). + + Returns: + A dense tensor with shape `sp_input.shape` and values specified by + the non-empty values in `sp_input`. Indices not in `sp_input` are assigned + `default_value`. + + Raises: + TypeError: If `sp_input` is not a `SparseTensor`. + """ + if not isinstance(sp_input, ops.SparseTensor): + raise TypeError("Input must be a SparseTensor") + + return gen_sparse_ops.sparse_to_dense( + sp_input.indices, + sp_input.shape, + sp_input.values, + default_value, + name=name) + + +def sparse_to_indicator(sp_input, vocab_size, name=None): + """Converts a `SparseTensor` of ids into a dense bool indicator tensor. + + The last dimension of `sp_input` is discarded and replaced with the values of + `sp_input`. If `sp_input.shape = [D0, D1, ..., Dn, K]`, then + `output.shape = [D0, D1, ..., Dn, vocab_size]`, where + + output[d_0, d_1, ..., d_n, sp_input[d_0, d_1, ..., d_n, k]] = True + + and False elsewhere in `output`. + + For example, if `sp_input.shape = [2, 3, 4]` with non-empty values: + + [0, 0, 0]: 0 + [0, 1, 0]: 10 + [1, 0, 3]: 103 + [1, 1, 2]: 112 + [1, 1, 3]: 113 + [1, 2, 1]: 121 + + and `vocab_size = 200`, then the output will be a `[2, 3, 200]` dense bool + tensor with False everywhere except at positions + + (0, 0, 0), (0, 1, 10), (1, 0, 103), (1, 1, 112), (1, 1, 113), (1, 2, 121). + + This op is useful for converting `SparseTensor`s into dense formats for + compatibility with ops that expect dense tensors. + + The input `SparseTensor` must be in row-major order. + + Args: + sp_input: A `SparseTensor` of type `int32` or `int64`. + vocab_size: The new size of the last dimension, with + `all(0 <= sp_input.values < vocab_size)`. 
+ name: A name prefix for the returned tensors (optional) + + Returns: + A dense bool indicator tensor representing the indices with specified value. + + Raises: + TypeError: If `sp_input` is not a `SparseTensor`. + """ + if not isinstance(sp_input, ops.SparseTensor): + raise TypeError("Input must be a SparseTensor") + + with ops.op_scope([sp_input], name, "SparseToIndicator") as name: + indices_shape = array_ops.shape(sp_input.indices) + num_entries = indices_shape[0] + rank = indices_shape[1] + + ids = sp_input.values + if ids.dtype != types.int64: + ids = math_ops.cast(ids, types.int64) + + # Slice off the last dimension of indices, then then tack on the ids + indices_columns_to_preserve = array_ops.slice( + sp_input.indices, [0, 0], array_ops.pack([-1, rank - 1])) + new_indices = array_ops.concat( + 1, [indices_columns_to_preserve, array_ops.reshape(ids, [-1, 1])]) + + new_values = array_ops.fill(array_ops.expand_dims(num_entries, 0), True) + new_shape = array_ops.concat( + 0, [array_ops.slice(sp_input.shape, [0], + array_ops.expand_dims(rank - 1, 0)), [vocab_size]]) + + sp_new = ops.SparseTensor(new_indices, new_values, new_shape) + + return sparse_tensor_to_dense(sp_new, False, name=name) + + +def sparse_retain(sp_input, to_retain): + """Retains specified non-empty values within a `SparseTensor`. + + For example, if `sp_input` has shape `[4, 5]` and 4 non-empty string values: + + [0, 1]: a + [0, 3]: b + [2, 0]: c + [3, 1]: d + + and `to_retain = [True, False, False, True]`, then the output will + be a `SparseTensor` of shape `[4, 5]` with 2 non-empty values: + + [0, 1]: a + [3, 1]: d + + Args: + sp_input: The input `SparseTensor` with `N` non-empty elements. + to_retain: A bool vector of length `N` with `M` true values. + + Returns: + A `SparseTensor` with the same shape as the input and `M` non-empty + elements corresponding to the true positions in `to_retain`. + + Raises: + TypeError: If `sp_input` is not a `SparseTensor`. + """ + if not isinstance(sp_input, ops.SparseTensor): + raise TypeError("Input must be a SparseTensor") + + to_retain = ops.convert_to_tensor(to_retain) + + # Shape checking, if shape is known at graph construction time + retain_shape = to_retain.get_shape() + retain_shape.assert_has_rank(1) + sp_input.values.get_shape()[0].merge_with(retain_shape[0]) + + where_true = array_ops.reshape(array_ops.where(to_retain), [-1]) + new_indices = array_ops.gather(sp_input.indices, where_true) + new_values = array_ops.gather(sp_input.values, where_true) + return ops.SparseTensor( + new_indices, new_values, array_ops.identity(sp_input.shape)) + + +def sparse_fill_empty_rows(sp_input, default_value, name=None): + """Fills empty rows in the input 2-D `SparseTensor` with a default value. + + This op adds entries with the specified `default_value` at index + `[row, 0]` for any row in the input that does not already have a value. + + For example, suppose `sp_input` has shape `[5, 6]` and non-empty values: + + [0, 1]: a + [0, 3]: b + [2, 0]: c + [3, 1]: d + + Rows 1 and 4 are empty, so the output will be of shape `[5, 6]` with values: + + [0, 1]: a + [0, 3]: b + [1, 0]: default_value + [2, 0]: c + [3, 1]: d + [4, 0]: default_value + + Note that the input may have empty columns at the end, with no effect on + this op. + + The output `SparseTensor` will be in row-major order and will have the + same shape as the input. + + This op also returns an indicator vector such that + + empty_row_indicator[i] = True iff row i was an empty row. 
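+
+  A minimal sketch of the example above (values are illustrative; the input
+  mirrors the `[5, 6]` tensor described in this docstring, with rows 1 and 4
+  left empty):
+
+  ```python
+  import tensorflow as tf
+
+  ind = tf.constant([[0, 1], [0, 3], [2, 0], [3, 1]], tf.int64)
+  val = tf.constant([1, 2, 3, 4])
+  shape = tf.constant([5, 6], tf.int64)
+  sp = tf.SparseTensor(ind, val, shape)
+  filled, empty_rows = tf.sparse_fill_empty_rows(sp, default_value=-1)
+  # `empty_rows` evaluates to [False, True, False, False, True].
+  ```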
+ + Args: + sp_input: A `SparseTensor` with shape `[N, M]`. + default_value: The value to fill for empty rows, with the same type as + `sp_input.` + name: A name prefix for the returned tensors (optional) + + Returns: + sp_ordered_output: A `SparseTensor` with shape `[N, M]`, and with all empty + rows filled in with `default_value`. + empty_row_indicator: A bool vector of length `N` indicating whether each + input row was empty. + + Raises: + TypeError: If `sp_input` is not a `SparseTensor`. + """ + if not isinstance(sp_input, ops.SparseTensor): + raise TypeError("Input must be a SparseTensor") + + with ops.op_scope([sp_input], name, "SparseFillEmptyRows"): + default_value = ops.convert_to_tensor( + default_value, dtype=sp_input.values.dtype) + + num_rows = math_ops.cast(sp_input.shape[0], types.int32) + all_row_indices = math_ops.cast( + math_ops.range(0, num_rows, 1), types.int64) + empty_row_indices, _ = array_ops.list_diff( + all_row_indices, sp_input.indices[:, 0]) + empty_row_indicator = gen_sparse_ops.sparse_to_dense( + empty_row_indices, array_ops.expand_dims(sp_input.shape[0], -1), True, + False) + + empty_row_indices_as_column = array_ops.reshape(empty_row_indices, [-1, 1]) + additional_indices = array_ops.concat( + 1, + [empty_row_indices_as_column, + array_ops.zeros_like(empty_row_indices_as_column)]) + additional_values = array_ops.fill(array_ops.shape(empty_row_indices), + default_value) + + all_indices_unordered = array_ops.concat( + 0, [sp_input.indices, additional_indices]) + all_values_unordered = array_ops.concat( + 0, [sp_input.values, additional_values]) + sp_unordered_output = ops.SparseTensor( + all_indices_unordered, all_values_unordered, sp_input.shape) + sp_ordered_output = sparse_reorder(sp_unordered_output) + + return sp_ordered_output, empty_row_indicator diff --git a/tensorflow/python/ops/sparse_ops_test.py b/tensorflow/python/ops/sparse_ops_test.py new file mode 100644 index 0000000000..07a5e6c6da --- /dev/null +++ b/tensorflow/python/ops/sparse_ops_test.py @@ -0,0 +1,212 @@ +"""Tests for Python ops defined in sparse_ops.""" + +import tensorflow.python.platform + +import numpy as np + +from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util +from tensorflow.python.framework import types +from tensorflow.python.ops import constant_op +from tensorflow.python.ops import sparse_ops +from tensorflow.python.platform import googletest + + +class SparseToIndicatorTest(test_util.TensorFlowTestCase): + + def _SparseTensor_5x6(self, dtype): + ind = np.array([ + [0, 0], + [1, 0], [1, 3], [1, 4], + [3, 2], [3, 3]]) + val = np.array([0, 10, 13, 14, 32, 33]) + shape = np.array([5, 6]) + return ops.SparseTensor( + constant_op.constant(ind, types.int64), + constant_op.constant(val, dtype), + constant_op.constant(shape, types.int64)) + + def _SparseTensor_2x3x4(self, dtype): + ind = np.array([ + [0, 0, 1], + [0, 1, 0], [0, 1, 2], + [1, 0, 3], + [1, 1, 1], [1, 1, 3], + [1, 2, 2]]) + val = np.array([1, 10, 12, 103, 111, 113, 122]) + shape = np.array([2, 3, 4]) + return ops.SparseTensor( + constant_op.constant(ind, types.int64), + constant_op.constant(val, dtype), + constant_op.constant(shape, types.int64)) + + def testInt32(self): + with self.test_session(use_gpu=False): + sp_input = self._SparseTensor_5x6(types.int32) + output = sparse_ops.sparse_to_indicator(sp_input, 50).eval() + + expected_output = np.zeros((5, 50), dtype=np.bool) + expected_trues = ((0, 0), (1, 10), (1, 13), (1, 14), (3, 32), (3, 33)) + for expected_true in 
expected_trues: + expected_output[expected_true] = True + + self.assertAllEqual(output, expected_output) + + def testInt64(self): + with self.test_session(use_gpu=False): + sp_input = self._SparseTensor_5x6(types.int64) + output = sparse_ops.sparse_to_indicator(sp_input, 50).eval() + + expected_output = np.zeros((5, 50), dtype=np.bool) + expected_trues = [(0, 0), (1, 10), (1, 13), (1, 14), (3, 32), (3, 33)] + for expected_true in expected_trues: + expected_output[expected_true] = True + + self.assertAllEqual(output, expected_output) + + def testHigherRank(self): + with self.test_session(use_gpu=False): + sp_input = self._SparseTensor_2x3x4(types.int64) + output = sparse_ops.sparse_to_indicator(sp_input, 200).eval() + + expected_output = np.zeros((2, 3, 200), dtype=np.bool) + expected_trues = [(0, 0, 1), (0, 1, 10), (0, 1, 12), + (1, 0, 103), (1, 1, 111), (1, 1, 113), (1, 2, 122)] + for expected_true in expected_trues: + expected_output[expected_true] = True + + self.assertAllEqual(output, expected_output) + + +class SparseRetainTest(test_util.TensorFlowTestCase): + + def _SparseTensor_5x6(self): + ind = np.array([ + [0, 0], + [1, 0], [1, 3], [1, 4], + [3, 2], [3, 3]]) + val = np.array([0, 10, 13, 14, 32, 33]) + shape = np.array([5, 6]) + return ops.SparseTensor( + constant_op.constant(ind, types.int64), + constant_op.constant(val, types.int32), + constant_op.constant(shape, types.int64)) + + def testBasic(self): + with self.test_session(use_gpu=False) as sess: + sp_input = self._SparseTensor_5x6() + to_retain = np.array([1, 0, 0, 1, 1, 0], dtype=np.bool) + sp_output = sparse_ops.sparse_retain(sp_input, to_retain) + + output = sess.run(sp_output) + + self.assertAllEqual(output.indices, [[0, 0], [1, 4], [3, 2]]) + self.assertAllEqual(output.values, [0, 14, 32]) + self.assertAllEqual(output.shape, [5, 6]) + + def testRetainNone(self): + with self.test_session(use_gpu=False) as sess: + sp_input = self._SparseTensor_5x6() + to_retain = np.zeros((6,), dtype=np.bool) + sp_output = sparse_ops.sparse_retain(sp_input, to_retain) + + output = sess.run(sp_output) + + self.assertAllEqual(output.indices, np.array([]).reshape((0, 2))) + self.assertAllEqual(output.values, []) + self.assertAllEqual(output.shape, [5, 6]) + + def testMismatchedRetainShape(self): + with self.test_session(use_gpu=False): + sp_input = self._SparseTensor_5x6() + to_retain = np.array([1, 0, 0, 1, 0], dtype=np.bool) + with self.assertRaises(ValueError): + sparse_ops.sparse_retain(sp_input, to_retain) + + +class SparseFillEmptyRowsTest(test_util.TensorFlowTestCase): + + def _SparseTensor_5x6(self): + ind = np.array([ + [0, 0], + [1, 0], [1, 3], [1, 4], + [3, 2], [3, 3]]) + val = np.array([0, 10, 13, 14, 32, 33]) + shape = np.array([5, 6]) + return ops.SparseTensor( + constant_op.constant(ind, types.int64), + constant_op.constant(val, types.int32), + constant_op.constant(shape, types.int64)) + + def _SparseTensor_String5x6(self): + ind = np.array([ + [0, 0], + [1, 0], [1, 3], [1, 4], + [3, 2], [3, 3]]) + val = np.array(["a", "b", "c", "d", "e", "f"]) + shape = np.array([5, 6]) + return ops.SparseTensor( + constant_op.constant(ind, types.int64), + constant_op.constant(val, types.string), + constant_op.constant(shape, types.int64)) + + def _SparseTensor_2x6(self): + ind = np.array([[0, 0], [1, 0], [1, 3], [1, 4]]) + val = np.array([0, 10, 13, 14]) + shape = np.array([2, 6]) + return ops.SparseTensor( + constant_op.constant(ind, types.int64), + constant_op.constant(val, types.int32), + constant_op.constant(shape, types.int64)) + + def 
testFillNumber(self): + with self.test_session(use_gpu=False) as sess: + sp_input = self._SparseTensor_5x6() + sp_output, empty_row_indicator = ( + sparse_ops.sparse_fill_empty_rows(sp_input, -1)) + + output, empty_row_indicator_out = sess.run( + [sp_output, empty_row_indicator]) + + self.assertAllEqual( + output.indices, + [[0, 0], [1, 0], [1, 3], [1, 4], [2, 0], [3, 2], [3, 3], [4, 0]]) + self.assertAllEqual(output.values, [0, 10, 13, 14, -1, 32, 33, -1]) + self.assertAllEqual(output.shape, [5, 6]) + self.assertAllEqual(empty_row_indicator_out, + np.array([0, 0, 1, 0, 1]).astype(np.bool)) + + def testFillString(self): + with self.test_session(use_gpu=False) as sess: + sp_input = self._SparseTensor_String5x6() + sp_output, empty_row_indicator = ( + sparse_ops.sparse_fill_empty_rows(sp_input, "")) + + output, empty_row_indicator_out = sess.run( + [sp_output, empty_row_indicator]) + + self.assertAllEqual( + output.indices, + [[0, 0], [1, 0], [1, 3], [1, 4], [2, 0], [3, 2], [3, 3], [4, 0]]) + self.assertAllEqual(output.values, ["a", "b", "c", "d", "", "e", "f", ""]) + self.assertAllEqual(output.shape, [5, 6]) + self.assertAllEqual(empty_row_indicator_out, + np.array([0, 0, 1, 0, 1]).astype(np.bool)) + + def testNoEmptyRows(self): + with self.test_session(use_gpu=False) as sess: + sp_input = self._SparseTensor_2x6() + sp_output, empty_row_indicator = ( + sparse_ops.sparse_fill_empty_rows(sp_input, -1)) + + output, empty_row_indicator_out = sess.run( + [sp_output, empty_row_indicator]) + + self.assertAllEqual(output.indices, [[0, 0], [1, 0], [1, 3], [1, 4]]) + self.assertAllEqual(output.values, [0, 10, 13, 14]) + self.assertAllEqual(output.shape, [2, 6]) + self.assertAllEqual(empty_row_indicator_out, np.zeros(2).astype(np.bool)) + + +if __name__ == "__main__": + googletest.main() diff --git a/tensorflow/python/ops/standard_ops.py b/tensorflow/python/ops/standard_ops.py new file mode 100644 index 0000000000..beef8e75b5 --- /dev/null +++ b/tensorflow/python/ops/standard_ops.py @@ -0,0 +1,41 @@ +# pylint: disable=wildcard-import,unused-import +"""Import names of Tensor Flow standard Ops.""" + +# Imports the following modules so that @RegisterGradient get executed. +from tensorflow.python.ops import array_grad +from tensorflow.python.ops import data_flow_grad +from tensorflow.python.ops import math_grad +from tensorflow.python.ops import state_grad + +from tensorflow.python.ops.array_ops import * +from tensorflow.python.ops.clip_ops import * +# TODO(vrv): Switch to import * once we're okay with exposing the module. 
+from tensorflow.python.ops.control_flow_ops import group +from tensorflow.python.ops.control_flow_ops import no_op +from tensorflow.python.ops.control_flow_ops import tuple +from tensorflow.python.ops.data_flow_ops import * +from tensorflow.python.ops.gradients import * +from tensorflow.python.ops.init_ops import * +from tensorflow.python.ops.io_ops import * +from tensorflow.python.ops.linalg_ops import * +from tensorflow.python.ops.logging_ops import * +from tensorflow.python.ops.math_ops import * +from tensorflow.python.ops.numerics import * +from tensorflow.python.ops.parsing_ops import * +from tensorflow.python.ops.random_ops import * +from tensorflow.python.ops.sparse_ops import * +from tensorflow.python.ops.state_ops import assign +from tensorflow.python.ops.state_ops import assign_add +from tensorflow.python.ops.state_ops import assign_sub +from tensorflow.python.ops.state_ops import count_up_to +from tensorflow.python.ops.state_ops import scatter_add +from tensorflow.python.ops.state_ops import scatter_sub +from tensorflow.python.ops.state_ops import scatter_update +from tensorflow.python.ops.string_ops import * +from tensorflow.python.ops.summary_ops import histogram_summary +from tensorflow.python.ops.summary_ops import image_summary +from tensorflow.python.ops.summary_ops import merge_all_summaries +from tensorflow.python.ops.summary_ops import merge_summary +from tensorflow.python.ops.summary_ops import scalar_summary +from tensorflow.python.ops.variable_scope import * +from tensorflow.python.ops.variables import * diff --git a/tensorflow/python/ops/state_grad.py b/tensorflow/python/ops/state_grad.py new file mode 100644 index 0000000000..d9b084693c --- /dev/null +++ b/tensorflow/python/ops/state_grad.py @@ -0,0 +1,18 @@ +"""Gradients for operators defined in state_ops.py.""" + +from tensorflow.python.framework import ops +from tensorflow.python.ops import state_ops + +ops.NoGradient("Assign") + + +ops.NoGradient("AssignAdd") + + +ops.NoGradient("AssignSub") + + +ops.NoGradient("ScatterAdd") + + +ops.NoGradient("ScatterSub") diff --git a/tensorflow/python/ops/state_ops.py b/tensorflow/python/ops/state_ops.py new file mode 100644 index 0000000000..1c8f38b94c --- /dev/null +++ b/tensorflow/python/ops/state_ops.py @@ -0,0 +1,189 @@ +"""## Variables + +@@Variable + +## Variable helper functions + +TensorFlow provides a set of functions to help manage the set of variables +collected in the graph. + +@@all_variables +@@trainable_variables + +@@initialize_all_variables +@@initialize_variables +@@assert_variables_initialized + +## Saving and Restoring Variables. + +@@Saver + +@@latest_checkpoint + +@@get_checkpoint_state +@@update_checkpoint_state + +## Sharing Variables + +TensorFlow provides several classes and operations that you can use to +create variables contingent on certain conditions. + +@@get_variable +@@get_variable_scope +@@variable_scope + +@@constant_initializer +@@random_normal_initializer +@@truncated_normal_initializer +@@random_uniform_initializer +@@uniform_unit_scaling_initializer +@@zeros_initializer + +## Sparse Variable Updates + +The sparse update ops modify a subset of the entries in a dense `Variable`, +either overwriting the entries or adding / subtracting a delta. These are +useful for training embedding models and similar lookup-based networks, since +only a small subset of embedding vectors change in any given step. 
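+
+For example, a minimal sketch of a direct sparse update (the variable shape
+and the row indices below are illustrative only):
+
+```python
+import tensorflow as tf
+
+v = tf.Variable(tf.zeros([8, 3]))
+# Overwrite rows 1 and 4 of `v` in place; the other rows are untouched.
+update_op = tf.scatter_update(v, [1, 4], tf.ones([2, 3]))
+```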
+ +Since a sparse update of a large tensor may be generated automatically during +gradient computation (as in the gradient of [`tf.gather`](array_ops.md#gather)), +an [`IndexedSlices`](#IndexedSlices) class is provided that encapsulates a set +of sparse indices and values. `IndexedSlices` objects are detected and handled +automatically by the optimizers in most cases. + +@@scatter_update +@@scatter_add +@@scatter_sub +@@sparse_mask +@@IndexedSlices +""" + +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.framework import tensor_util +from tensorflow.python.ops import common_shapes +from tensorflow.python.ops import gen_state_ops +# pylint: disable=wildcard-import,undefined-variable +from tensorflow.python.ops.gen_state_ops import * + + +# pylint: disable=protected-access +def variable_op(shape, dtype, name="Variable", set_shape=True, container="", + shared_name=""): + """Create a variable Operation. + + See also variables.Variable. + + Args: + shape: The shape of the tensor managed by this variable + dtype: The underlying type of the tensor values. + name: optional name to use for the variable op. + set_shape: If True, set the shape property of the returned Tensor to + the shape argument. + container: An optional string. Defaults to "". + If non-empty, this variable is placed in the given container. + Otherwise, a default container is used. + shared_name: An optional string. Defaults to "". + If non-empty, this variable is named in the given bucket + with this shared_name. Otherwise, the node name is used instead. + + Returns: + A variable tensor. + """ + ret = gen_state_ops._variable(shape=shape, dtype=dtype, name=name, + container=container, shared_name=shared_name) + # TODO(mrry): Move this to where it is used, so we can get rid of this op + # wrapper? + if set_shape: + ret.set_shape(shape) + return ret + + +# NOTE(mrry): Shapes are conditionally set in the Python wrapper. +ops.RegisterShape("Variable")(common_shapes.unknown_shape) + + +@ops.RegisterShape("TemporaryVariable") +def _TemporaryVariableShape(op): + """Shape function for the TemporaryVariable op.""" + shape = tensor_util.TensorShapeProtoToList(op.get_attr("shape")) + return [tensor_shape.TensorShape(shape)] + + +@ops.RegisterShape("DestroyTemporaryVariable") +def _DestroyTemporaryVariableShape(op): + """Shape function for the DestroyTemporaryVariable op.""" + return [op.inputs[0].get_shape()] + + +def init_variable(v, init, name="init"): + """Initializes variable with "init". + + This op does the following: + if init is a Tensor, v = init + if callable(init): v = init(VariableShape(v), v.dtype) + + Args: + v: Variable to initialize + init: Tensor to assign to v, + Or an object convertible to Tensor e.g. nparray, + Or an Initializer that generates a tensor given the shape and type of v. + An "Initializer" is a callable that returns a tensor that "v" should be + set to. It will be called as init(shape, dtype). + name: Optional name for the op. + + Returns: + The operation that initializes v. + """ + with ops.op_scope([v, init], None, v.op.name + "/"): + with ops.name_scope(name) as scope: + with ops.device(v.device or ops.get_default_graph().get_default_device()): + if callable(init): + assert v.get_shape().is_fully_defined(), "Variable shape unknown." + # TODO(mrry): Convert to v.shape when the property and + # accessor are reconciled (and all initializers support + # tf.TensorShape objects). 
+ value = init(v.get_shape().as_list(), v.dtype.base_dtype) + value = ops.convert_to_tensor(value, name="value") + return assign(v, value, name=scope) + else: + init = ops.convert_to_tensor(init, name="init") + return assign(v, init, name=scope) + + +@ops.RegisterShape("Assign") +def _AssignShape(op): + """Shape function for the Assign op.""" + if op.get_attr("validate_shape"): + # NOTE(mrry): Return a known shape here. This makes it awkward to + # chain a validated-shape assignment and a reshaping assignment, + # but that is a sufficiently niche case that supporting it does + # not seem worthwhile. + return [op.inputs[0].get_shape().merge_with(op.inputs[1].get_shape())] + return [op.inputs[1].get_shape()] + + +@ops.RegisterShape("AssignAdd") +@ops.RegisterShape("AssignSub") +def _AssignUpdateShape(op): + """Shape function for the AssignAdd and AssignSub dense update ops.""" + return [op.inputs[0].get_shape().merge_with(op.inputs[1].get_shape())] + + +@ops.RegisterShape("CountUpTo") +def _CountUpToShape(op): + """Shape function for the CountUpTo op.""" + return [op.inputs[0].get_shape().merge_with(tensor_shape.scalar())] + + +@ops.RegisterShape("ScatterAdd") +@ops.RegisterShape("ScatterSub") +@ops.RegisterShape("ScatterUpdate") +def _ScatterUpdateShape(op): + """Shape function for the sparse update ops.""" + var_shape = op.inputs[0].get_shape() + indices_shape = op.inputs[1].get_shape() + unused_updates_shape = op.inputs[2].get_shape().merge_with( + indices_shape.concatenate(var_shape[1:])) + return [var_shape] diff --git a/tensorflow/python/ops/string_ops.py b/tensorflow/python/ops/string_ops.py new file mode 100644 index 0000000000..8181fe9a2a --- /dev/null +++ b/tensorflow/python/ops/string_ops.py @@ -0,0 +1,12 @@ +"""String Ops.""" + +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_util +from tensorflow.python.ops import common_shapes +from tensorflow.python.ops import gen_string_ops +# pylint: disable=wildcard-import,undefined-variable +from tensorflow.python.ops.gen_string_ops import * + +ops.NoGradient("StringToHashBucket") + +ops.RegisterShape("StringToHashBucket")(common_shapes.unchanged_shape) diff --git a/tensorflow/python/ops/summary_ops.py b/tensorflow/python/ops/summary_ops.py new file mode 100644 index 0000000000..d65fd1ea7c --- /dev/null +++ b/tensorflow/python/ops/summary_ops.py @@ -0,0 +1,177 @@ +"""Summary Operations.""" +# pylint: disable=wildcard-import,protected-access +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.ops import gen_summary_ops +from tensorflow.python.ops.gen_summary_ops import * + + +def _Collect(val, collections, default_collections): + if collections is None: + collections = default_collections + for key in collections: + ops.add_to_collection(key, val) + + +def histogram_summary(tag, values, collections=None, name=None): + """Outputs a `Summary` protocol buffer with a histogram. + + The generated + [`Summary`](https://tensorflow.googlesource.com/tensorflow/+/master/tensorflow/core/framework/summary.proto) + has one summary value containing a histogram for `values`. + + This op reports an `OutOfRange` error if any value is not finite. + + Args: + tag: A `string` `Tensor`. 0-D. Tag to use for the summary value. + values: A `float32` `Tensor`. Any shape. Values to use to build the + histogram. + collections: Optional list of graph collections keys. The new summary op is + added to these collections. Defaults to `[GraphKeys.SUMMARIES]`. 
+ name: A name for the operation (optional). + + Returns: + A scalar `Tensor` of type `string`. The serialized `Summary` protocol + buffer. + """ + with ops.op_scope([tag, values], name, "HistogramSummary") as scope: + val = gen_summary_ops._histogram_summary( + tag=tag, values=values, name=scope) + _Collect(val, collections, [ops.GraphKeys.SUMMARIES]) + return val + + +def image_summary(tag, tensor, max_images=None, collections=None, name=None): + """Outputs a `Summary` protocol buffer with images. + + The summary has up to `max_images` summary values containing images. The + images are built from `tensor` which must be 4-D with shape `[batch_size, + height, width, channels]` and where `channels` can be: + + * 1: `tensor` is interpreted as Grayscale. + * 3: `tensor` is interpreted as RGB. + * 4: `tensor` is interpreted as RGBA. + + The images have the same number of channels as the input tensor. Their values + are normalized, one image at a time, to fit in the range `[0, 255]`. The + op uses two different normalization algorithms: + + * If the input values are all positive, they are rescaled so the largest one + is 255. + + * If any input value is negative, the values are shifted so input value 0.0 + is at 127. They are then rescaled so that either the smallest value is 0, + or the largest one is 255. + + The `tag` argument is a scalar `Tensor` of type `string`. It is used to + build the `tag` of the summary values: + + * If `max_images` is 1, the summary value tag is '*tag*/image'. + * If `max_images` is greater than 1, the summary value tags are + generated sequentially as '*tag*/image/0', '*tag*/image/1', etc. + + Args: + tag: A scalar `Tensor` of type `string`. Used to build the `tag` + of the summary values. + tensor: A 4-D `float32` `Tensor` of shape `[batch_size, height, width, + channels]` where `channels` is 1, 3, or 4. + max_images: Max number of batch elements to generate images for. + collections: Optional list of ops.GraphKeys. The collections to add the + summary to. Defaults to [ops.GraphKeys.SUMMARIES] + name: A name for the operation (optional). + + Returns: + A scalar `Tensor` of type `string`. The serialized `Summary` protocol + buffer. + """ + with ops.op_scope([tag, tensor], name, "ImageSummary") as scope: + val = gen_summary_ops._image_summary( + tag=tag, tensor=tensor, max_images=max_images, name=scope) + _Collect(val, collections, [ops.GraphKeys.SUMMARIES]) + return val + + +def merge_summary(inputs, collections=None, name=None): + """Merges summaries. + + This op creates a + [`Summary`](https://tensorflow.googlesource.com/tensorflow/+/master/tensorflow/core/framework/summary.proto) + protocol buffer that contains the union of all the values in the input + summaries. + + When the Op is run, it reports an `InvalidArgument` error if multiple values + in the summaries to merge use the same tag. + + Args: + inputs: A list of `string` `Tensor` objects containing serialized `Summary` + protocol buffers. + collections: Optional list of graph collections keys. The new summary op is + added to these collections. Defaults to `[GraphKeys.SUMMARIES]`. + name: A name for the operation (optional). + + Returns: + A scalar `Tensor` of type `string`. The serialized `Summary` protocol + buffer resulting from the merging. 
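+
+  A minimal sketch of typical usage (the tags and tensors are illustrative;
+  the summaries merged here are just the ones defined above in this module):
+
+  ```python
+  import tensorflow as tf
+
+  losses = tf.constant([0.25, 0.5])
+  weights = tf.random_normal([10, 10])
+  summaries = [tf.scalar_summary(["train_loss", "val_loss"], losses),
+               tf.histogram_summary("weights", weights)]
+  merged = tf.merge_summary(summaries)
+  # `merged` is a scalar string tensor holding the merged Summary proto.
+  ```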
+ """ + with ops.op_scope(inputs, name, "MergeSummary") as scope: + val = gen_summary_ops._merge_summary(inputs=inputs, name=name) + _Collect(val, collections, []) + return val + + +def merge_all_summaries(key=ops.GraphKeys.SUMMARIES): + """Merges all summaries collected in the default graph. + + Args: + key: `GraphKey` used to collect the summaries. Defaults to + `GraphKeys.SUMMARIES`. + + Returns: + If no summaries were collected, returns None. Otherwise returns a scalar + `Tensor` of type`string` containing the serialized `Summary` protocol + buffer resulting from the merging. + """ + summary_ops = ops.get_collection(key) + if not summary_ops: + return None + else: + return merge_summary(summary_ops) + + +def scalar_summary(tags, values, collections=None, name=None): + """Outputs a `Summary` protocol buffer with scalar values. + + The input `tags` and `values` must have the same shape. The generated + summary has a summary value for each tag-value pair in `tags` and `values`. + + Args: + tags: A 1-D `string` `Tensor`. Tags for the summaries. + values: A 1-D `float32` or `float64` Tensor. Values for the summaries. + collections: Optional list of graph collections keys. The new summary op is + added to these collections. Defaults to `[GraphKeys.SUMMARIES]`. + name: A name for the operation (optional). + + Returns: + A scalar `Tensor` of type `string`. The serialized `Summary` protocol + buffer. + """ + with ops.op_scope([tags, values], name, "ScalarSummary") as scope: + val = gen_summary_ops._scalar_summary(tags=tags, values=values, name=scope) + _Collect(val, collections, [ops.GraphKeys.SUMMARIES]) + return val + + +ops.NoGradient("HistogramAccumulatorSummary") +ops.NoGradient("HistogramSummary") +ops.NoGradient("ImageSummary") +ops.NoGradient("MergeSummary") +ops.NoGradient("ScalarSummary") + + +@ops.RegisterShape("HistogramAccumulatorSummary") +@ops.RegisterShape("HistogramSummary") +@ops.RegisterShape("ImageSummary") +@ops.RegisterShape("MergeSummary") +@ops.RegisterShape("ScalarSummary") +def _ScalarShape(unused_op): + return [tensor_shape.scalar()] diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py new file mode 100644 index 0000000000..c9c2cac0a5 --- /dev/null +++ b/tensorflow/python/ops/variable_scope.py @@ -0,0 +1,333 @@ +"""A class to store named variables and a scope operator to manage sharing.""" + +import contextlib + +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.framework import types +from tensorflow.python.ops import init_ops +from tensorflow.python.ops import variables +from tensorflow.python.platform import logging + + +class _VariableStore(object): + """Variable store that carries a number of named Variables. + + New variable names and new variables can be created; all stored + variables are initialized with the initializer passed to __init__. + + Attributes: + vars: a dictionary with string names (same as passed in GetVar) as keys + and the corresponding TensorFlow Variables as values. + """ + + def __init__(self): + """Create a variable store.""" + self._vars = {} # A dictionary of the stored TensorFlow variables. + + def get_variable(self, name, shape=None, dtype=types.float32, + initializer=None, reuse=None, trainable=True, + collections=None): + """Gets an existing variable with these parameters or create a new one. + + If a variable with the given name is already stored, we return the stored + variable. Otherwise, we create a new one. 
+ + Set `reuse` to `True` when you only want to reuse existing Variables. + Set `reuse` to `False` when you only want to create new Variables. + If `reuse` is `None` (the default), both new and existing variables are + returned. + + If initializer is `None` (the default), the default initializer passed in + the constructor is used. If that one is `None` too, we use a new + `UniformUnitScalingInitializer`. + + Args: + name: the name of the new or existing variable. + shape: shape of the new or existing variable. + dtype: type of the new or existing variable (defaults to `DT_FLOAT`). + initializer: initializer for the variable. + reuse: a Boolean or `None`. Controls reuse or creation of variables. + trainable: If `True` also add the variable to the graph collection + `GraphKeys.TRAINABLE_VARIABLES` (see variables.Variable). + collections: List of graph collections keys to add the Variable to. + Defaults to `[GraphKeys.VARIABLES]` (see variables.Variable). + + Returns: + The created or existing variable. + + Raises: + ValueError: when creating a new variable and shape is not declared, + when reusing a variable and specifying a conflicting shape, + or when violating reuse during variable creation. + """ + should_check = reuse is not None + dtype = types.as_dtype(dtype) + shape = tensor_shape.as_shape(shape) + if name in self._vars: + # Here we handle the case when returning an existing variable. + if should_check and not reuse: + raise ValueError("Over-sharing: Variable %s already exists, disallowed." + " Did you mean to set reuse=True in VarScope?" % name) + found_var = self._vars[name] + if not shape.is_compatible_with(found_var.get_shape()): + raise ValueError("Trying to share variable %s, but specified shape %s" + " and found shape %s." % (name, str(shape), + str(found_var.get_shape()))) + if not dtype.is_compatible_with(found_var.dtype): + dtype_str = dtype.name + found_type_str = found_var.dtype.name + raise ValueError("Trying to share variable %s, but specified dtype %s" + " and found dtype %s." % (name, str(dtype_str), + str(found_type_str))) + return found_var + + # The code below handles only the case of creating a new variable. + if should_check and reuse: + raise ValueError("Under-sharing: Variable %s does not exist, disallowed." + " Did you mean to set reuse=None in VarScope?" % name) + if not shape.is_fully_defined(): + raise ValueError("Shape of a new variable (%s) must be fully defined, " + "but instead was %s." % (name, shape)) + if initializer is None: + initializer = init_ops.uniform_unit_scaling_initializer() + with ops.name_scope(name + "/Initializer/"): + init_val = initializer(shape.as_list(), dtype=dtype) + v = variables.Variable(init_val, name=name, trainable=trainable, + collections=collections) + self._vars[name] = v + logging.info("Created variable %s with shape %s and init %s", v.name, + format(shape), str(initializer)) + return v + + +class _VariableScope(object): + """Variable scope object to carry defaults to provide to get_variable. + + Many of the arguments we need for get_variable in a variable store are most + easily handled with a context. This object is used for the defaults. + + Attributes: + name: name of the current scope, used as prefix in get_variable. + initializer: default initializer passed to get_variable. + reuse: Boolean or None, setting the reuse in get_variable. 
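+
+  As an illustration (a sketch only; the scope and variable names are
+  arbitrary), the scope object captured from `variable_scope` can be switched
+  into reuse mode and then hands back already-created variables:
+
+  ```python
+  import tensorflow as tf
+
+  with tf.variable_scope("layer1") as scope:
+    w = tf.get_variable("w", [784, 100])
+    scope.reuse_variables()
+    w_again = tf.get_variable("w", [784, 100])  # Returns the same variable.
+  ```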
+ """ + + def __init__(self, reuse, name="", initializer=None): + self._name = name + self._initializer = initializer + self._reuse = reuse + + @property + def name(self): + return self._name + + @property + def reuse(self): + return self._reuse + + @property + def initializer(self): + return self._initializer + + def reuse_variables(self): + """Reuse variables in this scope.""" + self._reuse = True + + def set_initializer(self, initializer): + """Set initializer for this scope.""" + self._initializer = initializer + + def get_variable(self, var_store, name, shape=None, dtype=types.float32, + initializer=None, trainable=True, collections=None): + """Gets an existing variable with this name or create a new one.""" + if initializer is None and self._initializer: + initializer = self._initializer + full_name = self.name + "/" + name if self.name else name + # Variable names only depend on variable_scope (full_name here), + # not name_scope, so we reset it below for the time of variable creation. + with ops.name_scope(None): + return var_store.get_variable(full_name, shape, dtype, initializer, + self.reuse, trainable, collections) + + +_VARSTORE_KEY = ("__variable_store",) +_VARSCOPE_KEY = ("__varscope",) + + +def get_variable_scope(): + """Returns the current variable scope.""" + scope = ops.get_collection(_VARSCOPE_KEY) + if scope: # This collection has at most 1 element, the default scope at [0]. + return scope[0] + scope = _VariableScope(False) + ops.add_to_collection(_VARSCOPE_KEY, scope) + return scope + + +def _get_default_variable_store(): + store = ops.get_collection(_VARSTORE_KEY) + if store: + return store[0] + store = _VariableStore() + ops.add_to_collection(_VARSTORE_KEY, store) + return store + + +def get_variable(name, shape=None, dtype=types.float32, initializer=None, + trainable=True, collections=None): + """Gets an existing variable with these parameters or create a new one. + + This function prefixes the name with the current variable scope + and performs reuse checks. See the + [Variable Scope How To](../../how_tos/variable_scope/index.md) + for an extensive description of how reusing works. Here is a basic example: + + ```python + with tf.variable_scope("foo"): + v = get_variable("v", [1]) # v.name == "foo/v:0" + w = get_variable("w", [1]) # w.name == "foo/w:0" + with tf.variable_scope("foo", reuse=True) + v1 = get_variable("v") # The same as v above. + ``` + + If initializer is `None` (the default), the default initializer passed in + the constructor is used. If that one is `None` too, a + `UniformUnitScalingInitializer` will be used. + + Args: + name: the name of the new or existing variable. + shape: shape of the new or existing variable. + dtype: type of the new or existing variable (defaults to `DT_FLOAT`). + initializer: initializer for the variable if one is created. + trainable: If `True` also add the variable to the graph collection + `GraphKeys.TRAINABLE_VARIABLES` (see variables.Variable). + collections: List of graph collections keys to add the Variable to. + Defaults to `[GraphKeys.VARIABLES]` (see variables.Variable). + + Returns: + The created or existing variable. + + Raises: + ValueError: when creating a new variable and shape is not declared, + or when violating reuse during variable creation. Reuse is set inside + `variable_scope`. 
+ """ + return get_variable_scope().get_variable(_get_default_variable_store(), name, + shape, dtype, initializer, + trainable, collections) + + +@contextlib.contextmanager +def variable_scope(name_or_scope, reuse=None, initializer=None): + """Returns a context for variable scope. + + Variable scope allows to create new variables and to share already created + ones while providing checks to not create or share by accident. For details, + see the [Variable Scope How To](../../how_tos/variable_scope/index.md), + here we present only a few basic examples. + + Simple example of how to create a new variable: + + ```python + with tf.variable_scope("foo"): + with tf.variable_scope("bar"): + v = tf.get_variable("v", [1]) + assert v.name == "foo/bar/v:0" + ``` + + Basic example of sharing a variable: + + ```python + with tf.variable_scope("foo"): + v = get_variable("v", [1]) + with tf.variable_scope("foo", reuse=True): + v1 = tf.get_variable("v", [1]) + assert v1 == v + ``` + + Sharing a variable by capturing a scope and setting reuse: + + ```python + with tf.variable_scope("foo") as scope. + v = get_variable("v", [1]) + scope.reuse_variables() + v1 = tf.get_variable("v", [1]) + assert v1 == v + ``` + + To prevent accidental sharing of variables, we raise an exception when + getting an existing variable in a non-reusing scope. + + ```python + with tf.variable_scope("foo") as scope. + v = get_variable("v", [1]) + v1 = tf.get_variable("v", [1]) + # Raises ValueError("... v already exists ..."). + ``` + + Similarly, we raise an exception when trying to get a variable that + does not exist in reuse mode. + + ```python + with tf.variable_scope("foo", reuse=True): + v = get_variable("v", [1]) + # Raises ValueError("... v does not exists ..."). + ``` + + Note that the `reuse` flag is inherited: if we open a reusing scope, + then all its sub-scopes become reusing as well. + + Args: + name_or_scope: `string` or `VariableScope`: the scope to open. + reuse: `True` or `None`; if `True`, we go into reuse mode for this scope as + well as all sub-scopes; if `None`, we just inherit the parent scope reuse. + initializer: default initializer for variables within this scope. + + Yields: + A scope that can be to captured and reused. + + Raises: + ValueError: when trying to reuse within a create scope, or create within + a reuse scope, or if reuse is not `None` or `True`. + TypeError: when the types of some arguments are not appropriate. + """ + if not isinstance(name_or_scope, (_VariableScope, basestring)): + raise TypeError("VariableScope: name_scope must be a string or " + "VariableScope.") + if reuse not in [None, True]: + raise ValueError("VariableScope reuse parameter must be True or None.") + if not reuse and isinstance(name_or_scope, (_VariableScope)): + logging.info("Passing VariableScope to a non-reusing scope, intended?") + if reuse and isinstance(name_or_scope, (basestring)): + logging.info("Re-using string-named scope, consider capturing as object.") + get_variable_scope() # Ensure that a default exists, then get a pointer. + default_varscope = ops.get_collection(_VARSCOPE_KEY) + try: + old = default_varscope[0] + reuse = reuse or old.reuse # Re-using is inherited by sub-scopes. + if isinstance(name_or_scope, _VariableScope): + # Handler for the case when we jump to a shared scope. + # In this case, we leave the current name_scope unchanged. 
+ # We create a new VariableScope (default_varscope[0]) that contains + # a copy of the provided shared scope, possibly with changed reuse + # and initializer, if the user requested this. + default_varscope[0] = _VariableScope(reuse, name_or_scope.name, + name_or_scope.initializer) + if initializer: + default_varscope[0].set_initializer(initializer) + yield default_varscope[0] + else: + # Handler for the case when we just prolong current variable scope. + # In this case we prolong the current name_scope and create a new + # VariableScope with name extended by the provided one, and inherited + # reuse and initializer (except if the user provided values to set). + with ops.name_scope(name_or_scope): + new_name = old.name + "/" + name_or_scope if old.name else name_or_scope + default_varscope[0] = _VariableScope(reuse, name=new_name, + initializer=old.initializer) + if initializer: + default_varscope[0].set_initializer(initializer) + yield default_varscope[0] + finally: + default_varscope[0] = old diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py new file mode 100644 index 0000000000..dafd3b8bdc --- /dev/null +++ b/tensorflow/python/ops/variables.py @@ -0,0 +1,569 @@ +"""Variable class.""" +import tensorflow.python.platform + +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import state_ops + + +class Variable(object): + """See the [Variables How To](../../how_tos/variables/index.md) for a high + level overview. + + A variable maintains state in the graph across calls to `run()`. You add a + variable to the graph by constructing an instance of the class `Variable`. + + The `Variable()` constructor requires an initial value for the variable, + which can be a `Tensor` of any type and shape. The initial value defines the + type and shape of the variable. After construction, the type and shape of + the variable are fixed. The value can be changed using one of the assign + methods. + + If you want to change the shape of a variable later you have to use an + `assign` Op with `validate_shape=False`. + + Just like any `Tensor`, variables created with `Variable()` can be used as + inputs for other Ops in the graph. Additionally, all the operators + overloaded for the `Tensor` class are carried over to variables, so you can + also add nodes to the graph by just doing arithmetic on variables. + + ```python + import tensorflow as tf + + # Create a variable. + w = tf.Variable(<initial-value>, name=<optional-name>) + + # Use the variable in the graph like any Tensor. + y = tf.matmul(w, ...another variable or tensor...) + + # The overloaded operators are available too. + z = tf.sigmoid(w + b) + + # Assign a new value to the variable with `assign()` or a related method. + w.assign(w + 1.0) + w.assign_add(1.0) + ``` + + When you launch the graph, variables have to be explicitly initialized before + you can run Ops that use their value. You can initialize a variable by + running its *initializer op*, restoring the variable from a save file, or + simply running an `assign` Op that assigns a value to the variable. In fact, + the variable *initializer op* is just an `assign` Op that assigns the + variable's initial value to the variable itself. + + ```python + # Launch the graph in a session. + with tf.Session() as sess: + # Run the variable initializer. + sess.run(w.initializer) + # ...you now can run ops that use the value of 'w'... 
+ ```
+
+ The most common initialization pattern is to use the convenience function
+ `initialize_all_variables()` to add an Op to the graph that initializes
+ all the variables. You then run that Op after launching the graph.
+
+ ```python
+ # Add an Op to initialize all variables.
+ init_op = tf.initialize_all_variables()
+
+ # Launch the graph in a session.
+ with tf.Session() as sess:
+ # Run the Op that initializes all variables.
+ sess.run(init_op)
+ # ...you can now run any Op that uses variable values...
+ ```
+
+ If you need to create a variable with an initial value dependent on another
+ variable, use the other variable's `initialized_value()`. This ensures that
+ variables are initialized in the right order.
+
+ All variables are automatically collected in the graph where they are
+ created. By default, the constructor adds the new variable to the graph
+ collection `GraphKeys.VARIABLES`. The convenience function
+ `all_variables()` returns the contents of that collection.
+
+ When building a machine learning model, it is often convenient to distinguish
+ between variables holding the trainable model parameters and other variables
+ such as a `global step` variable used to count training steps. To make this
+ easier, the variable constructor supports a `trainable=<bool>` parameter. If
+ `True`, the new variable is also added to the graph collection
+ `GraphKeys.TRAINABLE_VARIABLES`. The convenience function
+ `trainable_variables()` returns the contents of this collection. The
+ various `Optimizer` classes use this collection as the default list of
+ variables to optimize.
+
+
+ Creating a variable.
+
+ @@__init__
+ @@initialized_value
+
+ Changing a variable value.
+
+ @@assign
+ @@assign_add
+ @@assign_sub
+ @@scatter_sub
+ @@count_up_to
+
+ @@eval
+
+ Properties.
+
+ @@name
+ @@dtype
+ @@get_shape
+ @@device
+ @@initializer
+ @@graph
+ @@op
+ """
+
+ def __init__(self, initial_value, trainable=True, collections=None,
+ validate_shape=True, name=None):
+ """Creates a new variable with value `initial_value`.
+
+ The new variable is added to the graph collections listed in `collections`,
+ which defaults to `[GraphKeys.VARIABLES]`.
+
+ If `trainable` is `True`, the variable is also added to the graph collection
+ `GraphKeys.TRAINABLE_VARIABLES`.
+
+ This constructor creates both a `variable` Op and an `assign` Op to set the
+ variable to its initial value.
+
+ Args:
+ initial_value: A `Tensor`, or Python object convertible to a `Tensor`.
+ The initial value for the Variable. Must have a shape specified unless
+ `validate_shape` is set to `False`.
+ trainable: If `True`, the default, also adds the variable to the graph
+ collection `GraphKeys.TRAINABLE_VARIABLES`. This collection is used as
+ the default list of variables to use by the `Optimizer` classes.
+ collections: List of graph collection keys. The new variable is added to
+ these collections. Defaults to `[GraphKeys.VARIABLES]`.
+ validate_shape: If `False`, allows the variable to be initialized with a
+ value of unknown shape. If `True`, the default, the shape of
+ `initial_value` must be known.
+ name: Optional name for the variable. Defaults to `'Variable'` and gets
+ uniquified automatically.
+
+ Returns:
+ A Variable.
+
+ Raises:
+ ValueError: If the initial value does not have a shape and
+ `validate_shape` is `True`.
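+
+ For illustration, a minimal sketch (the variable name is just an example):
+
+ ```python
+ # A non-trainable step counter: it is collected in GraphKeys.VARIABLES
+ # but kept out of GraphKeys.TRAINABLE_VARIABLES, so optimizers ignore it.
+ global_step = tf.Variable(0, trainable=False, name="global_step")
+ ```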
+ """ + if collections is None: + collections = [ops.GraphKeys.VARIABLES] + if trainable and ops.GraphKeys.TRAINABLE_VARIABLES not in collections: + # pylint: disable=g-no-augmented-assignment + # + # Pylint wants us to write collections += [...TRAINABLE_VARIABLES] which + # is not the same (it modifies the list in place.) Here, we only want to + # modify the value of the variable, not the list. + collections = collections + [ops.GraphKeys.TRAINABLE_VARIABLES] + # pylint: enable=g-no-augmented-assignment + with ops.op_scope([initial_value], name, "Variable") as name: + self._initial_value = ops.convert_to_tensor(initial_value, + name="initial_value") + if not self._initial_value.get_shape().is_fully_defined(): + if validate_shape: + raise ValueError( + "initial_value must have a shape specified: %s" + % self._initial_value) + self._variable = state_ops.variable_op( + [], self._initial_value.dtype.base_dtype, set_shape=False, + name=name) + with ops.device(self._variable.device): + self._initializer_op = state_ops.assign( + self._variable, self._initial_value, validate_shape=False).op + else: + self._variable = state_ops.variable_op( + self._initial_value.get_shape(), + self._initial_value.dtype.base_dtype, + name=name) + with ops.device(self._variable.device): + self._initializer_op = state_ops.assign( + self._variable, self._initial_value).op + for key in collections: + ops.add_to_collection(key, self) + self._save_slice_info = None + + def _as_graph_element(self): + """Conversion function for Graph.as_graph_element().""" + return self._variable + + def _AsTensor(self): + """Conversion function for ops.convert_to_tensor().""" + return self._variable + + def eval(self, session=None): + """In a session, computes and returns the value of this variable. + + This is not a graph construction method, it does not add ops to the graph. + + This convenience method requires a session where the graph containing this + variable has been launched. If no session is passed, the default session is + used. See the [Session class](../client.md#Session) for more information on + launching a graph and on sessions. + + ```python + v = tf.Variable([1, 2]) + init = tf.initialize_all_variables() + + with tf.Session() as sess: + sess.run(init) + # Usage passing the session explicitly. + print v.eval(sess) + # Usage with the default session. The 'with' block + # above makes 'sess' the default session. + print v.eval() + ``` + + Args: + session: The session to use to evaluate this variable. If + none, the default session is used. + + Returns: + A numpy `ndarray` with a copy of the value of this variable. + """ + return self._variable.eval(session=session) + + def initialized_value(self): + """Returns the value of the initialized variable. + + You should use this instead of the variable itself to initialize another + variable with a value that depends on the value of this variable. + + ```python + # Initialize 'v' with a random tensor. + v = tf.Variable(tf.truncated_normal([10, 40])) + # Use `initialized_value` to guarantee that `v` has been + # initialized before its value is used to initialize `w`. + # The random values are picked only once. + w = tf.Variable(v.initialized_value() * 2.0) + ``` + + Returns: + A `Tensor` holding the value of this variable after its initializer + has run. + """ + return control_flow_ops.with_dependencies( + [self._initializer_op], self._variable) + + def assign(self, value, use_locking=False): + """Assigns a new value to the variable. 
+ + This is essentially a shortcut for `assign(self, value)`. + + Args: + value: A `Tensor`. The new value for this variable. + use_locking: If `True`, use locking during the assignment. + + Returns: + A `Tensor` that will hold the new value of this variable after + the assignment has completed. + """ + return state_ops.assign(self._variable, value, use_locking=use_locking) + + def assign_add(self, delta, use_locking=False): + """Adds a value to this variable. + + This is essentially a shortcut for `assign_add(self, delta)`. + + Args: + delta: A `Tensor`. The value to add to this variable. + use_locking: If `True`, use locking during the operation. + + Returns: + A `Tensor` that will hold the new value of this variable after + the addition has completed. + """ + return state_ops.assign_add(self._variable, delta, use_locking=use_locking) + + def assign_sub(self, delta, use_locking=False): + """Subtracts a value from this variable. + + This is essentially a shortcut for `assign_sub(self, delta)`. + + Args: + delta: A `Tensor`. The value to subtract from this variable. + use_locking: If `True`, use locking during the operation. + + Returns: + A `Tensor` that will hold the new value of this variable after + the subtraction has completed. + """ + return state_ops.assign_sub(self._variable, delta, use_locking=use_locking) + + def scatter_sub(self, sparse_delta, use_locking=False): + """Subtracts `IndexedSlices` from this variable. + + This is essentially a shortcut for `scatter_sub(self, sparse_delta.indices, + sparse_delta.values)`. + + Args: + sparse_delta: `IndexedSlices` to be subtracted from this variable. + use_locking: If `True`, use locking during the operation. + + Returns: + A `Tensor` that will hold the new value of this variable after + the scattered subtraction has completed. + + Raises: + ValueError: if `sparse_delta` is not an `IndexedSlices`. + """ + if not isinstance(sparse_delta, ops.IndexedSlices): + raise ValueError("sparse_delta is not IndexedSlices: %s" % sparse_delta) + return state_ops.scatter_sub(self._variable, + sparse_delta.indices, + sparse_delta.values, + use_locking=use_locking) + + def count_up_to(self, limit): + """Increments this variable until it reaches `limit`. + + When that Op is run it tries to increment the variable by `1`. If + incrementing the variable would bring it above `limit` then the Op raises + the exception `OutOfRangeError`. + + If no error is raised, the Op outputs the value of the variable before + the increment. + + This is essentially a shortcut for `count_up_to(self, limit)`. + + Args: + limit: value at which incrementing the variable raises an error. + + Returns: + A `Tensor` that will hold the variable value before the increment. If no + other Op modifies this variable, the values produced will all be + distinct. + """ + return state_ops.count_up_to(self._variable, limit=limit) + + # Conversion to tensor. + @staticmethod + def _TensorConversionFunction(v, dtype=None, name=None): + """Utility function for converting a Variable to a Tensor.""" + _ = name + ret = v._AsTensor() # pylint: disable=protected-access + if dtype and not dtype.is_compatible_with(v.dtype): + raise ValueError( + "Incompatible type conversion requested to type '%s' for variable " + "of type '%s'" % (dtype.name, v.dtype.name)) + return ret + + # Operator overloading. + # + # To carry over all overloaded operators from ops.Tensor to Variable, we + # register the _RunOp() static method as the implementation of all operators. 
+ # That function dynamically discovers the overloaded operator in ops.Tensor + # and invokes it after converting the Variable to a tensor. + @staticmethod + def _OverloadAllOperators(): + """Register overloads for all operators.""" + for operator in ops.Tensor.OVERLOADABLE_OPERATORS: + Variable._OverloadOperator(operator) + + @staticmethod + def _OverloadOperator(operator): + """Register _RunOp as the implementation of 'operator'. + + Args: + operator: string. The operator name. + """ + if operator in ["__invert__", "__neg__", "__abs__"]: + setattr(Variable, operator, lambda a: Variable._RunOp(operator, a, None)) + else: + setattr(Variable, operator, lambda a, b: Variable._RunOp(operator, a, b)) + + @staticmethod + def _RunOp(operator, a, b): + """Run the operator 'op' for 'a'. + + Args: + operator: string. The operator name. + a: A Variable. + b: Second argument to the operator. None if unary. + Returns: + The result of the operator. + """ + # pylint: disable=protected-access + if b is not None: + return getattr(ops.Tensor, operator)(a._AsTensor(), b) + else: + return getattr(ops.Tensor, operator)(a._AsTensor()) + # pylint: enable=protected-access + + @property + def name(self): + """The name of this variable.""" + return self._variable.name + + @property + def initializer(self): + """The initializer operation for this variable.""" + return self._initializer_op + + @property + def device(self): + """The device of this variable.""" + return self._variable.device + + @property + def dtype(self): + """The `DType` of this variable.""" + return self._variable.dtype + + @property + def op(self): + """The `Operation` of this variable.""" + return self._variable.op + + @property + def graph(self): + """The `Graph` of this variable.""" + return self._variable.graph + + def get_shape(self): + """The `TensorShape` of this variable. + + Returns: + A `TensorShape`. + """ + return self._variable.get_shape() + + # Experimental support for saving variables as slices of a larger variable. + class SaveSliceInfo(object): + """Information on how to save this Variable as a slice.""" + + def __init__(self, name, spec): + """Create a SliceInfo. + + Args: + name: Name of the larger Tensor that this variable is a slice of. + spec: Slice specification for the saver. + """ + self.name = name + self.spec = spec + + def _set_save_slice_info(self, save_slice_info): + """Sets the slice info for this Variable. + + Args: + save_slice_info: A Variable.SliceInfo object. + """ + self._save_slice_info = save_slice_info + + +def all_variables(): + """Returns all variables collected in the graph. + + The `Variable()` constructor automatically adds new variables to the graph + collection `GraphKeys.VARIABLES`. This convenience function returns the + contents of that collection. + + Returns: + A list of `Variable` objects. + """ + return ops.get_collection(ops.GraphKeys.VARIABLES) + + +def trainable_variables(): + """Returns all variables created with `trainable=True`. + + When passed `trainable=True`, the `Variable()` constructor automatically + adds new variables to the graph collection + `GraphKeys.TRAINABLE_VARIABLES`. This convenience function returns the + contents of that collection. + + Returns: + A list of Variable objects. + """ + return ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) + + +def initialize_variables(var_list, name="init"): + """Returns an Op that initializes a list of variables. 
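+
+ For example (a minimal sketch, assuming this function is exported as
+ `tf.initialize_variables`; `v1` and `v2` stand for any variables):
+
+ ```python
+ v1 = tf.Variable(tf.zeros([10]))
+ v2 = tf.Variable(tf.zeros([5]))
+ # Initialize only this subset of the graph's variables.
+ init_subset = tf.initialize_variables([v1, v2], name="init_subset")
+ ```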
+
+ After you launch the graph in a session, you can run the returned Op to
+ initialize all the variables in `var_list`. This Op runs all the
+ initializers of the variables in `var_list` in parallel.
+
+ Calling `initialize_variables()` is equivalent to passing the list of
+ initializers to `group()`.
+
+ If `var_list` is empty, however, the function still returns an Op that can
+ be run. That Op just has no effect.
+
+ Args:
+ var_list: List of `Variable` objects to initialize.
+ name: Optional name for the returned operation.
+
+ Returns:
+ An Op that runs the initializers of all the specified variables.
+ """
+ if var_list:
+ return control_flow_ops.group(
+ *[v.initializer for v in var_list], name=name)
+ return control_flow_ops.no_op(name=name)
+
+
+def initialize_all_variables():
+ """Returns an Op that initializes all variables.
+
+ This is just a shortcut for `initialize_variables(all_variables())`.
+
+ Returns:
+ An Op that initializes all variables in the graph.
+ """
+ return initialize_variables(all_variables())
+
+
+def assert_variables_initialized(var_list=None):
+ """Returns an Op to check if variables are initialized.
+
+ When run, the returned Op will raise the exception `FailedPreconditionError`
+ if any of the variables has not yet been initialized.
+
+ Note: This function is implemented by trying to fetch the values of the
+ variables. If one of the variables is not initialized, a message may be
+ logged by the C++ runtime. This is expected.
+
+ Args:
+ var_list: List of `Variable` objects to check. Defaults to the
+ value of `all_variables()`.
+
+ Returns:
+ An Op, or None if there are no variables.
+ """
+ if var_list is None:
+ var_list = all_variables()
+ # Backwards compatibility for old-style variables. TODO(mdevin): remove.
+ if not var_list:
+ var_list = []
+ for op in ops.get_default_graph().get_operations():
+ if op.type in ["Variable", "AutoReloadVariable"]:
+ var_list.append(op.outputs[0])
+ if not var_list:
+ return None
+ else:
+ ranks = []
+ for var in var_list:
+ with ops.device(var.device):
+ ranks.append(array_ops.rank(var))
+ if len(ranks) == 1:
+ return ranks[0]
+ else:
+ return array_ops.pack(ranks)
+
+
+# pylint: disable=protected-access
+ops.register_tensor_conversion_function(Variable,
+ Variable._TensorConversionFunction)
+Variable._OverloadAllOperators()
+# pylint: enable=protected-access
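Taken together, a short usage sketch of this module's public surface (a minimal example, assuming a fresh default graph and that these functions are exported under `tf.` as the docstrings above indicate; names, shapes, and values are illustrative):

```python
import tensorflow as tf

w = tf.Variable(tf.zeros([2, 2]), name="w")           # trainable by default
step = tf.Variable(0, trainable=False, name="step")   # kept out of training

# Both variables are collected; only `w` is trainable.
assert len(tf.all_variables()) == 2
assert len(tf.trainable_variables()) == 1

init_op = tf.initialize_all_variables()
with tf.Session() as sess:
  sess.run(init_op)
  sess.run(step.assign_add(1))
  print step.eval()  # prints 1, using the default session
```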