author    2018-01-03 11:00:21 -0800
committer 2018-01-03 11:04:25 -0800
commit    613a160b55bf23d886e20f7512a79d57b9d2f83f
tree      16d579355fffc85cecc9ee0e257c6aea30c0e69f /tensorflow
parent    0f2fa9daa6b36e7dcad0b739ef4d08944e69ecce
Automated g4 rollback of changelist 180670333
PiperOrigin-RevId: 180691955
Diffstat (limited to 'tensorflow')
35 files changed, 14604 insertions, 917 deletions
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index bf29a0da79..b8ff44bc4b 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -3449,7 +3449,6 @@ tf_cc_test( ":ops", ":protos_all_cc", ":test", - ":test_main", ], ) diff --git a/tensorflow/core/api_def/api_test.cc b/tensorflow/core/api_def/api_test.cc index d689bf0480..2cdc14843f 100644 --- a/tensorflow/core/api_def/api_test.cc +++ b/tensorflow/core/api_def/api_test.cc @@ -13,7 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -// Test that validates tensorflow/core/api_def/base_api/api_def*.pbtxt files. +// Test that verifies tensorflow/core/api_def/base_api/api_def*.pbtxt files +// are correct. If api_def*.pbtxt do not match expected contents, run +// tensorflow/core/api_def/base_api/update_api_def.sh script to update them. #include <ctype.h> #include <algorithm> @@ -42,173 +44,309 @@ namespace tensorflow { namespace { constexpr char kDefaultApiDefDir[] = "tensorflow/core/api_def/base_api"; +constexpr char kOverridesFilePath[] = + "tensorflow/cc/ops/op_gen_overrides.pbtxt"; +constexpr char kApiDefFileFormat[] = "api_def_%s.pbtxt"; constexpr char kApiDefFilePattern[] = "api_def_*.pbtxt"; -} // namespace -// Returns a list of ops excluded from ApiDef. -// TODO(annarev): figure out if we should keep ApiDefs for these ops as well. -const std::unordered_set<string>* GetExcludedOps() { - static std::unordered_set<string>* excluded_ops = - new std::unordered_set<string>( - {"BigQueryReader", "GenerateBigQueryReaderPartitions"}); - return excluded_ops; +void FillBaseApiDef(ApiDef* api_def, const OpDef& op) { + api_def->set_graph_op_name(op.name()); + // Add arg docs + for (auto& input_arg : op.input_arg()) { + if (!input_arg.description().empty()) { + auto* api_def_in_arg = api_def->add_in_arg(); + api_def_in_arg->set_name(input_arg.name()); + api_def_in_arg->set_description(input_arg.description()); + } + } + for (auto& output_arg : op.output_arg()) { + if (!output_arg.description().empty()) { + auto* api_def_out_arg = api_def->add_out_arg(); + api_def_out_arg->set_name(output_arg.name()); + api_def_out_arg->set_description(output_arg.description()); + } + } + // Add attr docs + for (auto& attr : op.attr()) { + if (!attr.description().empty()) { + auto* api_def_attr = api_def->add_attr(); + api_def_attr->set_name(attr.name()); + api_def_attr->set_description(attr.description()); + } + } + // Add docs + api_def->set_summary(op.summary()); + api_def->set_description(op.description()); } -// Reads golden ApiDef files and returns a map from file name to ApiDef file -// contents. -void GetGoldenApiDefs(Env* env, const string& api_files_dir, - std::unordered_map<string, ApiDef>* name_to_api_def) { - std::vector<string> matching_paths; - TF_CHECK_OK(env->GetMatchingPaths( - io::JoinPath(api_files_dir, kApiDefFilePattern), &matching_paths)); - - for (auto& file_path : matching_paths) { - string file_contents; - TF_CHECK_OK(ReadFileToString(env, file_path, &file_contents)); - file_contents = PBTxtFromMultiline(file_contents); - - ApiDefs api_defs; - CHECK(tensorflow::protobuf::TextFormat::ParseFromString(file_contents, - &api_defs)) - << "Failed to load " << file_path; - CHECK_EQ(api_defs.op_size(), 1); - (*name_to_api_def)[api_defs.op(0).graph_op_name()] = api_defs.op(0); +// Checks if arg1 should be before arg2 according to ordering in args. 
+bool CheckArgBefore(const ApiDef::Arg* arg1, const ApiDef::Arg* arg2, + const protobuf::RepeatedPtrField<OpDef::ArgDef>& args) { + for (auto& arg : args) { + if (arg.name() == arg2->name()) { + return false; + } else if (arg.name() == arg1->name()) { + return true; + } } + return false; } -class ApiTest : public ::testing::Test { - protected: - ApiTest() { - OpRegistry::Global()->Export(false, &ops_); - const std::vector<string> multi_line_fields = {"description"}; +// Checks if attr1 should be before attr2 according to ordering in op_def. +bool CheckAttrBefore(const ApiDef::Attr* attr1, const ApiDef::Attr* attr2, + const OpDef& op_def) { + for (auto& attr : op_def.attr()) { + if (attr.name() == attr2->name()) { + return false; + } else if (attr.name() == attr1->name()) { + return true; + } + } + return false; +} - Env* env = Env::Default(); - GetGoldenApiDefs(env, kDefaultApiDefDir, &api_defs_map_); +// Applies renames to args. +void ApplyArgOverrides( + protobuf::RepeatedPtrField<ApiDef::Arg>* args, + const protobuf::RepeatedPtrField<OpGenOverride::Rename>& renames, + const protobuf::RepeatedPtrField<OpDef::ArgDef>& op_args, + const string& op_name) { + for (auto& rename : renames) { + // First check if rename is valid. + bool valid = false; + for (const auto& op_arg : op_args) { + if (op_arg.name() == rename.from()) { + valid = true; + } + } + QCHECK(valid) << rename.from() << " is not a valid argument for " + << op_name; + bool found_arg = false; + // If Arg is already in ApiDef, just update it. + for (int i = 0; i < args->size(); ++i) { + auto* arg = args->Mutable(i); + if (arg->name() == rename.from()) { + arg->set_rename_to(rename.to()); + found_arg = true; + break; + } + } + if (!found_arg) { // not in ApiDef, add a new arg. + auto* new_arg = args->Add(); + new_arg->set_name(rename.from()); + new_arg->set_rename_to(rename.to()); + } } - OpList ops_; - std::unordered_map<string, ApiDef> api_defs_map_; -}; + // We don't really need a specific order here right now. + // However, it is clearer if order follows OpDef. + std::sort(args->pointer_begin(), args->pointer_end(), + [&](ApiDef::Arg* arg1, ApiDef::Arg* arg2) { + return CheckArgBefore(arg1, arg2, op_args); + }); +} -// Check that all ops have an ApiDef. -TEST_F(ApiTest, AllOpsAreInApiDef) { - auto* excluded_ops = GetExcludedOps(); - for (const auto& op : ops_.op()) { - if (excluded_ops->find(op.name()) != excluded_ops->end()) { - continue; +// Returns existing attribute with the given name if such +// attribute exists. Otherwise, adds a new attribute and returns it. +ApiDef::Attr* FindOrAddAttr(ApiDef* api_def, const string attr_name) { + // If Attr is already in ApiDef, just update it. + for (int i = 0; i < api_def->attr_size(); ++i) { + auto* attr = api_def->mutable_attr(i); + if (attr->name() == attr_name) { + return attr; } - ASSERT_TRUE(api_defs_map_.find(op.name()) != api_defs_map_.end()) - << op.name() << " op does not have api_def_*.pbtxt file. " - << "Please add api_def_" << op.name() << ".pbtxt file " - << "under tensorflow/core/api_def/base_api/ directory."; } + // Add a new Attr. + auto* new_attr = api_def->add_attr(); + new_attr->set_name(attr_name); + return new_attr; } -// Check that ApiDefs have a corresponding op. -TEST_F(ApiTest, AllApiDefsHaveCorrespondingOp) { - std::unordered_set<string> op_names; - for (const auto& op : ops_.op()) { - op_names.insert(op.name()); +// Applies renames and default values to attributes. 
+void ApplyAttrOverrides(ApiDef* api_def, const OpGenOverride& op_override, + const OpDef& op_def) { + for (auto& attr_rename : op_override.attr_rename()) { + auto* attr = FindOrAddAttr(api_def, attr_rename.from()); + attr->set_rename_to(attr_rename.to()); } - for (const auto& name_and_api_def : api_defs_map_) { - ASSERT_TRUE(op_names.find(name_and_api_def.first) != op_names.end()) - << name_and_api_def.first << " op has ApiDef but missing from ops. " - << "Does api_def_" << name_and_api_def.first << " need to be deleted?"; + + for (auto& attr_default : op_override.attr_default()) { + auto* attr = FindOrAddAttr(api_def, attr_default.name()); + *(attr->mutable_default_value()) = attr_default.value(); } + // We don't really need a specific order here right now. + // However, it is clearer if order follows OpDef. + std::sort(api_def->mutable_attr()->pointer_begin(), + api_def->mutable_attr()->pointer_end(), + [&](ApiDef::Attr* attr1, ApiDef::Attr* attr2) { + return CheckAttrBefore(attr1, attr2, op_def); + }); } -string GetOpDefHasDocStringError(const string& op_name) { - return strings::Printf( - "OpDef for %s has a doc string. " - "Doc strings must be defined in ApiDef instead of OpDef. " - "Please, add summary and descriptions in api_def_%s" - ".pbtxt file instead", - op_name.c_str(), op_name.c_str()); +void ApplyOverridesToApiDef(ApiDef* api_def, const OpDef& op, + const OpGenOverride& op_override) { + // Fill ApiDef with data based on op and op_override. + // Set visibility + if (op_override.skip()) { + api_def->set_visibility(ApiDef_Visibility_SKIP); + } else if (op_override.hide()) { + api_def->set_visibility(ApiDef_Visibility_HIDDEN); + } + // Add endpoints + if (!op_override.rename_to().empty()) { + api_def->add_endpoint()->set_name(op_override.rename_to()); + } else if (!op_override.alias().empty()) { + api_def->add_endpoint()->set_name(op.name()); + } + + for (auto& alias : op_override.alias()) { + auto* endpoint = api_def->add_endpoint(); + endpoint->set_name(alias); + } + + ApplyArgOverrides(api_def->mutable_in_arg(), op_override.input_rename(), + op.input_arg(), api_def->graph_op_name()); + ApplyArgOverrides(api_def->mutable_out_arg(), op_override.output_rename(), + op.output_arg(), api_def->graph_op_name()); + ApplyAttrOverrides(api_def, op_override, op); } -// Check that OpDef's do not have descriptions and summaries. -// Descriptions and summaries must be in corresponding ApiDefs. -TEST_F(ApiTest, OpDefsShouldNotHaveDocs) { - auto* excluded_ops = GetExcludedOps(); - for (const auto& op : ops_.op()) { - if (excluded_ops->find(op.name()) != excluded_ops->end()) { +// Get map from ApiDef file path to corresponding ApiDefs proto. +std::unordered_map<string, ApiDefs> GenerateApiDef( + const string& api_def_dir, const OpList& ops, + const OpGenOverrides& overrides) { + std::unordered_map<string, OpGenOverride> name_to_override; + for (const auto& op_override : overrides.op()) { + name_to_override[op_override.name()] = op_override; + } + + std::unordered_map<string, ApiDefs> api_defs_map; + + // These ops are included in OpList only if TF_NEED_GCP + // is set to true. So, we skip them for now so that this test passes + // whether TF_NEED_GCP is set or not. 
+ const std::unordered_set<string> ops_to_exclude = { + "BigQueryReader", "GenerateBigQueryReaderPartitions"}; + for (const auto& op : ops.op()) { + CHECK(!op.name().empty()) + << "Encountered empty op name: %s" << op.DebugString(); + if (ops_to_exclude.find(op.name()) != ops_to_exclude.end()) { + LOG(INFO) << "Skipping " << op.name(); continue; } - ASSERT_TRUE(op.summary().empty()) << GetOpDefHasDocStringError(op.name()); - ASSERT_TRUE(op.description().empty()) - << GetOpDefHasDocStringError(op.name()); - for (const auto& arg : op.input_arg()) { - ASSERT_TRUE(arg.description().empty()) - << GetOpDefHasDocStringError(op.name()); - } - for (const auto& arg : op.output_arg()) { - ASSERT_TRUE(arg.description().empty()) - << GetOpDefHasDocStringError(op.name()); - } - for (const auto& attr : op.attr()) { - ASSERT_TRUE(attr.description().empty()) - << GetOpDefHasDocStringError(op.name()); + string file_path = io::JoinPath(api_def_dir, kApiDefFileFormat); + file_path = strings::Printf(file_path.c_str(), op.name().c_str()); + ApiDef* api_def = api_defs_map[file_path].add_op(); + FillBaseApiDef(api_def, op); + + if (name_to_override.find(op.name()) != name_to_override.end()) { + ApplyOverridesToApiDef(api_def, op, name_to_override[op.name()]); } } + return api_defs_map; } -// Checks that input arg names in an ApiDef match input -// arg names in corresponding OpDef. -TEST_F(ApiTest, AllApiDefInputArgsAreValid) { - for (const auto& op : ops_.op()) { - const auto& api_def = api_defs_map_[op.name()]; - for (const auto& api_def_arg : api_def.in_arg()) { - bool found_arg = false; - for (const auto& op_arg : op.input_arg()) { - if (api_def_arg.name() == op_arg.name()) { - found_arg = true; - break; - } - } - ASSERT_TRUE(found_arg) - << "Input argument " << api_def_arg.name() - << " (overwritten in api_def_" << op.name() - << ".pbtxt) is not defined in OpDef for " << op.name(); - } +// Reads golden ApiDef files and returns a map from file name to ApiDef file +// contents. +std::unordered_map<string, string> GetGoldenApiDefs( + Env* env, const string& api_files_dir) { + std::vector<string> matching_paths; + TF_CHECK_OK(env->GetMatchingPaths( + io::JoinPath(api_files_dir, kApiDefFilePattern), &matching_paths)); + + std::unordered_map<string, string> file_path_to_api_def; + for (auto& file_path : matching_paths) { + string file_contents; + TF_CHECK_OK(ReadFileToString(env, file_path, &file_contents)); + file_path_to_api_def[file_path] = file_contents; } + return file_path_to_api_def; } -// Checks that output arg names in an ApiDef match output -// arg names in corresponding OpDef. 
-TEST_F(ApiTest, AllApiDefOutputArgsAreValid) { - for (const auto& op : ops_.op()) { - const auto& api_def = api_defs_map_[op.name()]; - for (const auto& api_def_arg : api_def.out_arg()) { - bool found_arg = false; - for (const auto& op_arg : op.output_arg()) { - if (api_def_arg.name() == op_arg.name()) { - found_arg = true; - break; - } - } - ASSERT_TRUE(found_arg) - << "Output argument " << api_def_arg.name() - << " (overwritten in api_def_" << op.name() - << ".pbtxt) is not defined in OpDef for " << op.name(); +void RunApiTest(bool update_api_def, const string& api_files_dir) { + // Read C++ overrides file + OpGenOverrides overrides; + Env* env = Env::Default(); + TF_EXPECT_OK(ReadTextProto(env, kOverridesFilePath, &overrides)); + + // Read all ops + OpList ops; + OpRegistry::Global()->Export(false, &ops); + const std::vector<string> multi_line_fields = {"description"}; + + // Get expected ApiDefs + const auto new_api_defs_map = GenerateApiDef(api_files_dir, ops, overrides); + + bool updated_at_least_one_file = false; + const auto golden_api_defs_map = GetGoldenApiDefs(env, api_files_dir); + + for (auto new_api_entry : new_api_defs_map) { + const auto& file_path = new_api_entry.first; + std::string golden_api_defs_str = ""; + if (golden_api_defs_map.find(file_path) != golden_api_defs_map.end()) { + golden_api_defs_str = golden_api_defs_map.at(file_path); + } + string new_api_defs_str = new_api_entry.second.DebugString(); + new_api_defs_str = PBTxtToMultiline(new_api_defs_str, multi_line_fields); + if (golden_api_defs_str == new_api_defs_str) { + continue; + } + if (update_api_def) { + std::cout << "Updating " << file_path << "..." << std::endl; + TF_EXPECT_OK(WriteStringToFile(env, file_path, new_api_defs_str)); + updated_at_least_one_file = true; + } else { + EXPECT_EQ(golden_api_defs_str, new_api_defs_str) + << "To update golden API files, run " + << "tensorflow/core/api_def/update_api_def.sh."; } } -} -// Checks that attribute names in an ApiDef match attribute -// names in corresponding OpDef. -TEST_F(ApiTest, AllApiDefAttributeNamesAreValid) { - for (const auto& op : ops_.op()) { - const auto& api_def = api_defs_map_[op.name()]; - for (const auto& api_def_attr : api_def.attr()) { - bool found_attr = false; - for (const auto& op_attr : op.attr()) { - if (api_def_attr.name() == op_attr.name()) { - found_attr = true; - } + for (const auto& golden_api_entry : golden_api_defs_map) { + const auto& file_path = golden_api_entry.first; + if (new_api_defs_map.find(file_path) == new_api_defs_map.end()) { + if (update_api_def) { + std::cout << "Deleting " << file_path << "..." << std::endl; + TF_EXPECT_OK(env->DeleteFile(file_path)); + updated_at_least_one_file = true; + } else { + EXPECT_EQ("", golden_api_entry.second) + << "To update golden API files, run " + << "tensorflow/core/api_def/update_api_def.sh."; } - ASSERT_TRUE(found_attr) - << "Attribute " << api_def_attr.name() << " (overwritten in api_def_" - << op.name() << ".pbtxt) is not defined in OpDef for " << op.name(); } } + + if (update_api_def && !updated_at_least_one_file) { + std::cout << "Api def files are already up to date." 
<< std::endl; + } } + +TEST(ApiTest, GenerateBaseAPIDef) { RunApiTest(false, kDefaultApiDefDir); } +} // namespace } // namespace tensorflow + +int main(int argc, char** argv) { + bool update_api_def = false; + tensorflow::string api_files_dir = tensorflow::kDefaultApiDefDir; + std::vector<tensorflow::Flag> flag_list = { + tensorflow::Flag( + "update_api_def", &update_api_def, + "Whether to update tensorflow/core/api_def/base_api/api_def*.pbtxt " + "files if they differ from expected API."), + tensorflow::Flag("api_def_dir", &api_files_dir, + "Base directory of api_def*.pbtxt files.")}; + std::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + bool parsed_values_ok = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parsed_values_ok) { + std::cerr << usage << std::endl; + return 2; + } + if (update_api_def) { + tensorflow::port::InitMain(argv[0], &argc, &argv); + tensorflow::RunApiTest(update_api_def, api_files_dir); + return 0; + } + testing::InitGoogleTest(&argc, argv); + // Run tests + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc index 81543448be..c7d9b97461 100644 --- a/tensorflow/core/ops/array_ops.cc +++ b/tensorflow/core/ops/array_ops.cc @@ -261,7 +261,33 @@ REGISTER_OP("ParallelConcat") c->set_output(0, passed_shape); return Status::OK(); - }); + }) + .Doc(R"doc( +Concatenates a list of `N` tensors along the first dimension. + +The input tensors are all required to have size 1 in the first dimension. + +For example: + +``` +# 'x' is [[1, 4]] +# 'y' is [[2, 5]] +# 'z' is [[3, 6]] +parallel_concat([x, y, z]) => [[1, 4], [2, 5], [3, 6]] # Pack along first dim. +``` + +The difference between concat and parallel_concat is that concat requires all +of the inputs be computed before the operation will begin but doesn't require +that the input shapes be known during graph construction. Parallel concat +will copy pieces of the input into the output as they become available, in +some situations this can provide a performance benefit. + +values: Tensors to be concatenated. All must have size 1 in the first dimension + and same shape. +output: The concatenated tensor. +shape: the final shape of the result; should be equal to the shapes of any input + but with the number of input values in the first dimension. +)doc"); REGISTER_OP("Pack") .Input("values: N * T") @@ -297,7 +323,35 @@ REGISTER_OP("Pack") c->set_output(0, c->MakeShape(dims)); return Status::OK(); - }); + }) + .Doc(R"doc( +Packs a list of `N` rank-`R` tensors into one rank-`(R+1)` tensor. + +Packs the `N` tensors in `values` into a tensor with rank one higher than each +tensor in `values`, by packing them along the `axis` dimension. +Given a list of tensors of shape `(A, B, C)`; + +if `axis == 0` then the `output` tensor will have the shape `(N, A, B, C)`. +if `axis == 1` then the `output` tensor will have the shape `(A, N, B, C)`. +Etc. + +For example: + +``` +# 'x' is [1, 4] +# 'y' is [2, 5] +# 'z' is [3, 6] +pack([x, y, z]) => [[1, 4], [2, 5], [3, 6]] # Pack along first dim. +pack([x, y, z], axis=1) => [[1, 2, 3], [4, 5, 6]] +``` + +This is the opposite of `unpack`. + +values: Must be of same shape and type. +axis: Dimension along which to pack. Negative values wrap around, so the + valid range is `[-(R+1), R+1)`. +output: The packed tensor. 
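To make the `Pack` semantics above concrete, here is a minimal usage sketch assuming the TF 1.x Python API, where `Pack` is exposed as `tf.stack`:

```python
import tensorflow as tf

x = tf.constant([1, 4])
y = tf.constant([2, 5])
z = tf.constant([3, 6])

# Pack along the first dimension: result shape (3, 2).
a = tf.stack([x, y, z])
# Pack along axis 1: result shape (2, 3).
b = tf.stack([x, y, z], axis=1)

with tf.Session() as sess:
    print(sess.run(a))  # [[1 4] [2 5] [3 6]]
    print(sess.run(b))  # [[1 2 3] [4 5 6]]
```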
+)doc"); // -------------------------------------------------------------------------- REGISTER_OP("Unpack") @@ -333,7 +387,28 @@ REGISTER_OP("Unpack") } for (int i = 0; i < c->num_outputs(); ++i) c->set_output(i, out); return Status::OK(); - }); + }) + .Doc(R"doc( +Unpacks a given dimension of a rank-`R` tensor into `num` rank-`(R-1)` tensors. + +Unpacks `num` tensors from `value` by chipping it along the `axis` dimension. +For example, given a tensor of shape `(A, B, C, D)`; + +If `axis == 0` then the i'th tensor in `output` is the slice `value[i, :, :, :]` + and each tensor in `output` will have shape `(B, C, D)`. (Note that the + dimension unpacked along is gone, unlike `split`). + +If `axis == 1` then the i'th tensor in `output` is the slice `value[:, i, :, :]` + and each tensor in `output` will have shape `(A, C, D)`. +Etc. + +This is the opposite of `pack`. + +value: 1-D or higher, with `axis` dimension size equal to `num`. +axis: Dimension along which to unpack. Negative values wrap around, so the + valid range is `[-R, R)`. +output: The list of tensors unpacked from `value`. +)doc"); // -------------------------------------------------------------------------- // TODO(josh11b): Remove the >= 2 constraint, once we can rewrite the graph @@ -346,7 +421,18 @@ REGISTER_OP("Concat") .Attr("T: type") .SetShapeFn([](InferenceContext* c) { return shape_inference::ConcatShape(c, c->num_inputs() - 1); - }); + }) + .Doc(R"doc( +Concatenates tensors along one dimension. + +concat_dim: 0-D. The dimension along which to concatenate. Must be in the + range [0, rank(values)). +values: The `N` Tensors to concatenate. Their ranks and types must match, + and their sizes must match in all dimensions except `concat_dim`. +output: A `Tensor` with the concatenation of values stacked along the + `concat_dim` dimension. This tensor's shape matches that of `values` except + in `concat_dim` where it has the sum of the sizes. +)doc"); REGISTER_OP("ConcatV2") .Input("values: N * T") @@ -355,7 +441,18 @@ REGISTER_OP("ConcatV2") .Attr("N: int >= 2") .Attr("T: type") .Attr("Tidx: {int32, int64} = DT_INT32") - .SetShapeFn(shape_inference::ConcatV2Shape); + .SetShapeFn(shape_inference::ConcatV2Shape) + .Doc(R"doc( +Concatenates tensors along one dimension. + +values: List of `N` Tensors to concatenate. Their ranks and types must match, + and their sizes must match in all dimensions except `concat_dim`. +axis: 0-D. The dimension along which to concatenate. Must be in the + range [-rank(values), rank(values)). +output: A `Tensor` with the concatenation of values stacked along the + `concat_dim` dimension. This tensor's shape matches that of `values` except + in `concat_dim` where it has the sum of the sizes. +)doc"); // TODO(vivek.v.rane@intel.com): Prefix the op names with underscore if the ops // are not to be made user-accessible. @@ -389,7 +486,26 @@ REGISTER_OP("ConcatOffset") c->set_output(i - 1, c->input(i)); } return Status::OK(); - }); + }) + .Doc(R"doc( +Computes offsets of concat inputs within its output. + +For example: + +``` +# 'x' is [2, 2, 7] +# 'y' is [2, 3, 7] +# 'z' is [2, 5, 7] +concat_offset(2, [x, y, z]) => [0, 0, 0], [0, 2, 0], [0, 5, 0] +``` + +This is typically used by gradient computations for a concat operation. + +concat_dim: The dimension along which to concatenate. +shape: The `N` int32 vectors representing shape of tensors being concatenated. +offset: The `N` int32 vectors representing the starting offset + of input tensors within the concatenated output. 
+)doc"); // -------------------------------------------------------------------------- REGISTER_OP("Split") @@ -424,7 +540,19 @@ REGISTER_OP("Split") } for (int i = 0; i < num_split; ++i) c->set_output(i, out); return Status::OK(); - }); + }) + .Doc(R"doc( +Splits a tensor into `num_split` tensors along one dimension. + +split_dim: 0-D. The dimension along which to split. Must be in the range + `[-rank(value), rank(value))`. +num_split: The number of ways to split. Must evenly divide + `value.shape[split_dim]`. +value: The tensor to split. +output: They are identically shaped tensors, whose shape matches that of `value` + except along `split_dim`, where their sizes are + `values.shape[split_dim] / num_split`. +)doc"); REGISTER_OP("SplitV") .Input("value: T") @@ -519,7 +647,20 @@ REGISTER_OP("SplitV") } return Status::OK(); - }); + }) + .Doc(R"doc( +Splits a tensor into `num_split` tensors along one dimension. + +value: The tensor to split. +size_splits: list containing the sizes of each output tensor along the split + dimension. Must sum to the dimension of value along split_dim. + Can contain one -1 indicating that dimension is to be inferred. +split_dim: 0-D. The dimension along which to split. Must be in the range + `[-rank(value), rank(value))`. +output: Tensors whose shape matches that of `value` + except along `split_dim`, where their sizes are + `size_splits[i]`. +)doc"); // -------------------------------------------------------------------------- REGISTER_OP("Const") @@ -538,7 +679,12 @@ REGISTER_OP("Const") } c->set_output(0, c->MakeShape(dims)); return Status::OK(); - }); + }) + .Doc(R"doc( +Returns a constant tensor. + +value: Attr `value` is the tensor to return. +)doc"); // -------------------------------------------------------------------------- // TODO(mgubin): Update the doc when the freeze_graph script supports converting @@ -548,7 +694,17 @@ REGISTER_OP("ImmutableConst") .Attr("shape: shape") .Attr("memory_region_name: string") .Output("tensor: dtype") - .SetShapeFn(shape_inference::ExplicitShape); + .SetShapeFn(shape_inference::ExplicitShape) + .Doc(R"doc( +Returns immutable tensor from memory region. + +The current implementation memmaps the tensor from a file. + +dtype: Type of the returned tensor. +shape: Shape of the returned tensor. +memory_region_name: Name of readonly memory region used by the tensor, see + NewReadOnlyMemoryRegionFromFile in tensorflow::Env. +)doc"); REGISTER_OP("GuaranteeConst") .Input("input: T") @@ -558,14 +714,30 @@ REGISTER_OP("GuaranteeConst") return UnchangedShape(c); }) // We don't want this to be optimized away. - .SetIsStateful(); + .SetIsStateful() + .Doc(R"( +Gives a guarantee to the TF runtime that the input tensor is a constant. + +The runtime is then free to make optimizations based on this. + +Only accepts value typed tensors as inputs and rejects resource variable handles +as input. + +Returns the input tensor without modification. +)"); // -------------------------------------------------------------------------- REGISTER_OP("ZerosLike") .Input("x: T") .Output("y: T") .Attr("T: type") - .SetShapeFn(shape_inference::UnchangedShape); + .SetShapeFn(shape_inference::UnchangedShape) + .Doc(R"doc( +Returns a tensor of zeros with the same shape and type as x. + +x: a tensor of type T. +y: a tensor of the same shape and type as x but filled with zeros. 
+)doc"); // -------------------------------------------------------------------------- REGISTER_OP("OnesLike") @@ -574,7 +746,13 @@ REGISTER_OP("OnesLike") .Attr( "T: {bfloat16, float, double, int8, uint8, int16, uint16, int32, " "int64, complex64, complex128, bool}") - .SetShapeFn(shape_inference::UnchangedShape); + .SetShapeFn(shape_inference::UnchangedShape) + .Doc(R"doc( +Returns a tensor of ones with the same shape and type as x. + +x: a tensor of type T. +y: a tensor of the same shape and type as x but filled with ones. +)doc"); // -------------------------------------------------------------------------- REGISTER_OP("Diag") @@ -589,7 +767,30 @@ REGISTER_OP("Diag") TF_RETURN_IF_ERROR(c->Concatenate(in, in, &out)); c->set_output(0, out); return Status::OK(); - }); + }) + .Doc(R"doc( +Returns a diagonal tensor with a given diagonal values. + +Given a `diagonal`, this operation returns a tensor with the `diagonal` and +everything else padded with zeros. The diagonal is computed as follows: + +Assume `diagonal` has dimensions [D1,..., Dk], then the output is a tensor of +rank 2k with dimensions [D1,..., Dk, D1,..., Dk] where: + +`output[i1,..., ik, i1,..., ik] = diagonal[i1, ..., ik]` and 0 everywhere else. + +For example: + +``` +# 'diagonal' is [1, 2, 3, 4] +tf.diag(diagonal) ==> [[1, 0, 0, 0] + [0, 2, 0, 0] + [0, 0, 3, 0] + [0, 0, 0, 4]] +``` + +diagonal: Rank k tensor where k is at most 1. +)doc"); // -------------------------------------------------------------------------- REGISTER_OP("DiagPart") @@ -618,7 +819,33 @@ REGISTER_OP("DiagPart") } c->set_output(0, c->MakeShape(dims)); return Status::OK(); - }); + }) + .Doc(R"doc( +Returns the diagonal part of the tensor. + +This operation returns a tensor with the `diagonal` part +of the `input`. The `diagonal` part is computed as follows: + +Assume `input` has dimensions `[D1,..., Dk, D1,..., Dk]`, then the output is a +tensor of rank `k` with dimensions `[D1,..., Dk]` where: + +`diagonal[i1,..., ik] = input[i1, ..., ik, i1,..., ik]`. + +For example: + +``` +# 'input' is [[1, 0, 0, 0] + [0, 2, 0, 0] + [0, 0, 3, 0] + [0, 0, 0, 4]] + +tf.diag_part(input) ==> [1, 2, 3, 4] +``` + +input: Rank k tensor where k is even and not zero. +diagonal: The extracted diagonal. + +)doc"); // -------------------------------------------------------------------------- REGISTER_OP("MatrixDiag") @@ -638,7 +865,40 @@ REGISTER_OP("MatrixDiag") c->Concatenate(in, c->Vector(c->Dim(in, rank - 1)), &out)); c->set_output(0, out); return Status::OK(); - }); + }) + .Doc(R"doc( +Returns a batched diagonal tensor with a given batched diagonal values. + +Given a `diagonal`, this operation returns a tensor with the `diagonal` and +everything else padded with zeros. The diagonal is computed as follows: + +Assume `diagonal` has `k` dimensions `[I, J, K, ..., N]`, then the output is a +tensor of rank `k+1` with dimensions [I, J, K, ..., N, N]` where: + +`output[i, j, k, ..., m, n] = 1{m=n} * diagonal[i, j, k, ..., n]`. + +For example: + +``` +# 'diagonal' is [[1, 2, 3, 4], [5, 6, 7, 8]] + +and diagonal.shape = (2, 4) + +tf.matrix_diag(diagonal) ==> [[[1, 0, 0, 0] + [0, 2, 0, 0] + [0, 0, 3, 0] + [0, 0, 0, 4]], + [[5, 0, 0, 0] + [0, 6, 0, 0] + [0, 0, 7, 0] + [0, 0, 0, 8]]] + +which has shape (2, 4, 4) +``` + +diagonal: Rank `k`, where `k >= 1`. +output: Rank `k+1`, with `output.shape = diagonal.shape + [diagonal.shape[-1]]`. 
+)doc"); // -------------------------------------------------------------------------- REGISTER_OP("MatrixSetDiag") @@ -671,7 +931,27 @@ REGISTER_OP("MatrixSetDiag") } c->set_output(0, output); return Status::OK(); - }); + }) + .Doc(R"doc( +Returns a batched matrix tensor with new batched diagonal values. + +Given `input` and `diagonal`, this operation returns a tensor with the +same shape and values as `input`, except for the main diagonal of the +innermost matrices. These will be overwritten by the values in `diagonal`. + +The output is computed as follows: + +Assume `input` has `k+1` dimensions `[I, J, K, ..., M, N]` and `diagonal` has +`k` dimensions `[I, J, K, ..., min(M, N)]`. Then the output is a +tensor of rank `k+1` with dimensions `[I, J, K, ..., M, N]` where: + + * `output[i, j, k, ..., m, n] = diagonal[i, j, k, ..., n]` for `m == n`. + * `output[i, j, k, ..., m, n] = input[i, j, k, ..., m, n]` for `m != n`. + +input: Rank `k+1`, where `k >= 1`. +diagonal: Rank `k`, where `k >= 1`. +output: Rank `k+1`, with `output.shape = input.shape`. +)doc"); // -------------------------------------------------------------------------- REGISTER_OP("MatrixDiagPart") @@ -696,7 +976,43 @@ REGISTER_OP("MatrixDiagPart") dims.push_back(min_dim); c->set_output(0, c->MakeShape(dims)); return Status::OK(); - }); + }) + .Doc(R"doc( +Returns the batched diagonal part of a batched tensor. + +This operation returns a tensor with the `diagonal` part +of the batched `input`. The `diagonal` part is computed as follows: + +Assume `input` has `k` dimensions `[I, J, K, ..., M, N]`, then the output is a +tensor of rank `k - 1` with dimensions `[I, J, K, ..., min(M, N)]` where: + +`diagonal[i, j, k, ..., n] = input[i, j, k, ..., n, n]`. + +The input must be at least a matrix. + +For example: + +``` +# 'input' is [[[1, 0, 0, 0] + [0, 2, 0, 0] + [0, 0, 3, 0] + [0, 0, 0, 4]], + [[5, 0, 0, 0] + [0, 6, 0, 0] + [0, 0, 7, 0] + [0, 0, 0, 8]]] + +and input.shape = (2, 4, 4) + +tf.matrix_diag_part(input) ==> [[1, 2, 3, 4], [5, 6, 7, 8]] + +which has shape (2, 4) +``` + +input: Rank `k` tensor where `k >= 2`. +diagonal: The extracted diagonal(s) having shape + `diagonal.shape = input.shape[:-2] + [min(input.shape[-2:])]`. +)doc"); // -------------------------------------------------------------------------- REGISTER_OP("MatrixBandPart") @@ -705,7 +1021,57 @@ REGISTER_OP("MatrixBandPart") .Input("num_upper: int64") .Output("band: T") .Attr("T: type") - .SetShapeFn(shape_inference::UnchangedShape); + .SetShapeFn(shape_inference::UnchangedShape) + .Doc(R"doc( +Copy a tensor setting everything outside a central band in each innermost matrix +to zero. + +The `band` part is computed as follows: +Assume `input` has `k` dimensions `[I, J, K, ..., M, N]`, then the output is a +tensor with the same shape where + +`band[i, j, k, ..., m, n] = in_band(m, n) * input[i, j, k, ..., m, n]`. + +The indicator function + +`in_band(m, n) = (num_lower < 0 || (m-n) <= num_lower)) && + (num_upper < 0 || (n-m) <= num_upper)`. + +For example: + +``` +# if 'input' is [[ 0, 1, 2, 3] + [-1, 0, 1, 2] + [-2, -1, 0, 1] + [-3, -2, -1, 0]], + +tf.matrix_band_part(input, 1, -1) ==> [[ 0, 1, 2, 3] + [-1, 0, 1, 2] + [ 0, -1, 0, 1] + [ 0, 0, -1, 0]], + +tf.matrix_band_part(input, 2, 1) ==> [[ 0, 1, 0, 0] + [-1, 0, 1, 0] + [-2, -1, 0, 1] + [ 0, -2, -1, 0]] +``` + +Useful special cases: + +``` + tf.matrix_band_part(input, 0, -1) ==> Upper triangular part. + tf.matrix_band_part(input, -1, 0) ==> Lower triangular part. 
+ tf.matrix_band_part(input, 0, 0) ==> Diagonal. +``` + +input: Rank `k` tensor. +num_lower: 0-D tensor. Number of subdiagonals to keep. If negative, keep entire + lower triangle. +num_upper: 0-D tensor. Number of superdiagonals to keep. If negative, keep + entire upper triangle. +band: Rank `k` tensor of the same shape as input. The extracted banded tensor. + +)doc"); // -------------------------------------------------------------------------- REGISTER_OP("Reverse") @@ -729,7 +1095,59 @@ REGISTER_OP("Reverse") } c->set_output(0, input); return Status::OK(); - }); + }) + .Doc(R"Doc( +Reverses specific dimensions of a tensor. + +Given a `tensor`, and a `bool` tensor `dims` representing the dimensions +of `tensor`, this operation reverses each dimension i of `tensor` where +`dims[i]` is `True`. + +`tensor` can have up to 8 dimensions. The number of dimensions +of `tensor` must equal the number of elements in `dims`. In other words: + +`rank(tensor) = size(dims)` + +For example: + +``` +# tensor 't' is [[[[ 0, 1, 2, 3], +# [ 4, 5, 6, 7], +# [ 8, 9, 10, 11]], +# [[12, 13, 14, 15], +# [16, 17, 18, 19], +# [20, 21, 22, 23]]]] +# tensor 't' shape is [1, 2, 3, 4] + +# 'dims' is [False, False, False, True] +reverse(t, dims) ==> [[[[ 3, 2, 1, 0], + [ 7, 6, 5, 4], + [ 11, 10, 9, 8]], + [[15, 14, 13, 12], + [19, 18, 17, 16], + [23, 22, 21, 20]]]] + +# 'dims' is [False, True, False, False] +reverse(t, dims) ==> [[[[12, 13, 14, 15], + [16, 17, 18, 19], + [20, 21, 22, 23] + [[ 0, 1, 2, 3], + [ 4, 5, 6, 7], + [ 8, 9, 10, 11]]]] + +# 'dims' is [False, False, True, False] +reverse(t, dims) ==> [[[[8, 9, 10, 11], + [4, 5, 6, 7], + [0, 1, 2, 3]] + [[20, 21, 22, 23], + [16, 17, 18, 19], + [12, 13, 14, 15]]]] +``` + +tensor: Up to 8-D. +dims: 1-D. The dimensions to reverse. +output: The same shape as `tensor`. +)Doc"); // -------------------------------------------------------------------------- REGISTER_OP("ReverseV2") @@ -751,7 +1169,62 @@ REGISTER_OP("ReverseV2") } c->set_output(0, input); return Status::OK(); - }); + }) + .Doc(R"Doc( +Reverses specific dimensions of a tensor. + +NOTE `tf.reverse` has now changed behavior in preparation for 1.0. +`tf.reverse_v2` is currently an alias that will be deprecated before TF 1.0. + +Given a `tensor`, and a `int32` tensor `axis` representing the set of +dimensions of `tensor` to reverse. This operation reverses each dimension +`i` for which there exists `j` s.t. `axis[j] == i`. + +`tensor` can have up to 8 dimensions. The number of dimensions specified +in `axis` may be 0 or more entries. If an index is specified more than +once, a InvalidArgument error is raised. + +For example: + +``` +# tensor 't' is [[[[ 0, 1, 2, 3], +# [ 4, 5, 6, 7], +# [ 8, 9, 10, 11]], +# [[12, 13, 14, 15], +# [16, 17, 18, 19], +# [20, 21, 22, 23]]]] +# tensor 't' shape is [1, 2, 3, 4] + +# 'dims' is [3] or 'dims' is [-1] +reverse(t, dims) ==> [[[[ 3, 2, 1, 0], + [ 7, 6, 5, 4], + [ 11, 10, 9, 8]], + [[15, 14, 13, 12], + [19, 18, 17, 16], + [23, 22, 21, 20]]]] + +# 'dims' is '[1]' (or 'dims' is '[-3]') +reverse(t, dims) ==> [[[[12, 13, 14, 15], + [16, 17, 18, 19], + [20, 21, 22, 23] + [[ 0, 1, 2, 3], + [ 4, 5, 6, 7], + [ 8, 9, 10, 11]]]] + +# 'dims' is '[2]' (or 'dims' is '[-2]') +reverse(t, dims) ==> [[[[8, 9, 10, 11], + [4, 5, 6, 7], + [0, 1, 2, 3]] + [[20, 21, 22, 23], + [16, 17, 18, 19], + [12, 13, 14, 15]]]] +``` + +tensor: Up to 8-D. +axis: 1-D. The indices of the dimensions to reverse. Must be in the range + `[-rank(tensor), rank(tensor))`. +output: The same shape as `tensor`. 
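A brief sketch of the `ReverseV2` semantics above, assuming the post-1.0 `tf.reverse` endpoint that takes axis indices (negative values wrap around, as documented):

```python
import tensorflow as tf

t = tf.reshape(tf.range(24), [1, 2, 3, 4])

r_last = tf.reverse(t, axis=[-1])    # reverse dimension 3
r_mid  = tf.reverse(t, axis=[1, 2])  # reverse dimensions 1 and 2
```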
+)Doc"); // -------------------------------------------------------------------------- REGISTER_OP("EditDistance") @@ -793,7 +1266,65 @@ REGISTER_OP("EditDistance") c->set_output(0, c->MakeShape(dims)); return Status::OK(); - }); + }) + .Doc(R"doc( +Computes the (possibly normalized) Levenshtein Edit Distance. + +The inputs are variable-length sequences provided by SparseTensors + (hypothesis_indices, hypothesis_values, hypothesis_shape) +and + (truth_indices, truth_values, truth_shape). + +The inputs are: + +hypothesis_indices: The indices of the hypothesis list SparseTensor. + This is an N x R int64 matrix. +hypothesis_values: The values of the hypothesis list SparseTensor. + This is an N-length vector. +hypothesis_shape: The shape of the hypothesis list SparseTensor. + This is an R-length vector. +truth_indices: The indices of the truth list SparseTensor. + This is an M x R int64 matrix. +truth_values: The values of the truth list SparseTensor. + This is an M-length vector. +truth_shape: The shape of the truth list SparseTensor. + This is an R-length vector. +truth_shape: truth indices, vector. +normalize: boolean (if true, edit distances are normalized by length of truth). + +The output is: + +output: A dense float tensor with rank R - 1. + +For the example input: + + // hypothesis represents a 2x1 matrix with variable-length values: + // (0,0) = ["a"] + // (1,0) = ["b"] + hypothesis_indices = [[0, 0, 0], + [1, 0, 0]] + hypothesis_values = ["a", "b"] + hypothesis_shape = [2, 1, 1] + + // truth represents a 2x2 matrix with variable-length values: + // (0,0) = [] + // (0,1) = ["a"] + // (1,0) = ["b", "c"] + // (1,1) = ["a"] + truth_indices = [[0, 1, 0], + [1, 0, 0], + [1, 0, 1], + [1, 1, 0]] + truth_values = ["a", "b", "c", "a"] + truth_shape = [2, 2, 2] + normalize = true + +The output will be: + + // output is a 2x2 matrix with edit distances normalized by truth lengths. + output = [[inf, 1.0], // (0,0): no truth, (0,1): no hypothesis + [0.5, 1.0]] // (1,0): addition, (1,1): no hypothesis +)doc"); // -------------------------------------------------------------------------- REGISTER_OP("Fill") @@ -826,7 +1357,27 @@ REGISTER_OP("Fill") TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(0, &out)); c->set_output(0, out); return Status::OK(); - }); + }) + .Doc(R"doc( +Creates a tensor filled with a scalar value. + +This operation creates a tensor of shape `dims` and fills it with `value`. + +For example: + +``` +# Output tensor has shape [2, 3]. +fill([2, 3], 9) ==> [[9, 9, 9] + [9, 9, 9]] +``` + +dims: 1-D. Represents the shape of the output tensor. +value: 0-D (scalar). Value to fill the returned tensor. + +@compatibility(numpy) +Equivalent to np.full +@end_compatibility +)doc"); // -------------------------------------------------------------------------- REGISTER_OP("_ParallelConcatStart") @@ -888,7 +1439,36 @@ REGISTER_OP("Gather") TF_RETURN_IF_ERROR(c->Concatenate(indices_shape, params_subshape, &out)); c->set_output(0, out); return Status::OK(); - }); + }) + .Doc(R"doc( +Gather slices from `params` according to `indices`. + +`indices` must be an integer tensor of any dimension (usually 0-D or 1-D). +Produces an output tensor with shape `indices.shape + params.shape[1:]` where: + +```python + # Scalar indices + output[:, ..., :] = params[indices, :, ... :] + + # Vector indices + output[i, :, ..., :] = params[indices[i], :, ... :] + + # Higher rank indices + output[i, ..., j, :, ... 
:] = params[indices[i, ..., j], :, ..., :] +``` + +If `indices` is a permutation and `len(indices) == params.shape[0]` then +this operation will permute `params` accordingly. + +`validate_indices`: DEPRECATED. If this operation is assigned to CPU, values in +`indices` are always validated to be within range. If assigned to GPU, +out-of-bound indices result in safe but unspecified behavior, which may include +raising an error. + +<div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;"> +<img style="width:100%" src="https://www.tensorflow.org/images/Gather.png" alt> +</div> +)doc"); // -------------------------------------------------------------------------- REGISTER_OP("GatherV2") @@ -954,7 +1534,40 @@ REGISTER_OP("GatherV2") c->set_output(0, out); return Status::OK(); - }); + }) + .Doc(R"doc( +Gather slices from `params` axis `axis` according to `indices`. + +`indices` must be an integer tensor of any dimension (usually 0-D or 1-D). +Produces an output tensor with shape `params.shape[:axis] + indices.shape + +params.shape[axis + 1:]` where: + +```python + # Scalar indices (output is rank(params) - 1). + output[a_0, ..., a_n, b_0, ..., b_n] = + params[a_0, ..., a_n, indices, b_0, ..., b_n] + + # Vector indices (output is rank(params)). + output[a_0, ..., a_n, i, b_0, ..., b_n] = + params[a_0, ..., a_n, indices[i], b_0, ..., b_n] + + # Higher rank indices (output is rank(params) + rank(indices) - 1). + output[a_0, ..., a_n, i, ..., j, b_0, ... b_n] = + params[a_0, ..., a_n, indices[i, ..., j], b_0, ..., b_n] +``` + +<div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;"> +<img style="width:100%" src="https://www.tensorflow.org/images/Gather.png" alt> +</div> + +params: The tensor from which to gather values. Must be at least rank + `axis + 1`. +indices: Index tensor. Must be in range `[0, params.shape[axis])`. +axis: The axis in `params` to gather `indices` from. Defaults to the first + dimension. Supports negative indexes. +output: Values from `params` gathered from indices given by `indices`, with + shape `params.shape[:axis] + indices.shape + params.shape[axis + 1:]`. +)doc"); // -------------------------------------------------------------------------- REGISTER_OP("GatherNd") @@ -990,7 +1603,114 @@ REGISTER_OP("GatherNd") TF_RETURN_IF_ERROR(c->Concatenate(indices_slice, params_slice, &out)); c->set_output(0, out); return Status::OK(); - }); + }) + .Doc(R"doc( +Gather slices from `params` into a Tensor with shape specified by `indices`. + +`indices` is an K-dimensional integer tensor, best thought of as a +(K-1)-dimensional tensor of indices into `params`, where each element defines a +slice of `params`: + + output[i_0, ..., i_{K-2}] = params[indices[i0, ..., i_{K-2}]] + +Whereas in @{tf.gather} `indices` defines slices into the first +dimension of `params`, in `tf.gather_nd`, `indices` defines slices into the +first `N` dimensions of `params`, where `N = indices.shape[-1]`. + +The last dimension of `indices` can be at most the rank of +`params`: + + indices.shape[-1] <= params.rank + +The last dimension of `indices` corresponds to elements +(if `indices.shape[-1] == params.rank`) or slices +(if `indices.shape[-1] < params.rank`) along dimension `indices.shape[-1]` +of `params`. The output tensor has shape + + indices.shape[:-1] + params.shape[indices.shape[-1]:] + +Some examples below. 
+ +Simple indexing into a matrix: + +```python + indices = [[0, 0], [1, 1]] + params = [['a', 'b'], ['c', 'd']] + output = ['a', 'd'] +``` + +Slice indexing into a matrix: + +```python + indices = [[1], [0]] + params = [['a', 'b'], ['c', 'd']] + output = [['c', 'd'], ['a', 'b']] +``` + +Indexing into a 3-tensor: + +```python + indices = [[1]] + params = [[['a0', 'b0'], ['c0', 'd0']], + [['a1', 'b1'], ['c1', 'd1']]] + output = [[['a1', 'b1'], ['c1', 'd1']]] + + + indices = [[0, 1], [1, 0]] + params = [[['a0', 'b0'], ['c0', 'd0']], + [['a1', 'b1'], ['c1', 'd1']]] + output = [['c0', 'd0'], ['a1', 'b1']] + + + indices = [[0, 0, 1], [1, 0, 1]] + params = [[['a0', 'b0'], ['c0', 'd0']], + [['a1', 'b1'], ['c1', 'd1']]] + output = ['b0', 'b1'] +``` + +Batched indexing into a matrix: + +```python + indices = [[[0, 0]], [[0, 1]]] + params = [['a', 'b'], ['c', 'd']] + output = [['a'], ['b']] +``` + +Batched slice indexing into a matrix: + +```python + indices = [[[1]], [[0]]] + params = [['a', 'b'], ['c', 'd']] + output = [[['c', 'd']], [['a', 'b']]] +``` + +Batched indexing into a 3-tensor: + +```python + indices = [[[1]], [[0]]] + params = [[['a0', 'b0'], ['c0', 'd0']], + [['a1', 'b1'], ['c1', 'd1']]] + output = [[[['a1', 'b1'], ['c1', 'd1']]], + [[['a0', 'b0'], ['c0', 'd0']]]] + + indices = [[[0, 1], [1, 0]], [[0, 0], [1, 1]]] + params = [[['a0', 'b0'], ['c0', 'd0']], + [['a1', 'b1'], ['c1', 'd1']]] + output = [[['c0', 'd0'], ['a1', 'b1']], + [['a0', 'b0'], ['c1', 'd1']]] + + + indices = [[[0, 0, 1], [1, 0, 1]], [[0, 1, 1], [1, 1, 0]]] + params = [[['a0', 'b0'], ['c0', 'd0']], + [['a1', 'b1'], ['c1', 'd1']]] + output = [['b0', 'b1'], ['d0', 'c1']] +``` + +params: The tensor from which to gather values. +indices: Index tensor. +output: Values from `params` gathered from indices given by `indices`, with + shape `indices.shape[:-1] + params.shape[indices.shape[-1]:]`. +)doc"); // -------------------------------------------------------------------------- REGISTER_OP("Identity") @@ -1004,7 +1724,10 @@ REGISTER_OP("Identity") c->set_output_handle_shapes_and_types(0, *handle_data); } return Status::OK(); - }); + }) + .Doc(R"Doc( +Return a tensor with the same shape and contents as the input tensor or value. +)Doc"); REGISTER_OP("Snapshot") .Input("input: T") @@ -1017,7 +1740,8 @@ REGISTER_OP("Snapshot") c->set_output_handle_shapes_and_types(0, *handle_data); } return Status::OK(); - }); + }) + .Doc(R"Doc(Returns a copy of the input tensor.)Doc"); #ifdef INTEL_MKL REGISTER_OP("_MklIdentity") @@ -1047,7 +1771,25 @@ REGISTER_OP("IdentityN") TF_RETURN_IF_ERROR(c->input("input", &input)); TF_RETURN_IF_ERROR(c->set_output("output", input)); return Status::OK(); - }); + }) + .Doc(R"Doc( +Returns a list of tensors with the same shapes and contents as the input +tensors. + +This op can be used to override the gradient for complicated functions. For +example, suppose y = f(x) and we wish to apply a custom function g for backprop +such that dx = g(dy). In Python, + +```python +with tf.get_default_graph().gradient_override_map( + {'IdentityN': 'OverrideGradientWithG'}): + y, _ = identity_n([f(x), x]) + +@tf.RegisterGradient('OverrideGradientWithG') +def ApplyG(op, dy, _): + return [None, g(dy)] # Do not backprop to f(x). 
+``` +)Doc"); // -------------------------------------------------------------------------- REGISTER_OP("RefIdentity") @@ -1055,7 +1797,10 @@ REGISTER_OP("RefIdentity") .Output("output: Ref(T)") .Attr("T: type") .SetShapeFn(shape_inference::UnchangedShape) - .SetAllowsUninitializedInput(); + .SetAllowsUninitializedInput() + .Doc(R"Doc( +Return the same ref tensor as the input ref tensor. +)Doc"); // -------------------------------------------------------------------------- REGISTER_OP("DebugGradientIdentity") @@ -1063,21 +1808,66 @@ REGISTER_OP("DebugGradientIdentity") .Output("output: T") .Attr("T: type") .SetShapeFn(shape_inference::UnchangedShape) - .SetAllowsUninitializedInput(); + .SetAllowsUninitializedInput() + .Doc(R"Doc( +Identity op for gradient debugging. + +This op is hidden from public in Python. It is used by TensorFlow Debugger to +register gradient tensors for gradient debugging. +)Doc"); // -------------------------------------------------------------------------- REGISTER_OP("StopGradient") .Input("input: T") .Output("output: T") .Attr("T: type") - .SetShapeFn(shape_inference::UnchangedShape); + .SetShapeFn(shape_inference::UnchangedShape) + .Doc(R"Doc( +Stops gradient computation. + +When executed in a graph, this op outputs its input tensor as-is. + +When building ops to compute gradients, this op prevents the contribution of +its inputs to be taken into account. Normally, the gradient generator adds ops +to a graph to compute the derivatives of a specified 'loss' by recursively +finding out inputs that contributed to its computation. If you insert this op +in the graph it inputs are masked from the gradient generator. They are not +taken into account for computing gradients. + +This is useful any time you want to compute a value with TensorFlow but need +to pretend that the value was a constant. Some examples include: + +* The *EM* algorithm where the *M-step* should not involve backpropagation + through the output of the *E-step*. +* Contrastive divergence training of Boltzmann machines where, when + differentiating the energy function, the training must not backpropagate + through the graph that generated the samples from the model. +* Adversarial training, where no backprop should happen through the adversarial + example generation process. +)Doc"); REGISTER_OP("PreventGradient") .Input("input: T") .Output("output: T") .Attr("T: type") .Attr("message: string = ''") - .SetShapeFn(shape_inference::UnchangedShape); + .SetShapeFn(shape_inference::UnchangedShape) + .Doc(R"Doc( +An identity op that triggers an error if a gradient is requested. + +When executed in a graph, this op outputs its input tensor as-is. + +When building ops to compute gradients, the TensorFlow gradient system +will return an error when trying to lookup the gradient of this op, +because no gradient must ever be registered for this function. This +op exists to prevent subtle bugs from silently returning unimplemented +gradients in some corner cases. + +input: any tensor. +output: the same input tensor. +message: Will be printed in the error when anyone tries to differentiate +this operation. +)Doc"); // -------------------------------------------------------------------------- REGISTER_OP("CheckNumerics") @@ -1085,7 +1875,15 @@ REGISTER_OP("CheckNumerics") .Output("output: T") .Attr("T: {half, bfloat16, float, double}") .Attr("message: string") - .SetShapeFn(shape_inference::UnchangedShape); + .SetShapeFn(shape_inference::UnchangedShape) + .Doc(R"doc( +Checks a tensor for NaN and Inf values. 
+ +When run, reports an `InvalidArgument` error if `tensor` has any values +that are not a number (NaN) or infinity (Inf). Otherwise, passes `tensor` as-is. + +message: Prefix of the error message. +)doc"); // -------------------------------------------------------------------------- REGISTER_OP("Reshape") @@ -1094,9 +1892,69 @@ REGISTER_OP("Reshape") .Output("output: T") .Attr("T: type") .Attr("Tshape: {int32, int64} = DT_INT32") - .SetShapeFn([](InferenceContext* c) { - return SetOutputShapeForReshape(c); - }); + .SetShapeFn([](InferenceContext* c) { return SetOutputShapeForReshape(c); }) + .Doc(R"Doc( +Reshapes a tensor. + +Given `tensor`, this operation returns a tensor that has the same values +as `tensor` with shape `shape`. + +If one component of `shape` is the special value -1, the size of that dimension +is computed so that the total size remains constant. In particular, a `shape` +of `[-1]` flattens into 1-D. At most one component of `shape` can be -1. + +If `shape` is 1-D or higher, then the operation returns a tensor with shape +`shape` filled with the values of `tensor`. In this case, the number of elements +implied by `shape` must be the same as the number of elements in `tensor`. + +For example: + +``` +# tensor 't' is [1, 2, 3, 4, 5, 6, 7, 8, 9] +# tensor 't' has shape [9] +reshape(t, [3, 3]) ==> [[1, 2, 3], + [4, 5, 6], + [7, 8, 9]] + +# tensor 't' is [[[1, 1], [2, 2]], +# [[3, 3], [4, 4]]] +# tensor 't' has shape [2, 2, 2] +reshape(t, [2, 4]) ==> [[1, 1, 2, 2], + [3, 3, 4, 4]] + +# tensor 't' is [[[1, 1, 1], +# [2, 2, 2]], +# [[3, 3, 3], +# [4, 4, 4]], +# [[5, 5, 5], +# [6, 6, 6]]] +# tensor 't' has shape [3, 2, 3] +# pass '[-1]' to flatten 't' +reshape(t, [-1]) ==> [1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6] + +# -1 can also be used to infer the shape + +# -1 is inferred to be 9: +reshape(t, [2, -1]) ==> [[1, 1, 1, 2, 2, 2, 3, 3, 3], + [4, 4, 4, 5, 5, 5, 6, 6, 6]] +# -1 is inferred to be 2: +reshape(t, [-1, 9]) ==> [[1, 1, 1, 2, 2, 2, 3, 3, 3], + [4, 4, 4, 5, 5, 5, 6, 6, 6]] +# -1 is inferred to be 3: +reshape(t, [ 2, -1, 3]) ==> [[[1, 1, 1], + [2, 2, 2], + [3, 3, 3]], + [[4, 4, 4], + [5, 5, 5], + [6, 6, 6]]] + +# tensor 't' is [7] +# shape `[]` reshapes to a scalar +reshape(t, []) ==> 7 +``` + +shape: Defines the shape of the output tensor. +)Doc"); #ifdef INTEL_MKL REGISTER_OP("_MklReshape") @@ -1123,7 +1981,29 @@ REGISTER_OP("InvertPermutation") TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 1, &x)); c->set_output(0, x); return Status::OK(); - }); + }) + .Doc(R"doc( +Computes the inverse permutation of a tensor. + +This operation computes the inverse of an index permutation. It takes a 1-D +integer tensor `x`, which represents the indices of a zero-based array, and +swaps each value with its index position. In other words, for an output tensor +`y` and an input tensor `x`, this operation computes the following: + +`y[x[i]] = i for i in [0, 1, ..., len(x) - 1]` + +The values must include 0. There can be no duplicate values or negative values. + +For example: + +``` +# tensor `x` is [3, 4, 0, 2, 1] +invert_permutation(x) ==> [2, 4, 3, 0, 1] +``` + +x: 1-D. +y: 1-D. +)doc"); // -------------------------------------------------------------------------- REGISTER_OP("Transpose") @@ -1132,7 +2012,13 @@ REGISTER_OP("Transpose") .Output("y: T") .Attr("T: type") .Attr("Tperm: {int32, int64} = DT_INT32") - .SetShapeFn(TransposeShapeFn); + .SetShapeFn(TransposeShapeFn) + .Doc(R"doc( +Shuffle dimensions of x according to a permutation. 
+ +The output `y` has the same rank as `x`. The shapes of `x` and `y` satisfy: + `y.shape[i] == x.shape[perm[i]] for i in [0, 1, ..., rank(x) - 1]` +)doc"); // -------------------------------------------------------------------------- REGISTER_OP("ConjugateTranspose") @@ -1141,7 +2027,14 @@ REGISTER_OP("ConjugateTranspose") .Output("y: T") .Attr("T: type") .Attr("Tperm: {int32, int64} = DT_INT32") - .SetShapeFn(TransposeShapeFn); + .SetShapeFn(TransposeShapeFn) + .Doc(R"doc( +Shuffle dimensions of x according to a permutation and conjugate the result. + +The output `y` has the same rank as `x`. The shapes of `x` and `y` satisfy: + `y.shape[i] == x.shape[perm[i]] for i in [0, 1, ..., rank(x) - 1]` + `y[i,j,k,...,s,t,u] == conj(x[perm[i], perm[j], perm[k],...,perm[s], perm[t], perm[u]])` +)doc"); // -------------------------------------------------------------------------- REGISTER_OP("Unique") @@ -1154,7 +2047,30 @@ REGISTER_OP("Unique") c->set_output(0, c->Vector(InferenceContext::kUnknownDim)); c->set_output(1, c->input(0)); return Status::OK(); - }); + }) + .Doc(R"doc( +Finds unique elements in a 1-D tensor. + +This operation returns a tensor `y` containing all of the unique elements of `x` +sorted in the same order that they occur in `x`. This operation also returns a +tensor `idx` the same size as `x` that contains the index of each value of `x` +in the unique output `y`. In other words: + +`y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]` + +For example: + +``` +# tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8] +y, idx = unique(x) +y ==> [1, 2, 4, 7, 8] +idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4] +``` + +x: 1-D. +y: 1-D. +idx: 1-D. +)doc"); REGISTER_OP("UniqueV2") .Input("x: T") @@ -1167,7 +2083,34 @@ REGISTER_OP("UniqueV2") c->set_output(0, c->Vector(InferenceContext::kUnknownDim)); c->set_output(1, c->input(0)); return Status::OK(); - }); + }) + .Doc(R"doc( +Finds unique elements in a 1-D tensor. + +This operation returns a tensor `y` containing all of the unique elements of `x` +sorted in the same order that they occur in `x`. This operation also returns a +tensor `idx` the same size as `x` that contains the index of each value of `x` +in the unique output `y`. In other words: + +`y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]` + +For example: + +``` +# tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8] +y, idx = unique(x) +y ==> [1, 2, 4, 7, 8] +idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4] +``` + + +x: A `Tensor`. +axis: A `Tensor` of type `int64` (default: 0). The axis of the Tensor to + find the unique elements. +y: A `Tensor`. Unique elements along the `axis` of `Tensor` x. +idx: A 1-D Tensor. Has the same type as x that contains the index of each + value of x in the output y. +)doc"); // -------------------------------------------------------------------------- REGISTER_OP("UniqueWithCounts") @@ -1183,7 +2126,33 @@ REGISTER_OP("UniqueWithCounts") c->set_output(1, c->input(0)); c->set_output(2, uniq); return Status::OK(); - }); + }) + .Doc(R"doc( +Finds unique elements in a 1-D tensor. + +This operation returns a tensor `y` containing all of the unique elements of `x` +sorted in the same order that they occur in `x`. This operation also returns a +tensor `idx` the same size as `x` that contains the index of each value of `x` +in the unique output `y`. Finally, it returns a third tensor `count` that +contains the count of each element of `y` in `x`. 
In other words: + +`y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]` + +For example: + +``` +# tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8] +y, idx, count = unique_with_counts(x) +y ==> [1, 2, 4, 7, 8] +idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4] +count ==> [2, 1, 3, 1, 2] +``` + +x: 1-D. +y: 1-D. +idx: 1-D. +count: 1-D. +)doc"); namespace { @@ -1208,7 +2177,20 @@ REGISTER_OP("Shape") .Output("output: out_type") .Attr("T: type") .Attr("out_type: {int32, int64} = DT_INT32") - .SetShapeFn(ShapeShapeFn); + .SetShapeFn(ShapeShapeFn) + .Doc(R"doc( +Returns the shape of a tensor. + +This operation returns a 1-D integer tensor representing the shape of `input`. + +For example: + +``` +# 't' is [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]] +shape(t) ==> [2, 2, 3] +``` + +)doc"); REGISTER_OP("ShapeN") .Input("input: N * T") @@ -1216,7 +2198,12 @@ REGISTER_OP("ShapeN") .Attr("N: int") .Attr("T: type") .Attr("out_type: {int32, int64} = DT_INT32") - .SetShapeFn(ShapeShapeFn); + .SetShapeFn(ShapeShapeFn) + .Doc(R"doc( +Returns shape of tensors. + +This operation returns N 1-D integer tensors representing shape of `input[i]s`. +)doc"); // -------------------------------------------------------------------------- REGISTER_OP("ReverseSequence") @@ -1262,14 +2249,96 @@ REGISTER_OP("ReverseSequence") c->ReplaceDim(input, batch_dim, batch_dim_dim, &output_shape)); c->set_output(0, output_shape); return Status::OK(); - }); + }) + .Doc(R"doc( +Reverses variable length slices. + +This op first slices `input` along the dimension `batch_dim`, and for each +slice `i`, reverses the first `seq_lengths[i]` elements along +the dimension `seq_dim`. + +The elements of `seq_lengths` must obey `seq_lengths[i] <= input.dims[seq_dim]`, +and `seq_lengths` must be a vector of length `input.dims[batch_dim]`. + +The output slice `i` along dimension `batch_dim` is then given by input +slice `i`, with the first `seq_lengths[i]` slices along dimension +`seq_dim` reversed. + +For example: + +``` +# Given this: +batch_dim = 0 +seq_dim = 1 +input.dims = (4, 8, ...) +seq_lengths = [7, 2, 3, 5] + +# then slices of input are reversed on seq_dim, but only up to seq_lengths: +output[0, 0:7, :, ...] = input[0, 7:0:-1, :, ...] +output[1, 0:2, :, ...] = input[1, 2:0:-1, :, ...] +output[2, 0:3, :, ...] = input[2, 3:0:-1, :, ...] +output[3, 0:5, :, ...] = input[3, 5:0:-1, :, ...] + +# while entries past seq_lens are copied through: +output[0, 7:, :, ...] = input[0, 7:, :, ...] +output[1, 2:, :, ...] = input[1, 2:, :, ...] +output[2, 3:, :, ...] = input[2, 3:, :, ...] +output[3, 2:, :, ...] = input[3, 2:, :, ...] +``` + +In contrast, if: + +``` +# Given this: +batch_dim = 2 +seq_dim = 0 +input.dims = (8, ?, 4, ...) +seq_lengths = [7, 2, 3, 5] + +# then slices of input are reversed on seq_dim, but only up to seq_lengths: +output[0:7, :, 0, :, ...] = input[7:0:-1, :, 0, :, ...] +output[0:2, :, 1, :, ...] = input[2:0:-1, :, 1, :, ...] +output[0:3, :, 2, :, ...] = input[3:0:-1, :, 2, :, ...] +output[0:5, :, 3, :, ...] = input[5:0:-1, :, 3, :, ...] + +# while entries past seq_lens are copied through: +output[7:, :, 0, :, ...] = input[7:, :, 0, :, ...] +output[2:, :, 1, :, ...] = input[2:, :, 1, :, ...] +output[3:, :, 2, :, ...] = input[3:, :, 2, :, ...] +output[2:, :, 3, :, ...] = input[2:, :, 3, :, ...] +``` + +input: The input to reverse. +seq_lengths: 1-D with length `input.dims(batch_dim)` and + `max(seq_lengths) <= input.dims(seq_dim)` +seq_dim: The dimension which is partially reversed. +batch_dim: The dimension along which reversal is performed. 
+output: The partially reversed input. It has the same shape as `input`.
+)doc");

 // --------------------------------------------------------------------------
 REGISTER_OP("Rank")
     .Input("input: T")
     .Output("output: int32")
     .Attr("T: type")
-    .SetShapeFn(shape_inference::ScalarShape);
+    .SetShapeFn(shape_inference::ScalarShape)
+    .Doc(R"doc(
+Returns the rank of a tensor.
+
+This operation returns an integer representing the rank of `input`.
+
+For example:
+
+```
+# 't' is [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]]
+# shape of tensor 't' is [2, 2, 3]
+rank(t) ==> 3
+```
+
+**Note**: The rank of a tensor is not the same as the rank of a matrix. The rank
+of a tensor is the number of indices required to uniquely select each element
+of the tensor. Rank is also known as "order", "degree", or "ndims."
+)doc");

 // --------------------------------------------------------------------------
 REGISTER_OP("Size")
@@ -1277,7 +2346,21 @@
     .Output("output: out_type")
     .Attr("T: type")
     .Attr("out_type: {int32, int64} = DT_INT32")
-    .SetShapeFn(shape_inference::ScalarShape);
+    .SetShapeFn(shape_inference::ScalarShape)
+    .Doc(R"doc(
+Returns the size of a tensor.
+
+This operation returns an integer representing the number of elements in
+`input`.
+
+For example:
+
+```
+# 't' is [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]]
+size(t) ==> 12
+```
+
+)doc");

 namespace {

@@ -1394,7 +2477,24 @@ REGISTER_OP("Slice")
     }

     return Status::OK();
-  });
+  })
+  .Doc(R"doc(
+Returns a slice from 'input'.
+
+The output tensor is a tensor with dimensions described by 'size'
+whose values are extracted from 'input' starting at the offsets in
+'begin'.
+
+*Requirements*:
+  0 <= begin[i] <= begin[i] + size[i] <= Di for i in [0, n)
+
+begin: begin[i] specifies the offset into the 'i'th dimension of
+  'input' to slice from.
+size: size[i] specifies the number of elements of the 'i'th dimension
+  of 'input' to slice. If size[i] is -1, all remaining elements in dimension
+  i are included in the slice (i.e. this is equivalent to setting
+  size[i] = input.dim_size(i) - begin[i]).
+)doc");

 REGISTER_OP("StridedSlice")
     .Input("input: T")
@@ -1459,7 +2559,133 @@

       c->set_output(0, out);
       return Status::OK();
-    });
+    })
+    .Doc(R"doc(
+Returns a strided slice from `input`.
+
+Note: most Python users will want to use the Python `Tensor.__getitem__`
+or `Variable.__getitem__` rather than this op directly.
+
+The goal of this op is to produce a new tensor with a subset of
+the elements from the `n` dimensional `input` tensor. The subset is chosen using
+a sequence of `m` sparse range specifications encoded into the arguments
+of this function. Note that in some cases
+`m` could be equal to `n`, but this need not be the case. Each
+range specification entry can be one of the following:
+
+- An ellipsis (...). Ellipses are used to imply zero or more
+  dimensions of full-dimension selection and are produced using
+  `ellipsis_mask`. For example, `foo[...]` is the identity slice.
+
+- A new axis. This is used to insert a new shape=1 dimension and is
+  produced using `new_axis_mask`. For example, `foo[tf.newaxis, :]` where
+  `foo` is shape `(3, 4)` produces a `(1, 3, 4)` tensor.
+
+- A range `begin:end:stride`. This is used to specify how much to choose from
+  a given dimension. `stride` can be any integer but 0. `begin` is an integer
+  which represents the index of the first value to select, while `end`
+  represents the index at which to stop selecting (exclusive). The number of
+  values selected in each
+  dimension is `end - begin` if `stride > 0` and `begin - end` if `stride < 0`.
+  `begin` and `end` can be negative where `-1` is the last element, `-2` is
+  the second to last. `begin_mask` controls whether to replace the explicitly
+  given `begin` with an implicit effective value of `0` if `stride > 0` and
+  `-1` if `stride < 0`. `end_mask` is analogous but produces the number
+  required to create the largest open interval. For example, given a shape
+  `(3,)` tensor `foo[:]`, the effective `begin` and `end` are `0` and `3`. Do
+  not assume this is equivalent to `foo[0:-1]` which has an effective `begin`
+  and `end` of `0` and `2`. Another example is `foo[-2::-1]` which reverses the
+  first dimension of a tensor while dropping the last two (in the original
+  order) elements. For example `foo = [1,2,3,4]; foo[-2::-1]` is `[4,3]`.
+
+- A single index. This is used to keep only elements that have a given
+  index. For example, `foo[2, :]` on a shape `(5,6)` tensor produces a
+  shape `(6,)` tensor. This is encoded in `begin` and `end` and
+  `shrink_axis_mask`.
+
+Each conceptual range specification is encoded in the op's argument. This
+encoding is best understood by considering a non-trivial example. In
+particular,
+`foo[1, 2:4, None, ..., :-3:-1, :]` will be encoded as
+
+```
+begin = [1, 2, x, x, 0, x] # x denotes don't care (usually 0)
+end = [2, 4, x, x, -3, x]
+strides = [1, 1, x, x, -1, 1]
+begin_mask = 1<<4 | 1<<5 = 48
+end_mask = 1<<5 = 32
+ellipsis_mask = 1<<3 = 8
+new_axis_mask = 1<<2 = 4
+shrink_axis_mask = 1<<0 = 1
+```
+
+In this case if `foo.shape` is (5, 5, 5, 5, 5, 5) the final shape of
+the slice becomes (2, 1, 5, 5, 2, 5).
+Let us walk step by step through each argument specification.
+
+1. The first argument in the example slice is turned into `begin = 1` and
+`end = begin + 1 = 2`. To disambiguate from the original spec `2:4` we
+also set the appropriate bit in `shrink_axis_mask`.
+
+2. `2:4` contributes 2, 4, 1 to begin, end, and stride. All masks have
+zero bits contributed.
+
+3. None is a synonym for `tf.newaxis`. This means insert a dimension of size 1
+in the final shape. Dummy values are contributed to begin,
+end and stride, while the new_axis_mask bit is set.
+
+4. `...` grabs the full ranges from as many dimensions as needed to
+fully specify a slice for every dimension of the input shape.
+
+5. `:-3:-1` shows the use of negative indices. A negative index `i` associated
+with a dimension that has shape `s` is converted to a positive index
+`s + i`. So `-1` becomes `s-1` (i.e. the last element). This conversion
+is done internally, so begin, end and strides receive x, -3, and -1.
+The appropriate begin_mask bit is set to indicate the start range is the
+full range (ignoring the x).
+
+6. `:` indicates that the entire contents of the corresponding dimension
+are selected. This is equivalent to `::` or `0::1`. begin, end, and strides
+receive 0, 0, and 1, respectively. The appropriate bits in `begin_mask` and
+`end_mask` are also set.
+
+*Requirements*:
+  `0 != strides[i] for i in [0, m)`
+  `ellipsis_mask must be a power of two (only one ellipsis)`
+
+begin: `begin[k]` specifies the offset into the `k`th range specification.
+  The exact dimension this corresponds to will be determined by context.
+  Out-of-bounds values will be silently clamped. If the `k`th bit of
+  `begin_mask` is set, then `begin[k]` is ignored and the full range of the
+  appropriate dimension is used instead. Negative values cause indexing
+  to start from the highest element, e.g. if `foo == [1,2,3]` then
+  `foo[-1] == 3`.
+end: `end[i]` is like `begin` with the exception that `end_mask` is
+  used to determine full ranges.
+strides: `strides[i]` specifies the increment in the `i`th specification
+  after extracting a given element. Negative indices will reverse
+  the original order. Out-of-range values are
+  clamped to `[0, dim[i])` if `slice[i] > 0` or `[-1, dim[i]-1]` if
+  `slice[i] < 0`.
+begin_mask: a bitmask where a bit i being 1 means to ignore the begin
+  value and instead use the largest interval possible. At runtime
+  begin[i] will be replaced with `[0, n-1)` if `stride[i] > 0` or
+  `[-1, n-1]` if `stride[i] < 0`.
+end_mask: analogous to `begin_mask`.
+ellipsis_mask: a bitmask where bit `i` being 1 means the `i`th
+  position is actually an ellipsis. At most one bit can be 1.
+  If `ellipsis_mask == 0`, then an implicit ellipsis mask of `1 << (m+1)`
+  is provided. This means that `foo[3:5] == foo[3:5, ...]`. An ellipsis
+  implicitly creates as many range specifications as necessary to fully
+  specify the sliced range for every dimension. For example for a 4-dimensional
+  tensor `foo` the slice `foo[2, ..., 5:8]` implies `foo[2, :, :, 5:8]`.
+new_axis_mask: a bitmask where bit `i` being 1 means the `i`th
+  specification creates a new shape 1 dimension. For example
+  `foo[:4, tf.newaxis, :2]` would produce a shape `(4, 1, 2)` tensor.
+shrink_axis_mask: a bitmask where bit `i` implies that the `i`th
+  specification should shrink the dimensionality. begin and end
+  must imply a slice of size 1 in the dimension. For example in
+  Python one might do `foo[:, 3, :]` which would result in
+  `shrink_axis_mask` being 2.
+)doc");

 REGISTER_OP("StridedSliceGrad")
     .Input("shape: Index")
@@ -1480,7 +2706,19 @@
     TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(0, &out));
     c->set_output(0, out);
     return Status::OK();
-  });
+  })
+  .Doc(R"doc(
+Returns the gradient of `StridedSlice`.
+
+Since `StridedSlice` cuts out pieces of its `input` which is size
+`shape`, its gradient will have the same shape (which is passed here
+as `shape`). The gradient will be zero in any element that the slice
+does not select.
+
+Arguments are the same as `StridedSlice` with the exception that
+`dy` is the input gradient to be propagated and `shape` is the
+shape of `StridedSlice`'s `input`.
+)doc");

 REGISTER_OP("StridedSliceAssign")
     .Input("ref: Ref(T)")
@@ -1496,7 +2734,18 @@
     .Attr("ellipsis_mask: int = 0")
     .Attr("new_axis_mask: int = 0")
     .Attr("shrink_axis_mask: int = 0")
-    .SetShapeFn(shape_inference::UnchangedShape);
+    .SetShapeFn(shape_inference::UnchangedShape)
+    .Doc(R"doc(
+Assign `value` to the sliced l-value reference of `ref`.
+
+The values of `value` are assigned to the positions in the variable
+`ref` that are selected by the slice parameters. The slice parameters
+`begin`, `end`, `strides`, etc. work exactly as in `StridedSlice`.
+
+NOTE: this op currently does not support broadcasting and so `value`'s
+shape must be exactly the shape produced by the slice of `ref`.
+
+)doc");

 // TODO(aselle): Fix this documentation once StridedSliceAssign supports
 // broadcasting.
 // --------------------------------------------------------------------------
@@ -1514,7 +2763,18 @@ REGISTER_OP("ResourceStridedSliceAssign")
     .Attr("ellipsis_mask: int = 0")
     .Attr("new_axis_mask: int = 0")
     .Attr("shrink_axis_mask: int = 0")
-    .SetShapeFn(shape_inference::NoOutputs);
+    .SetShapeFn(shape_inference::NoOutputs)
+    .Doc(R"doc(
+Assign `value` to the sliced l-value reference of `ref`.
+
+The values of `value` are assigned to the positions in the variable
+`ref` that are selected by the slice parameters. The slice parameters
+`begin`, `end`, `strides`, etc. work exactly as in `StridedSlice`.
+
+NOTE: this op currently does not support broadcasting and so `value`'s
+shape must be exactly the shape produced by the slice of `ref`.
+
+)doc");

 REGISTER_OP("Tile")
     .Input("input: T")
@@ -1546,7 +2806,19 @@ REGISTER_OP("Tile")
     }
     c->set_output(0, c->MakeShape(dims));
     return Status::OK();
-  });
+  })
+  .Doc(R"doc(
+Constructs a tensor by tiling a given tensor.
+
+This operation creates a new tensor by replicating `input` `multiples` times.
+The output tensor's i'th dimension has `input.dims(i) * multiples[i]` elements,
+and the values of `input` are replicated `multiples[i]` times along the 'i'th
+dimension. For example, tiling `[a b c d]` by `[2]` produces
+`[a b c d a b c d]`.
+
+input: 1-D or higher.
+multiples: 1-D. Length must be the same as the number of dimensions in `input`.
+)doc");

 // --------------------------------------------------------------------------
 REGISTER_OP("TileGrad")
@@ -1555,7 +2827,14 @@
     .Output("output: T")
     .Attr("T: type")
     .Deprecated(3, "TileGrad has been replaced with reduce_sum")
-    .SetShapeFn(tensorflow::shape_inference::UnknownShape);
+    .SetShapeFn(tensorflow::shape_inference::UnknownShape)
+    .Doc(R"doc(
+Returns the gradient of `Tile`.
+
+Since `Tile` takes an input and repeats the input `multiples` times
+along each dimension, `TileGrad` takes in `multiples` and aggregates
+each repeated tile of `input` into `output`.
+)doc");

 // --------------------------------------------------------------------------
 REGISTER_OP("Where")
     .Input("input: bool")
@@ -1565,7 +2844,71 @@
     .SetShapeFn([](InferenceContext* c) {
       c->set_output(0, c->Matrix(c->UnknownDim(), c->Rank(c->input(0))));
       return Status::OK();
-    });
+    })
+    .Doc(R"doc(
+Returns locations of nonzero / true values in a tensor.
+
+This operation returns the coordinates of true elements in `input`. The
+coordinates are returned in a 2-D tensor where the first dimension (rows)
+represents the number of true elements, and the second dimension (columns)
+represents the coordinates of the true elements. Keep in mind, the shape of
+the output tensor can vary depending on how many true values there are in
+`input`. Indices are output in row-major order.
+
+For example:
+
+```
+# 'input' tensor is [[True, False]
+#                    [True, False]]
+# 'input' has two true values, so output has two coordinates.
+# 'input' has rank of 2, so coordinates have two indices.
+where(input) ==> [[0, 0],
+                  [1, 0]]
+
+# `input` tensor is [[[True, False]
+#                     [True, False]]
+#                    [[False, True]
+#                     [False, True]]
+#                    [[False, False]
+#                     [False, True]]]
+# 'input' has 5 true values, so output has 5 coordinates.
+# 'input' has rank of 3, so coordinates have three indices.
+where(input) ==> [[0, 0, 0],
+                  [0, 1, 0],
+                  [1, 0, 1],
+                  [1, 1, 1],
+                  [2, 1, 1]]
+
+# `input` tensor is [[[1.5, 0.0]
+#                     [-0.5, 0.0]]
+#                    [[0.0, 0.25]
+#                     [0.0, 0.75]]
+#                    [[0.0, 0.0]
+#                     [0.0, 0.01]]]
+# 'input' has 5 nonzero values, so output has 5 coordinates.
+# 'input' has rank of 3, so coordinates have three indices. +where(input) ==> [[0, 0, 0], + [0, 1, 0], + [1, 0, 1], + [1, 1, 1], + [2, 1, 1]] + +# `input` tensor is [[[1.5 + 0.0j, 0.0 + 0.0j] +# [0.0 + 0.5j, 0.0 + 0.0j]] +# [[0.0 + 0.0j, 0.25 + 1.5j] +# [0.0 + 0.0j, 0.75 + 0.0j]] +# [[0.0 + 0.0j, 0.0 + 0.0j] +# [0.0 + 0.0j, 0.01 + 0.0j]]] +# 'input' has 5 nonzero magnitude values, so output has 5 coordinates. +# 'input' has rank of 3, so coordinates have three indices. +where(input) ==> [[0, 0, 0], + [0, 1, 0], + [1, 0, 1], + [1, 1, 1], + [2, 1, 1]] +``` + +)doc"); // -------------------------------------------------------------------------- REGISTER_OP("BroadcastArgs") @@ -1592,7 +2935,13 @@ REGISTER_OP("BroadcastArgs") // Broadcasted shape is going to be as large as the largest dimension. c->set_output(0, c->Vector(std::max(x_dim, y_dim))); return Status::OK(); - }); + }) + .Doc(R"doc( +Return the shape of s0 op s1 with broadcast. + +Given `s0` and `s1`, tensors that represent shapes, compute `r0`, the +broadcasted shape. `s0`, `s1` and `r0` are all integer vectors. +)doc"); // -------------------------------------------------------------------------- REGISTER_OP("BroadcastGradientArgs") @@ -1609,7 +2958,12 @@ REGISTER_OP("BroadcastGradientArgs") c->set_output(0, c->Vector(InferenceContext::kUnknownDim)); c->set_output(1, c->Vector(InferenceContext::kUnknownDim)); return Status::OK(); - }); + }) + .Doc(R"doc( +Return the reduction indices for computing gradients of s0 op s1 with broadcast. + +This is typically used by gradient computations for a broadcasting operation. +)doc"); // -------------------------------------------------------------------------- REGISTER_OP("Pad") @@ -1618,7 +2972,34 @@ REGISTER_OP("Pad") .Output("output: T") .Attr("T: type") .Attr("Tpaddings: {int32, int64} = DT_INT32") - .SetShapeFn(PadShapeFn); + .SetShapeFn(PadShapeFn) + .Doc(R"doc( +Pads a tensor with zeros. + +This operation pads a `input` with zeros according to the `paddings` you +specify. `paddings` is an integer tensor with shape `[Dn, 2]`, where n is the +rank of `input`. For each dimension D of `input`, `paddings[D, 0]` indicates +how many zeros to add before the contents of `input` in that dimension, and +`paddings[D, 1]` indicates how many zeros to add after the contents of `input` +in that dimension. + +The padded size of each dimension D of the output is: + +`paddings(D, 0) + input.dim_size(D) + paddings(D, 1)` + +For example: + +``` +# 't' is [[1, 1], [2, 2]] +# 'paddings' is [[1, 1], [2, 2]] +# rank of 't' is 2 +pad(t, paddings) ==> [[0, 0, 0, 0, 0, 0] + [0, 0, 1, 1, 0, 0] + [0, 0, 2, 2, 0, 0] + [0, 0, 0, 0, 0, 0]] +``` + +)doc"); // -------------------------------------------------------------------------- REGISTER_OP("PadV2") @@ -1628,7 +3009,36 @@ REGISTER_OP("PadV2") .Output("output: T") .Attr("T: type") .Attr("Tpaddings: {int32, int64} = DT_INT32") - .SetShapeFn(PadShapeFn); + .SetShapeFn(PadShapeFn) + .Doc(R"doc( +Pads a tensor. + +This operation pads `input` according to the `paddings` and `constant_values` +you specify. `paddings` is an integer tensor with shape `[Dn, 2]`, where n is +the rank of `input`. For each dimension D of `input`, `paddings[D, 0]` indicates +how many padding values to add before the contents of `input` in that dimension, +and `paddings[D, 1]` indicates how many padding values to add after the contents +of `input` in that dimension. `constant_values` is a scalar tensor of the same +type as `input` that indicates the value to use for padding `input`. 
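+
+For example (a minimal illustrative case):
+
+```
+# 't' is [1, 2]
+# 'paddings' is [[1, 1]]
+# 'constant_values' is 7
+pad(t, paddings, constant_values) ==> [7, 1, 2, 7]
+```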
+
+The padded size of each dimension D of the output is:
+
+`paddings(D, 0) + input.dim_size(D) + paddings(D, 1)`
+
+For example:
+
+```
+# 't' is [[1, 1], [2, 2]]
+# 'paddings' is [[1, 1], [2, 2]]
+# 'constant_values' is 0
+# rank of 't' is 2
+pad(t, paddings) ==> [[0, 0, 0, 0, 0, 0]
+                      [0, 0, 1, 1, 0, 0]
+                      [0, 0, 2, 2, 0, 0]
+                      [0, 0, 0, 0, 0, 0]]
+```
+
+)doc");

 // --------------------------------------------------------------------------
 REGISTER_OP("MirrorPad")
     .Input("input: T")
@@ -1638,7 +3048,46 @@
     .Attr("T: type")
     .Attr("Tpaddings: {int32, int64} = DT_INT32")
     .Attr(GetMirrorPadModeAttrString())
-    .SetShapeFn(PadShapeFn);
+    .SetShapeFn(PadShapeFn)
+    .Doc(R"doc(
+Pads a tensor with mirrored values.
+
+This operation pads `input` with mirrored values according to the `paddings`
+you specify. `paddings` is an integer tensor with shape `[n, 2]`, where n is
+the rank of `input`. For each dimension D of `input`, `paddings[D, 0]` indicates
+how many values to add before the contents of `input` in that dimension, and
+`paddings[D, 1]` indicates how many values to add after the contents of `input`
+in that dimension. Both `paddings[D, 0]` and `paddings[D, 1]` must be no greater
+than `input.dim_size(D)` if `copy_border` is true, or `input.dim_size(D) - 1`
+if it is false.
+
+The padded size of each dimension D of the output is:
+
+`paddings(D, 0) + input.dim_size(D) + paddings(D, 1)`
+
+For example:
+
+```
+# 't' is [[1, 2, 3], [4, 5, 6]].
+# 'paddings' is [[1, 1], [2, 2]].
+# 'mode' is SYMMETRIC.
+# rank of 't' is 2.
+pad(t, paddings) ==> [[2, 1, 1, 2, 3, 3, 2]
+                      [2, 1, 1, 2, 3, 3, 2]
+                      [5, 4, 4, 5, 6, 6, 5]
+                      [5, 4, 4, 5, 6, 6, 5]]
+```
+
+input: The input tensor to be padded.
+paddings: A two-column matrix specifying the padding sizes. The number of
+  rows must be the same as the rank of `input`.
+mode: Either `REFLECT` or `SYMMETRIC`. In reflect mode the padded regions
+  do not include the borders, while in symmetric mode the padded regions
+  do include the borders. For example, if `input` is `[1, 2, 3]` and `paddings`
+  is `[0, 2]`, then the output is `[1, 2, 3, 2, 1]` in reflect mode, and
+  it is `[1, 2, 3, 3, 2]` in symmetric mode.
+output: The padded tensor.
+)doc");

 // --------------------------------------------------------------------------
 namespace {

@@ -1700,7 +3149,35 @@ REGISTER_OP("MirrorPadGrad")
     } else {
       return MirrorPadKnown<int64>(c, input, paddings_t, input_rank);
     }
-  });
+  })
+  .Doc(R"doc(
+Gradient op for `MirrorPad` op. This op folds a mirror-padded tensor.
+
+This operation folds the padded areas of `input` produced by `MirrorPad`
+according to the `paddings` you specify. `paddings` must be the same as the
+`paddings` argument given to the corresponding `MirrorPad` op.
+
+The folded size of each dimension D of the output is:
+
+`input.dim_size(D) - paddings(D, 0) - paddings(D, 1)`
+
+For example:
+
+```
+# 't' is [[1, 2, 3], [4, 5, 6], [7, 8, 9]].
+# 'paddings' is [[0, 1], [0, 1]].
+# 'mode' is SYMMETRIC.
+# rank of 't' is 2.
+pad(t, paddings) ==> [[ 1,  5]
+                      [11, 28]]
+```
+
+input: The input tensor to be folded.
+paddings: A two-column matrix specifying the padding sizes. The number of
+  rows must be the same as the rank of `input`.
+mode: The mode used in the `MirrorPad` op.
+output: The folded tensor.
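+
+In the example above, each folded entry is the sum of the values that were
+mirrored onto it; for instance `28 = 5 + 6 + 8 + 9` (illustrative arithmetic
+for the example values under SYMMETRIC mode).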
+)doc"); // -------------------------------------------------------------------------- REGISTER_OP("Placeholder") @@ -1722,7 +3199,19 @@ REGISTER_OP("Placeholder") TF_RETURN_IF_ERROR(c->MakeShapeFromPartialTensorShape(shape, &out)); c->set_output(0, out); return Status::OK(); - }); + }) + .Doc(R"doc( +A placeholder op for a value that will be fed into the computation. + +N.B. This operation will fail with an error if it is executed. It is +intended as a way to represent a value that will always be fed, and to +provide attrs that enable the fed value to be checked at runtime. + +output: A placeholder tensor that must be replaced using the feed mechanism. +dtype: The type of elements in the tensor. +shape: (Optional) The shape of the tensor. If the shape has 0 dimensions, the + shape is unconstrained. +)doc"); // Placeholder was modified in a backwards compatible way to do what // PlaceholderV2 did, so we have deprecated V2 (no one was really @@ -1732,7 +3221,19 @@ REGISTER_OP("PlaceholderV2") .Attr("dtype: type") .Attr("shape: shape") .SetShapeFn(shape_inference::ExplicitShape) - .Deprecated(23, "Placeholder now behaves the same as PlaceholderV2."); + .Deprecated(23, "Placeholder now behaves the same as PlaceholderV2.") + .Doc(R"doc( +A placeholder op for a value that will be fed into the computation. + +N.B. This operation will fail with an error if it is executed. It is +intended as a way to represent a value that will always be fed, and to +provide attrs that enable the fed value to be checked at runtime. + +output: A placeholder tensor that must be replaced using the feed mechanism. +dtype: The type of elements in the tensor. +shape: The shape of the tensor. The shape can be any partially-specified + shape. To be unconstrained, pass in a shape with unknown rank. +)doc"); // -------------------------------------------------------------------------- REGISTER_OP("PlaceholderWithDefault") @@ -1753,7 +3254,15 @@ REGISTER_OP("PlaceholderWithDefault") TF_RETURN_IF_ERROR(c->Merge(input, out, &unused)); c->set_output(0, out); return Status::OK(); - }); + }) + .Doc(R"doc( +A placeholder op that passes through `input` when its output is not fed. + +input: The default value to produce when `output` is not fed. +output: A placeholder tensor that defaults to `input` if it is not fed. +dtype: The type of elements in the tensor. +shape: The (possibly partial) shape of the tensor. +)doc"); // -------------------------------------------------------------------------- REGISTER_OP("ExpandDims") @@ -1803,7 +3312,47 @@ REGISTER_OP("ExpandDims") TF_RETURN_IF_ERROR(c->Concatenate(output, end, &output)); c->set_output(0, output); return Status::OK(); - }); + }) + .Doc(R"doc( +Inserts a dimension of 1 into a tensor's shape. + +Given a tensor `input`, this operation inserts a dimension of 1 at the +dimension index `dim` of `input`'s shape. The dimension index `dim` starts at +zero; if you specify a negative number for `dim` it is counted backward from +the end. + +This operation is useful if you want to add a batch dimension to a single +element. For example, if you have a single image of shape `[height, width, +channels]`, you can make it a batch of 1 image with `expand_dims(image, 0)`, +which will make the shape `[1, height, width, channels]`. 
+ +Other examples: + +``` +# 't' is a tensor of shape [2] +shape(expand_dims(t, 0)) ==> [1, 2] +shape(expand_dims(t, 1)) ==> [2, 1] +shape(expand_dims(t, -1)) ==> [2, 1] + +# 't2' is a tensor of shape [2, 3, 5] +shape(expand_dims(t2, 0)) ==> [1, 2, 3, 5] +shape(expand_dims(t2, 2)) ==> [2, 3, 1, 5] +shape(expand_dims(t2, 3)) ==> [2, 3, 5, 1] +``` + +This operation requires that: + +`-1-input.dims() <= dim <= input.dims()` + +This operation is related to `squeeze()`, which removes dimensions of +size 1. + +dim: 0-D (scalar). Specifies the dimension index at which to + expand the shape of `input`. Must be in the range + `[-rank(input) - 1, rank(input)]`. +output: Contains the same data as `input`, but its shape has an additional + dimension of size 1 added. +)doc"); // -------------------------------------------------------------------------- REGISTER_OP("Squeeze") @@ -1871,7 +3420,36 @@ REGISTER_OP("Squeeze") c->set_output(0, c->MakeShape(result_shape)); return Status::OK(); - }); + }) + .Doc(R"doc( +Removes dimensions of size 1 from the shape of a tensor. + +Given a tensor `input`, this operation returns a tensor of the same type with +all dimensions of size 1 removed. If you don't want to remove all size 1 +dimensions, you can remove specific size 1 dimensions by specifying +`squeeze_dims`. + +For example: + +``` +# 't' is a tensor of shape [1, 2, 1, 3, 1, 1] +shape(squeeze(t)) ==> [2, 3] +``` + +Or, to remove specific size 1 dimensions: + +``` +# 't' is a tensor of shape [1, 2, 1, 3, 1, 1] +shape(squeeze(t, [2, 4])) ==> [1, 2, 3, 1] +``` + +input: The `input` to squeeze. +squeeze_dims: If specified, only squeezes the dimensions listed. The dimension + index starts at 0. It is an error to squeeze a dimension that is not 1. Must + be in the range `[-rank(input), rank(input))`. +output: Contains the same data as `input`, but has one or more dimensions of + size 1 removed. +)doc"); // -------------------------------------------------------------------------- REGISTER_OP("ListDiff") @@ -1890,7 +3468,37 @@ REGISTER_OP("ListDiff") c->set_output(0, out); c->set_output(1, out); return Status::OK(); - }); + }) + .Doc(R"doc( +Computes the difference between two lists of numbers or strings. + +Given a list `x` and a list `y`, this operation returns a list `out` that +represents all values that are in `x` but not in `y`. The returned list `out` +is sorted in the same order that the numbers appear in `x` (duplicates are +preserved). This operation also returns a list `idx` that represents the +position of each `out` element in `x`. In other words: + +`out[i] = x[idx[i]] for i in [0, 1, ..., len(out) - 1]` + +For example, given this input: + +``` +x = [1, 2, 3, 4, 5, 6] +y = [1, 3, 5] +``` + +This operation would return: + +``` +out ==> [2, 4, 6] +idx ==> [1, 3, 5] +``` + +x: 1-D. Values to keep. +y: 1-D. Values to remove. +out: 1-D. Values present in `x` but not in `y`. +idx: 1-D. Positions of `x` values preserved in `out`. +)doc"); namespace { @@ -2078,7 +3686,133 @@ REGISTER_OP("SpaceToBatchND") return SpaceToBatchShapeHelper(c, c->input(0), c->input(1), c->input_tensor(1), c->input(2), c->input_tensor(2)); - }); + }) + .Doc(R"doc( +SpaceToBatch for N-D tensors of type T. 
+ +This operation divides "spatial" dimensions `[1, ..., M]` of the input into a +grid of blocks of shape `block_shape`, and interleaves these blocks with the +"batch" dimension (0) such that in the output, the spatial dimensions +`[1, ..., M]` correspond to the position within the grid, and the batch +dimension combines both the position within a spatial block and the original +batch position. Prior to division into blocks, the spatial dimensions of the +input are optionally zero padded according to `paddings`. See below for a +precise description. + +input: N-D with shape `input_shape = [batch] + spatial_shape + remaining_shape`, + where spatial_shape has `M` dimensions. + +block_shape: 1-D with shape `[M]`, all values must be >= 1. + +paddings: 2-D with shape `[M, 2]`, all values must be >= 0. + `paddings[i] = [pad_start, pad_end]` specifies the padding for input dimension + `i + 1`, which corresponds to spatial dimension `i`. It is required that + `block_shape[i]` divides `input_shape[i + 1] + pad_start + pad_end`. + +This operation is equivalent to the following steps: + +1. Zero-pad the start and end of dimensions `[1, ..., M]` of the + input according to `paddings` to produce `padded` of shape `padded_shape`. + +2. Reshape `padded` to `reshaped_padded` of shape: + + [batch] + + [padded_shape[1] / block_shape[0], + block_shape[0], + ..., + padded_shape[M] / block_shape[M-1], + block_shape[M-1]] + + remaining_shape + +3. Permute dimensions of `reshaped_padded` to produce + `permuted_reshaped_padded` of shape: + + block_shape + + [batch] + + [padded_shape[1] / block_shape[0], + ..., + padded_shape[M] / block_shape[M-1]] + + remaining_shape + +4. Reshape `permuted_reshaped_padded` to flatten `block_shape` into the batch + dimension, producing an output tensor of shape: + + [batch * prod(block_shape)] + + [padded_shape[1] / block_shape[0], + ..., + padded_shape[M] / block_shape[M-1]] + + remaining_shape + +Some examples: + +(1) For the following input of shape `[1, 2, 2, 1]`, `block_shape = [2, 2]`, and + `paddings = [[0, 0], [0, 0]]`: + +``` +x = [[[[1], [2]], [[3], [4]]]] +``` + +The output tensor has shape `[4, 1, 1, 1]` and value: + +``` +[[[[1]]], [[[2]]], [[[3]]], [[[4]]]] +``` + +(2) For the following input of shape `[1, 2, 2, 3]`, `block_shape = [2, 2]`, and + `paddings = [[0, 0], [0, 0]]`: + +``` +x = [[[[1, 2, 3], [4, 5, 6]], + [[7, 8, 9], [10, 11, 12]]]] +``` + +The output tensor has shape `[4, 1, 1, 3]` and value: + +``` +[[[1, 2, 3]], [[4, 5, 6]], [[7, 8, 9]], [[10, 11, 12]]] +``` + +(3) For the following input of shape `[1, 4, 4, 1]`, `block_shape = [2, 2]`, and + `paddings = [[0, 0], [0, 0]]`: + +``` +x = [[[[1], [2], [3], [4]], + [[5], [6], [7], [8]], + [[9], [10], [11], [12]], + [[13], [14], [15], [16]]]] +``` + +The output tensor has shape `[4, 2, 2, 1]` and value: + +``` +x = [[[[1], [3]], [[9], [11]]], + [[[2], [4]], [[10], [12]]], + [[[5], [7]], [[13], [15]]], + [[[6], [8]], [[14], [16]]]] +``` + +(4) For the following input of shape `[2, 2, 4, 1]`, block_shape = `[2, 2]`, and + paddings = `[[0, 0], [2, 0]]`: + +``` +x = [[[[1], [2], [3], [4]], + [[5], [6], [7], [8]]], + [[[9], [10], [11], [12]], + [[13], [14], [15], [16]]]] +``` + +The output tensor has shape `[8, 1, 3, 1]` and value: + +``` +x = [[[[0], [1], [3]]], [[[0], [9], [11]]], + [[[0], [2], [4]]], [[[0], [10], [12]]], + [[[0], [5], [7]]], [[[0], [13], [15]]], + [[[0], [6], [8]]], [[[0], [14], [16]]]] +``` + +Among others, this operation is useful for reducing atrous convolution into +regular convolution. 
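+
+As a quick shape check (illustrative figures, following the steps above): an
+input of shape `[2, 6, 6, 3]` with `block_shape = [3, 3]` and zero `paddings`
+produces an output of shape `[2 * 3 * 3, 6 / 3, 6 / 3, 3] = [18, 2, 2, 3]`.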
+)doc"); // -------------------------------------------------------------------------- REGISTER_OP("SpaceToBatch") @@ -2103,7 +3837,106 @@ REGISTER_OP("SpaceToBatch") return SpaceToBatchShapeHelper(c, input_shape, c->MakeShape({2}), &block_shape, c->input(1), c->input_tensor(1)); - }); + }) + .Doc(R"doc( +SpaceToBatch for 4-D tensors of type T. + +This is a legacy version of the more general SpaceToBatchND. + +Zero-pads and then rearranges (permutes) blocks of spatial data into batch. +More specifically, this op outputs a copy of the input tensor where values from +the `height` and `width` dimensions are moved to the `batch` dimension. After +the zero-padding, both `height` and `width` of the input must be divisible by the +block size. + +input: 4-D with shape `[batch, height, width, depth]`. + +paddings: 2-D tensor of non-negative integers with shape `[2, 2]`. It specifies + the padding of the input with zeros across the spatial dimensions as follows: + + paddings = [[pad_top, pad_bottom], [pad_left, pad_right]] + + The effective spatial dimensions of the zero-padded input tensor will be: + + height_pad = pad_top + height + pad_bottom + width_pad = pad_left + width + pad_right + +The attr `block_size` must be greater than one. It indicates the block size. + + * Non-overlapping blocks of size `block_size x block size` in the height and + width dimensions are rearranged into the batch dimension at each location. + * The batch of the output tensor is `batch * block_size * block_size`. + * Both height_pad and width_pad must be divisible by block_size. + +The shape of the output will be: + + [batch*block_size*block_size, height_pad/block_size, width_pad/block_size, + depth] + +Some examples: + +(1) For the following input of shape `[1, 2, 2, 1]` and block_size of 2: + +``` +x = [[[[1], [2]], [[3], [4]]]] +``` + +The output tensor has shape `[4, 1, 1, 1]` and value: + +``` +[[[[1]]], [[[2]]], [[[3]]], [[[4]]]] +``` + +(2) For the following input of shape `[1, 2, 2, 3]` and block_size of 2: + +``` +x = [[[[1, 2, 3], [4, 5, 6]], + [[7, 8, 9], [10, 11, 12]]]] +``` + +The output tensor has shape `[4, 1, 1, 3]` and value: + +``` +[[[1, 2, 3]], [[4, 5, 6]], [[7, 8, 9]], [[10, 11, 12]]] +``` + +(3) For the following input of shape `[1, 4, 4, 1]` and block_size of 2: + +``` +x = [[[[1], [2], [3], [4]], + [[5], [6], [7], [8]], + [[9], [10], [11], [12]], + [[13], [14], [15], [16]]]] +``` + +The output tensor has shape `[4, 2, 2, 1]` and value: + +``` +x = [[[[1], [3]], [[9], [11]]], + [[[2], [4]], [[10], [12]]], + [[[5], [7]], [[13], [15]]], + [[[6], [8]], [[14], [16]]]] +``` + +(4) For the following input of shape `[2, 2, 4, 1]` and block_size of 2: + +``` +x = [[[[1], [2], [3], [4]], + [[5], [6], [7], [8]]], + [[[9], [10], [11], [12]], + [[13], [14], [15], [16]]]] +``` + +The output tensor has shape `[8, 1, 2, 1]` and value: + +``` +x = [[[[1], [3]]], [[[9], [11]]], [[[2], [4]]], [[[10], [12]]], + [[[5], [7]]], [[[13], [15]]], [[[6], [8]]], [[[14], [16]]]] +``` + +Among others, this operation is useful for reducing atrous convolution into +regular convolution. +)doc"); // -------------------------------------------------------------------------- REGISTER_OP("BatchToSpaceND") @@ -2118,7 +3951,132 @@ REGISTER_OP("BatchToSpaceND") return BatchToSpaceShapeHelper(c, c->input(0), c->input(1), c->input_tensor(1), c->input(2), c->input_tensor(2)); - }); + }) + .Doc(R"doc( +BatchToSpace for N-D tensors of type T. 
+ +This operation reshapes the "batch" dimension 0 into `M + 1` dimensions of shape +`block_shape + [batch]`, interleaves these blocks back into the grid defined by +the spatial dimensions `[1, ..., M]`, to obtain a result with the same rank as +the input. The spatial dimensions of this intermediate result are then +optionally cropped according to `crops` to produce the output. This is the +reverse of SpaceToBatch. See below for a precise description. + +input: N-D with shape `input_shape = [batch] + spatial_shape + remaining_shape`, + where spatial_shape has M dimensions. + +block_shape: 1-D with shape `[M]`, all values must be >= 1. + +crops: 2-D with shape `[M, 2]`, all values must be >= 0. + `crops[i] = [crop_start, crop_end]` specifies the amount to crop from input + dimension `i + 1`, which corresponds to spatial dimension `i`. It is + required that + `crop_start[i] + crop_end[i] <= block_shape[i] * input_shape[i + 1]`. + +This operation is equivalent to the following steps: + +1. Reshape `input` to `reshaped` of shape: + [block_shape[0], ..., block_shape[M-1], + batch / prod(block_shape), + input_shape[1], ..., input_shape[N-1]] + +2. Permute dimensions of `reshaped` to produce `permuted` of shape + [batch / prod(block_shape), + + input_shape[1], block_shape[0], + ..., + input_shape[M], block_shape[M-1], + + input_shape[M+1], ..., input_shape[N-1]] + +3. Reshape `permuted` to produce `reshaped_permuted` of shape + [batch / prod(block_shape), + + input_shape[1] * block_shape[0], + ..., + input_shape[M] * block_shape[M-1], + + input_shape[M+1], + ..., + input_shape[N-1]] + +4. Crop the start and end of dimensions `[1, ..., M]` of + `reshaped_permuted` according to `crops` to produce the output of shape: + [batch / prod(block_shape), + + input_shape[1] * block_shape[0] - crops[0,0] - crops[0,1], + ..., + input_shape[M] * block_shape[M-1] - crops[M-1,0] - crops[M-1,1], + + input_shape[M+1], ..., input_shape[N-1]] + +Some examples: + +(1) For the following input of shape `[4, 1, 1, 1]`, `block_shape = [2, 2]`, and + `crops = [[0, 0], [0, 0]]`: + +``` +[[[[1]]], [[[2]]], [[[3]]], [[[4]]]] +``` + +The output tensor has shape `[1, 2, 2, 1]` and value: + +``` +x = [[[[1], [2]], [[3], [4]]]] +``` + +(2) For the following input of shape `[4, 1, 1, 3]`, `block_shape = [2, 2]`, and + `crops = [[0, 0], [0, 0]]`: + +``` +[[[1, 2, 3]], [[4, 5, 6]], [[7, 8, 9]], [[10, 11, 12]]] +``` + +The output tensor has shape `[1, 2, 2, 3]` and value: + +``` +x = [[[[1, 2, 3], [4, 5, 6]], + [[7, 8, 9], [10, 11, 12]]]] +``` + +(3) For the following input of shape `[4, 2, 2, 1]`, `block_shape = [2, 2]`, and + `crops = [[0, 0], [0, 0]]`: + +``` +x = [[[[1], [3]], [[9], [11]]], + [[[2], [4]], [[10], [12]]], + [[[5], [7]], [[13], [15]]], + [[[6], [8]], [[14], [16]]]] +``` + +The output tensor has shape `[1, 4, 4, 1]` and value: + +``` +x = [[[1], [2], [3], [4]], + [[5], [6], [7], [8]], + [[9], [10], [11], [12]], + [[13], [14], [15], [16]]] +``` + +(4) For the following input of shape `[8, 1, 3, 1]`, `block_shape = [2, 2]`, and + `crops = [[0, 0], [2, 0]]`: + +``` +x = [[[[0], [1], [3]]], [[[0], [9], [11]]], + [[[0], [2], [4]]], [[[0], [10], [12]]], + [[[0], [5], [7]]], [[[0], [13], [15]]], + [[[0], [6], [8]]], [[[0], [14], [16]]]] +``` + +The output tensor has shape `[2, 2, 4, 1]` and value: + +``` +x = [[[[1], [2], [3], [4]], + [[5], [6], [7], [8]]], + [[[9], [10], [11], [12]], + [[13], [14], [15], [16]]]] +``` +)doc"); // -------------------------------------------------------------------------- 
REGISTER_OP("BatchToSpace") @@ -2143,7 +4101,97 @@ REGISTER_OP("BatchToSpace") return BatchToSpaceShapeHelper(c, input_shape, c->MakeShape({2}), &block_shape, c->input(1), c->input_tensor(1)); - }); + }) + .Doc(R"doc( +BatchToSpace for 4-D tensors of type T. + +This is a legacy version of the more general BatchToSpaceND. + +Rearranges (permutes) data from batch into blocks of spatial data, followed by +cropping. This is the reverse transformation of SpaceToBatch. More specifically, +this op outputs a copy of the input tensor where values from the `batch` +dimension are moved in spatial blocks to the `height` and `width` dimensions, +followed by cropping along the `height` and `width` dimensions. + +input: 4-D tensor with shape + `[batch*block_size*block_size, height_pad/block_size, width_pad/block_size, + depth]`. Note that the batch size of the input tensor must be divisible by + `block_size * block_size`. + +crops: 2-D tensor of non-negative integers with shape `[2, 2]`. It specifies + how many elements to crop from the intermediate result across the spatial + dimensions as follows: + + crops = [[crop_top, crop_bottom], [crop_left, crop_right]] + +output: 4-D with shape `[batch, height, width, depth]`, where: + + height = height_pad - crop_top - crop_bottom + width = width_pad - crop_left - crop_right + +The attr `block_size` must be greater than one. It indicates the block size. + +Some examples: + +(1) For the following input of shape `[4, 1, 1, 1]` and block_size of 2: + +``` +[[[[1]]], [[[2]]], [[[3]]], [[[4]]]] +``` + +The output tensor has shape `[1, 2, 2, 1]` and value: + +``` +x = [[[[1], [2]], [[3], [4]]]] +``` + +(2) For the following input of shape `[4, 1, 1, 3]` and block_size of 2: + +``` +[[[1, 2, 3]], [[4, 5, 6]], [[7, 8, 9]], [[10, 11, 12]]] +``` + +The output tensor has shape `[1, 2, 2, 3]` and value: + +``` +x = [[[[1, 2, 3], [4, 5, 6]], + [[7, 8, 9], [10, 11, 12]]]] +``` + +(3) For the following input of shape `[4, 2, 2, 1]` and block_size of 2: + +``` +x = [[[[1], [3]], [[9], [11]]], + [[[2], [4]], [[10], [12]]], + [[[5], [7]], [[13], [15]]], + [[[6], [8]], [[14], [16]]]] +``` + +The output tensor has shape `[1, 4, 4, 1]` and value: + +``` +x = [[[1], [2], [3], [4]], + [[5], [6], [7], [8]], + [[9], [10], [11], [12]], + [[13], [14], [15], [16]]] +``` + +(4) For the following input of shape `[8, 1, 2, 1]` and block_size of 2: + +``` +x = [[[[1], [3]]], [[[9], [11]]], [[[2], [4]]], [[[10], [12]]], + [[[5], [7]]], [[[13], [15]]], [[[6], [8]]], [[[14], [16]]]] +``` + +The output tensor has shape `[2, 2, 4, 1]` and value: + +``` +x = [[[[1], [3]], [[5], [7]]], + [[[2], [4]], [[10], [12]]], + [[[5], [7]], [[13], [15]]], + [[[6], [8]], [[14], [16]]]] +``` +)doc"); // -------------------------------------------------------------------------- REGISTER_OP("SpaceToDepth") @@ -2197,7 +4245,96 @@ REGISTER_OP("SpaceToDepth") c->set_output(0, output_shape); return Status::OK(); - }); + }) + .Doc(R"doc( +SpaceToDepth for tensors of type T. + +Rearranges blocks of spatial data, into depth. More specifically, +this op outputs a copy of the input tensor where values from the `height` +and `width` dimensions are moved to the `depth` dimension. +The attr `block_size` indicates the input block size. + + * Non-overlapping blocks of size `block_size x block size` are rearranged + into depth at each location. + * The depth of the output tensor is `block_size * block_size * input_depth`. 
+ * The Y, X coordinates within each block of the input become the high order + component of the output channel index. + * The input tensor's height and width must be divisible by block_size. + +The `data_format` attr specifies the layout of the input and output tensors +with the following options: + "NHWC": `[ batch, height, width, channels ]` + "NCHW": `[ batch, channels, height, width ]` + "NCHW_VECT_C": + `qint8 [ batch, channels / 4, height, width, 4 ]` + +It is useful to consider the operation as transforming a 6-D Tensor. +e.g. for data_format = NHWC, + Each element in the input tensor can be specified via 6 coordinates, + ordered by decreasing memory layout significance as: + n,oY,bY,oX,bX,iC (where n=batch index, oX, oY means X or Y coordinates + within the output image, bX, bY means coordinates + within the input block, iC means input channels). + The output would be a transpose to the following layout: + n,oY,oX,bY,bX,iC + +This operation is useful for resizing the activations between convolutions +(but keeping all data), e.g. instead of pooling. It is also useful for training +purely convolutional models. + +For example, given an input of shape `[1, 2, 2, 1]`, data_format = "NHWC" and +block_size = 2: + +``` +x = [[[[1], [2]], + [[3], [4]]]] +``` + +This operation will output a tensor of shape `[1, 1, 1, 4]`: + +``` +[[[[1, 2, 3, 4]]]] +``` + +Here, the input has a batch of 1 and each batch element has shape `[2, 2, 1]`, +the corresponding output will have a single element (i.e. width and height are +both 1) and will have a depth of 4 channels (1 * block_size * block_size). +The output element shape is `[1, 1, 4]`. + +For an input tensor with larger depth, here of shape `[1, 2, 2, 3]`, e.g. + +``` +x = [[[[1, 2, 3], [4, 5, 6]], + [[7, 8, 9], [10, 11, 12]]]] +``` + +This operation, for block_size of 2, will return the following tensor of shape +`[1, 1, 1, 12]` + +``` +[[[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]]]] +``` + +Similarly, for the following input of shape `[1 4 4 1]`, and a block size of 2: + +``` +x = [[[[1], [2], [5], [6]], + [[3], [4], [7], [8]], + [[9], [10], [13], [14]], + [[11], [12], [15], [16]]]] +``` + +the operator will return the following tensor of shape `[1 2 2 4]`: + +``` +x = [[[[1, 2, 3, 4], + [5, 6, 7, 8]], + [[9, 10, 11, 12], + [13, 14, 15, 16]]]] +``` + +block_size: The size of the spatial block. +)doc"); // -------------------------------------------------------------------------- REGISTER_OP("DepthToSpace") @@ -2249,7 +4386,102 @@ REGISTER_OP("DepthToSpace") c->set_output(0, output_shape); return Status::OK(); - }); + }) + .Doc(R"doc( +DepthToSpace for tensors of type T. + +Rearranges data from depth into blocks of spatial data. +This is the reverse transformation of SpaceToDepth. More specifically, +this op outputs a copy of the input tensor where values from the `depth` +dimension are moved in spatial blocks to the `height` and `width` dimensions. +The attr `block_size` indicates the input block size and how the data is moved. + + * Chunks of data of size `block_size * block_size` from depth are rearranged + into non-overlapping blocks of size `block_size x block_size` + * The width the output tensor is `input_depth * block_size`, whereas the + height is `input_height * block_size`. + * The Y, X coordinates within each block of the output image are determined + by the high order component of the input channel index. + * The depth of the input tensor must be divisible by + `block_size * block_size`. 
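+
+As a quick shape check (illustrative figures): an `NHWC` input of shape
+`[1, 2, 2, 8]` with `block_size = 2` produces an output of shape
+`[1, 4, 4, 2]`, since `8 / (2 * 2) = 2`.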
+ +The `data_format` attr specifies the layout of the input and output tensors +with the following options: + "NHWC": `[ batch, height, width, channels ]` + "NCHW": `[ batch, channels, height, width ]` + "NCHW_VECT_C": + `qint8 [ batch, channels / 4, height, width, 4 ]` + +It is useful to consider the operation as transforming a 6-D Tensor. +e.g. for data_format = NHWC, + Each element in the input tensor can be specified via 6 coordinates, + ordered by decreasing memory layout significance as: + n,iY,iX,bY,bX,oC (where n=batch index, iX, iY means X or Y coordinates + within the input image, bX, bY means coordinates + within the output block, oC means output channels). + The output would be the input transposed to the following layout: + n,iY,bY,iX,bX,oC + +This operation is useful for resizing the activations between convolutions +(but keeping all data), e.g. instead of pooling. It is also useful for training +purely convolutional models. + +For example, given an input of shape `[1, 1, 1, 4]`, data_format = "NHWC" and +block_size = 2: + +``` +x = [[[[1, 2, 3, 4]]]] + +``` + +This operation will output a tensor of shape `[1, 2, 2, 1]`: + +``` + [[[[1], [2]], + [[3], [4]]]] +``` + +Here, the input has a batch of 1 and each batch element has shape `[1, 1, 4]`, +the corresponding output will have 2x2 elements and will have a depth of +1 channel (1 = `4 / (block_size * block_size)`). +The output element shape is `[2, 2, 1]`. + +For an input tensor with larger depth, here of shape `[1, 1, 1, 12]`, e.g. + +``` +x = [[[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]]]] +``` + +This operation, for block size of 2, will return the following tensor of shape +`[1, 2, 2, 3]` + +``` + [[[[1, 2, 3], [4, 5, 6]], + [[7, 8, 9], [10, 11, 12]]]] + +``` + +Similarly, for the following input of shape `[1 2 2 4]`, and a block size of 2: + +``` +x = [[[[1, 2, 3, 4], + [5, 6, 7, 8]], + [[9, 10, 11, 12], + [13, 14, 15, 16]]]] +``` + +the operator will return the following tensor of shape `[1 4 4 1]`: + +``` +x = [[[ [1], [2], [5], [6]], + [ [3], [4], [7], [8]], + [ [9], [10], [13], [14]], + [ [11], [12], [15], [16]]]] + +``` + +block_size: The size of the spatial block, same as in Space2Depth. +)doc"); // -------------------------------------------------------------------------- @@ -2336,7 +4568,34 @@ REGISTER_OP("ExtractImagePatches") {batch_size_dim, output_rows, output_cols, output_depth_dim}); c->set_output(0, output_shape); return Status::OK(); - }); + }) + .Doc(R"doc( +Extract `patches` from `images` and put them in the "depth" output dimension. + +images: 4-D Tensor with shape `[batch, in_rows, in_cols, depth]`. +patches: 4-D Tensor with shape `[batch, out_rows, out_cols, ksize_rows * + ksize_cols * depth]` containing image patches with size + `ksize_rows x ksize_cols x depth` vectorized in the "depth" dimension. Note + `out_rows` and `out_cols` are the dimensions of the output patches. +ksizes: The size of the sliding window for each dimension of `images`. +strides: 1-D of length 4. How far the centers of two consecutive patches are in + the images. Must be: `[1, stride_rows, stride_cols, 1]`. +rates: 1-D of length 4. Must be: `[1, rate_rows, rate_cols, 1]`. This is the + input stride, specifying how far two consecutive patch samples are in the + input. Equivalent to extracting patches with + `patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1)`, followed by + subsampling them spatially by a factor of `rates`. This is equivalent to + `rate` in dilated (a.k.a. Atrous) convolutions. 
+padding: The type of padding algorithm to use. + +We specify the size-related attributes as: + +```python + ksizes = [1, ksize_rows, ksize_cols, 1] + strides = [1, strides_rows, strides_cols, 1] + rates = [1, rates_rows, rates_cols, 1] +``` +)doc"); // -------------------------------------------------------------------------- @@ -2400,7 +4659,23 @@ REGISTER_OP("Bitcast") c->set_output(0, new_shape); return Status::OK(); - }); + }) + .Doc(R"doc( +Bitcasts a tensor from one type to another without copying data. + +Given a tensor `input`, this operation returns a tensor that has the same buffer +data as `input` with datatype `type`. + +If the input datatype `T` is larger than the output datatype `type` then the +shape changes from [...] to [..., sizeof(`T`)/sizeof(`type`)]. + +If `T` is smaller than `type`, the operator requires that the rightmost +dimension be equal to sizeof(`type`)/sizeof(`T`). The shape then goes from +[..., sizeof(`type`)/sizeof(`T`)] to [...]. + +*NOTE*: Bitcast is implemented as a low-level cast, so machines with different +endian orderings will give different results. +)doc"); REGISTER_OP("OneHot") .Input("indices: TI") @@ -2436,7 +4711,106 @@ REGISTER_OP("OneHot") TF_RETURN_IF_ERROR(c->Concatenate(front, back, &out)); c->set_output(0, out); return Status::OK(); - }); + }) + .Doc(R"doc( +Returns a one-hot tensor. + +The locations represented by indices in `indices` take value `on_value`, +while all other locations take value `off_value`. + +If the input `indices` is rank `N`, the output will have rank `N+1`, +The new axis is created at dimension `axis` (default: the new axis is +appended at the end). + +If `indices` is a scalar the output shape will be a vector of length `depth`. + +If `indices` is a vector of length `features`, the output shape will be: +``` + features x depth if axis == -1 + depth x features if axis == 0 +``` + +If `indices` is a matrix (batch) with shape `[batch, features]`, +the output shape will be: +``` + batch x features x depth if axis == -1 + batch x depth x features if axis == 1 + depth x batch x features if axis == 0 +``` + + +Examples +========= + +Suppose that + +``` + indices = [0, 2, -1, 1] + depth = 3 + on_value = 5.0 + off_value = 0.0 + axis = -1 +``` + +Then output is `[4 x 3]`: + + ```output = + [5.0 0.0 0.0] // one_hot(0) + [0.0 0.0 5.0] // one_hot(2) + [0.0 0.0 0.0] // one_hot(-1) + [0.0 5.0 0.0] // one_hot(1) + ``` + +Suppose that + +``` + indices = [0, 2, -1, 1] + depth = 3 + on_value = 0.0 + off_value = 3.0 + axis = 0 +``` + +Then output is `[3 x 4]`: + + ```output = + [0.0 3.0 3.0 3.0] + [3.0 3.0 3.0 0.0] + [3.0 3.0 3.0 3.0] + [3.0 0.0 3.0 3.0] + // ^ one_hot(0) + // ^ one_hot(2) + // ^ one_hot(-1) + // ^ one_hot(1) + ``` +Suppose that + +``` + indices = [[0, 2], [1, -1]] + depth = 3 + on_value = 1.0 + off_value = 0.0 + axis = -1 +``` + +Then output is `[2 x 2 x 3]`: + + ```output = + [ + [1.0, 0.0, 0.0] // one_hot(0) + [0.0, 0.0, 1.0] // one_hot(2) + ][ + [0.0, 1.0, 0.0] // one_hot(1) + [0.0, 0.0, 0.0] // one_hot(-1) + ]``` + +indices: A tensor of indices. +depth: A scalar defining the depth of the one hot dimension. +on_value: A scalar defining the value to fill in output when `indices[j] = i`. +off_value: A scalar defining the value to fill in output when `indices[j] != i`. +axis: The axis to fill (default: -1, a new inner-most axis). +output: The one-hot tensor. +)doc"); // EXPERIMENTAL. DO NOT USE OR DEPEND ON THIS YET. 
REGISTER_OP("QuantizeAndDequantize") @@ -2449,7 +4823,10 @@ REGISTER_OP("QuantizeAndDequantize") .Output("output: T") .Attr("T: {bfloat16, float, double}") .SetShapeFn(shape_inference::UnchangedShape) - .Deprecated(22, "Replaced by QuantizeAndDequantizeV2"); + .Deprecated(22, "Replaced by QuantizeAndDequantizeV2") + .Doc(R"doc( +Use QuantizeAndDequantizeV2 instead. +)doc"); // TODO(suharshs): Deprecate QuantizeAndDequantizeV2. REGISTER_OP("QuantizeAndDequantizeV2") @@ -2467,7 +4844,69 @@ REGISTER_OP("QuantizeAndDequantizeV2") TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); c->set_output(0, c->input(0)); return Status::OK(); - }); + }) + .Doc(R"doc( +Quantizes then dequantizes a tensor. + +This op simulates the precision loss from the quantized forward pass by: +1. Quantizing the tensor to fixed point numbers, which should match the target + quantization method when it is used in inference. +2. Dequantizing it back to floating point numbers for the following ops, most + likely matmul. + +There are different ways to quantize. This version does not use the full range +of the output type, choosing to elide the lowest possible value for symmetry +(e.g., output range is -127 to 127, not -128 to 127 for signed 8 bit +quantization), so that 0.0 maps to 0. + +To perform this op, we first find the range of values in our tensor. The range +we use is always centered on 0, so we find m such that + +1. m = max(abs(input_min), abs(input_max)) if range_given is true, +2. m = max(abs(min_elem(input)), abs(max_elem(input))) otherwise. + +Our input tensor range is then [-m, m]. + +Next, we choose our fixed-point quantization buckets, [min_fixed, max_fixed]. +If signed_input is true, this is + + [min_fixed, max_fixed ] = + [-(1 << (num_bits - 1) - 1), (1 << (num_bits - 1)) - 1]. + +Otherwise, if signed_input is false, the fixed-point range is + + [min_fixed, max_fixed] = [0, (1 << num_bits) - 1]. + +From this we compute our scaling factor, s: + + s = (max_fixed - min_fixed) / (2 * m). + +Now we can quantize and dequantize the elements of our tensor. An element e +is transformed into e': + + e' = (e * s).round_to_nearest() / s. + +Note that we have a different number of buckets in the signed vs. unsigned +cases. For example, if num_bits == 8, we get 254 buckets in the signed case +vs. 255 in the unsigned case. + +For example, suppose num_bits = 8 and m = 1. Then + + [min_fixed, max_fixed] = [-127, 127], and + s = (127 + 127) / 2 = 127. + +Given the vector {-1, -0.5, 0, 0.3}, this is quantized to +{-127, -63, 0, 38}, and dequantized to {-1, -63.0/127, 0, 38.0/127}. + +input: Tensor to quantize and then dequantize. +signed_input: If the quantization is signed or unsigned. +num_bits: The bitwidth of the quantization. +range_given: If the range is given or should be computed from the tensor. +input_min: If range_given, this is the min of the range, otherwise this input + will be ignored. +input_max: If range_given, this is the max of the range, otherwise this input + will be ignored. +)doc"); REGISTER_OP("QuantizeAndDequantizeV3") .Input("input: T") @@ -2485,7 +4924,13 @@ REGISTER_OP("QuantizeAndDequantizeV3") TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused)); c->set_output(0, c->input(0)); return Status::OK(); - }); + }) + .Doc(R"doc( +Quantizes then dequantizes a tensor. + +This is almost identical to QuantizeAndDequantizeV2, except that num_bits is a +tensor, so its value can change during training. 
+)doc"); REGISTER_OP("QuantizeV2") .Input("input: float") @@ -2507,7 +4952,110 @@ REGISTER_OP("QuantizeV2") c->set_output(1, c->Scalar()); c->set_output(2, c->Scalar()); return Status::OK(); - }); + }) + .Doc(R"doc( +Quantize the 'input' tensor of type float to 'output' tensor of type 'T'. + +[min_range, max_range] are scalar floats that specify the range for +the 'input' data. The 'mode' attribute controls exactly which calculations are +used to convert the float values to their quantized equivalents. The +'round_mode' attribute controls which rounding tie-breaking algorithm is used +when rounding float values to their quantized equivalents. + +In 'MIN_COMBINED' mode, each value of the tensor will undergo the following: + +``` +out[i] = (in[i] - min_range) * range(T) / (max_range - min_range) +if T == qint8, out[i] -= (range(T) + 1) / 2.0 +``` +here `range(T) = numeric_limits<T>::max() - numeric_limits<T>::min()` + +*MIN_COMBINED Mode Example* + +Assume the input is type float and has a possible range of [0.0, 6.0] and the +output type is quint8 ([0, 255]). The min_range and max_range values should be +specified as 0.0 and 6.0. Quantizing from float to quint8 will multiply each +value of the input by 255/6 and cast to quint8. + +If the output type was qint8 ([-128, 127]), the operation will additionally +subtract each value by 128 prior to casting, so that the range of values aligns +with the range of qint8. + +If the mode is 'MIN_FIRST', then this approach is used: + +``` +num_discrete_values = 1 << (# of bits in T) +range_adjust = num_discrete_values / (num_discrete_values - 1) +range = (range_max - range_min) * range_adjust +range_scale = num_discrete_values / range +quantized = round(input * range_scale) - round(range_min * range_scale) + + numeric_limits<T>::min() +quantized = max(quantized, numeric_limits<T>::min()) +quantized = min(quantized, numeric_limits<T>::max()) +``` + +The biggest difference between this and MIN_COMBINED is that the minimum range +is rounded first, before it's subtracted from the rounded value. With +MIN_COMBINED, a small bias is introduced where repeated iterations of quantizing +and dequantizing will introduce a larger and larger error. + +*SCALED mode Example* + +`SCALED` mode matches the quantization approach used in +`QuantizeAndDequantize{V2|V3}`. + +If the mode is `SCALED`, we do not use the full range of the output type, +choosing to elide the lowest possible value for symmetry (e.g., output range is +-127 to 127, not -128 to 127 for signed 8 bit quantization), so that 0.0 maps to +0. + +We first find the range of values in our tensor. The +range we use is always centered on 0, so we find m such that +```c++ + m = max(abs(input_min), abs(input_max)) +``` + +Our input tensor range is then `[-m, m]`. + +Next, we choose our fixed-point quantization buckets, `[min_fixed, max_fixed]`. 
+If T is signed, this is +``` + num_bits = sizeof(T) * 8 + [min_fixed, max_fixed] = + [-(1 << (num_bits - 1) - 1), (1 << (num_bits - 1)) - 1] +``` + +Otherwise, if T is unsigned, the fixed-point range is +``` + [min_fixed, max_fixed] = [0, (1 << num_bits) - 1] +``` + +From this we compute our scaling factor, s: +```c++ + s = (max_fixed - min_fixed) / (2 * m) +``` + +Now we can quantize the elements of our tensor: +```c++ +result = round(input * s) +``` + +One thing to watch out for is that the operator may choose to adjust the +requested minimum and maximum values slightly during the quantization process, +so you should always use the output ports as the range for further calculations. +For example, if the requested minimum and maximum values are close to equal, +they will be separated by a small epsilon value to prevent ill-formed quantized +buffers from being created. Otherwise, you can end up with buffers where all the +quantized values map to the same float value, which causes problems for +operations that have to perform further calculations on them. + +min_range: The minimum scalar value possibly produced for the input. +max_range: The maximum scalar value possibly produced for the input. +output: The quantized data produced from the float input. +output_min: The actual minimum scalar value used for the output. +output_max: The actual maximum scalar value used for the output. + +)doc"); REGISTER_OP("Dequantize") .Input("input: T") @@ -2522,7 +5070,88 @@ REGISTER_OP("Dequantize") TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); return Status::OK(); - }); + }) + .Doc(R"doc( +Dequantize the 'input' tensor into a float Tensor. + +[min_range, max_range] are scalar floats that specify the range for +the 'input' data. The 'mode' attribute controls exactly which calculations are +used to convert the float values to their quantized equivalents. + +In 'MIN_COMBINED' mode, each value of the tensor will undergo the following: + +``` +if T == qint8, in[i] += (range(T) + 1)/ 2.0 +out[i] = min_range + (in[i]* (max_range - min_range) / range(T)) +``` +here `range(T) = numeric_limits<T>::max() - numeric_limits<T>::min()` + +*MIN_COMBINED Mode Example* + +If the input comes from a QuantizedRelu6, the output type is +quint8 (range of 0-255) but the possible range of QuantizedRelu6 is +0-6. The min_range and max_range values are therefore 0.0 and 6.0. +Dequantize on quint8 will take each value, cast to float, and multiply +by 6 / 255. +Note that if quantizedtype is qint8, the operation will additionally add +each value by 128 prior to casting. + +If the mode is 'MIN_FIRST', then this approach is used: + +```c++ +num_discrete_values = 1 << (# of bits in T) +range_adjust = num_discrete_values / (num_discrete_values - 1) +range = (range_max - range_min) * range_adjust +range_scale = range / num_discrete_values +const double offset_input = static_cast<double>(input) - lowest_quantized; +result = range_min + ((input - numeric_limits<T>::min()) * range_scale) +``` + +*SCALED mode Example* + +`SCALED` mode matches the quantization approach used in +`QuantizeAndDequantize{V2|V3}`. + +If the mode is `SCALED`, we do not use the full range of the output type, +choosing to elide the lowest possible value for symmetry (e.g., output range is +-127 to 127, not -128 to 127 for signed 8 bit quantization), so that 0.0 maps to +0. + +We first find the range of values in our tensor. 
The
+range we use is always centered on 0, so we find m such that
+```c++
+  m = max(abs(input_min), abs(input_max))
+```
+
+Our input tensor range is then `[-m, m]`.
+
+Next, we choose our fixed-point quantization buckets, `[min_fixed, max_fixed]`.
+If T is signed, this is
+```
+  num_bits = sizeof(T) * 8
+  [min_fixed, max_fixed] =
+      [-((1 << (num_bits - 1)) - 1), (1 << (num_bits - 1)) - 1]
+```
+
+Otherwise, if T is unsigned, the fixed-point range is
+```
+  [min_fixed, max_fixed] = [0, (1 << num_bits) - 1]
+```
+
+From this we compute our scaling factor, s:
+```c++
+  s = (2 * m) / (max_fixed - min_fixed)
+```
+
+Now we can dequantize the elements of our tensor:
+```c++
+result = input * s
+```
+
+min_range: The minimum scalar value possibly produced for the input.
+max_range: The maximum scalar value possibly produced for the input.
+
+)doc");

 REGISTER_OP("QuantizedConcat")
     .Input("concat_dim: int32")
     .Input("values: N * T")
@@ -2544,7 +5173,22 @@ REGISTER_OP("QuantizedConcat")
       c->set_output(1, c->Scalar());
       c->set_output(2, c->Scalar());
       return Status::OK();
-    });
+    })
+    .Doc(R"doc(
+Concatenates quantized tensors along one dimension.
+
+concat_dim: 0-D. The dimension along which to concatenate. Must be in the
+  range [0, rank(values)).
+values: The `N` Tensors to concatenate. Their ranks and types must match,
+  and their sizes must match in all dimensions except `concat_dim`.
+input_mins: The minimum scalar values for each of the input tensors.
+input_maxes: The maximum scalar values for each of the input tensors.
+output_min: The float value that the minimum quantized output value represents.
+output_max: The float value that the maximum quantized output value represents.
+output: A `Tensor` with the concatenation of values stacked along the
+  `concat_dim` dimension. This tensor's shape matches that of `values` except
+  in `concat_dim` where it has the sum of the sizes.
+)doc");

 REGISTER_OP("QuantizedReshape")
     .Input("tensor: T")
@@ -2564,7 +5208,17 @@ REGISTER_OP("QuantizedReshape")
       c->set_output(1, c->Scalar());
       c->set_output(2, c->Scalar());
       return Status::OK();
-    });
+    })
+    .Doc(R"Doc(
+Reshapes a quantized tensor as per the Reshape op.
+
+shape: Defines the shape of the output tensor.
+input_min: The minimum value of the input.
+input_max: The maximum value of the input.
+output_min: This value is copied from input_min.
+output_max: This value is copied from input_max.
+)Doc");

 REGISTER_OP("QuantizedInstanceNorm")
     .Input("x: T")
@@ -2592,7 +5246,24 @@ REGISTER_OP("QuantizedInstanceNorm")
       c->set_output(1, c->Scalar());
       c->set_output(2, c->Scalar());
       return Status::OK();
-    });
+    })
+    .Doc(R"doc(
+Quantized instance normalization.
+
+x: A 4D input Tensor.
+x_min: The value represented by the lowest quantized input.
+x_max: The value represented by the highest quantized input.
+y: A 4D Tensor.
+y_min: The value represented by the lowest quantized output.
+y_max: The value represented by the highest quantized output.
+output_range_given: If True, `given_y_min` and `given_y_max` are used as the
+  output range. Otherwise, the implementation computes the output range.
+given_y_min: Output in `y_min` if `output_range_given` is True.
+given_y_max: Output in `y_max` if `output_range_given` is True.
+variance_epsilon: A small float number to avoid dividing by 0.
+min_separation: Minimum value of `y_max - y_min`.
+)doc");

 namespace {

@@ -2666,7 +5337,88 @@ REGISTER_OP("ScatterNd")
     .Output("output: T")
     .Attr("T: type")
     .Attr("Tindices: {int32, int64}")
-    .SetShapeFn(ScatterNdShape);
+    .SetShapeFn(ScatterNdShape)
+    .Doc(R"doc(
+Scatter `updates` into a new (initially zero) tensor according to `indices`.
+
+Creates a new tensor by applying sparse `updates` to individual
+values or slices within a zero tensor of the given `shape` according to
+indices. This operator is the inverse of the @{tf.gather_nd} operator which
+extracts values or slices from a given tensor.
+
+**WARNING**: The order in which updates are applied is nondeterministic, so the
+output will be nondeterministic if `indices` contains duplicates.
+
+`indices` is an integer tensor containing indices into a new tensor of shape
+`shape`. The last dimension of `indices` can be at most the rank of `shape`:
+
+    indices.shape[-1] <= shape.rank
+
+The last dimension of `indices` corresponds to indices into elements
+(if `indices.shape[-1] = shape.rank`) or slices
+(if `indices.shape[-1] < shape.rank`) along dimension `indices.shape[-1]` of
+`shape`. `updates` is a tensor with shape
+
+    indices.shape[:-1] + shape[indices.shape[-1]:]
+
+The simplest form of scatter is to insert individual elements in a tensor by
+index. For example, say we want to insert 4 scattered elements in a rank-1
+tensor with 8 elements.
+
+<div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="width:100%" src="https://www.tensorflow.org/images/ScatterNd1.png" alt>
+</div>
+
+In Python, this scatter operation would look like this:
+
+```python
+    indices = tf.constant([[4], [3], [1], [7]])
+    updates = tf.constant([9, 10, 11, 12])
+    shape = tf.constant([8])
+    scatter = tf.scatter_nd(indices, updates, shape)
+    with tf.Session() as sess:
+      print(sess.run(scatter))
+```
+
+The resulting tensor would look like this:
+
+    [0, 11, 0, 10, 9, 0, 0, 12]
+
+We can also insert entire slices of a higher rank tensor all at once. For
+example, suppose we want to insert two slices in the first dimension of a
+rank-3 tensor with two matrices of new values.
+
+<div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="width:100%" src="https://www.tensorflow.org/images/ScatterNd2.png" alt>
+</div>
+
+In Python, this scatter operation would look like this:
+
+```python
+    indices = tf.constant([[0], [2]])
+    updates = tf.constant([[[5, 5, 5, 5], [6, 6, 6, 6],
+                            [7, 7, 7, 7], [8, 8, 8, 8]],
+                           [[5, 5, 5, 5], [6, 6, 6, 6],
+                            [7, 7, 7, 7], [8, 8, 8, 8]]])
+    shape = tf.constant([4, 4, 4])
+    scatter = tf.scatter_nd(indices, updates, shape)
+    with tf.Session() as sess:
+      print(sess.run(scatter))
+```
+
+The resulting tensor would look like this:
+
+    [[[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]],
+     [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
+     [[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]],
+     [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]]
+
+indices: Index tensor.
+updates: Updates to scatter into output.
+shape: 1-D. The shape of the resulting tensor.
+output: A new tensor with the given shape and updates applied according
+  to the indices.
+)doc");

 REGISTER_OP("ScatterNdNonAliasingAdd")
     .Input("input: T")
@@ -2675,7 +5427,53 @@
     .Output("output: T")
     .Attr("T: numbertype")
     .Attr("Tindices: {int32, int64}")
-    .SetShapeFn(shape_inference::ScatterNdUpdateShape);
+    .SetShapeFn(shape_inference::ScatterNdUpdateShape)
+    .Doc(R"doc(
+Applies sparse addition to `input` using individual values or slices
+from `updates` according to indices `indices`. The updates are non-aliasing:
+`input` is only modified in-place if no other operations will use it.
+Otherwise, a copy of `input` is made. This operation has a gradient with
+respect to both `input` and `updates`.
+
+`input` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`.
+
+`indices` must be an integer tensor containing indices into `input`.
+Its shape must be `[d_0, ..., d_{Q-2}, K]`, where `0 < K <= P`.
+
+The innermost dimension of `indices` (with length `K`) corresponds to
+indices into elements (if `K = P`) or `(P-K)`-dimensional slices
+(if `K < P`) along the `K`th dimension of `input`.
+
+`updates` is a `Tensor` of rank `Q-1+P-K` with shape:
+
+```
+[d_0, ..., d_{Q-2}, input.shape[K], ..., input.shape[P-1]].
+```
+
+For example, say we want to add 4 scattered elements to a rank-1 tensor with 8
+elements. In Python, that addition would look like this:
+
+    input = tf.constant([1, 2, 3, 4, 5, 6, 7, 8])
+    indices = tf.constant([[4], [3], [1], [7]])
+    updates = tf.constant([9, 10, 11, 12])
+    output = tf.scatter_nd_non_aliasing_add(input, indices, updates)
+    with tf.Session() as sess:
+      print(sess.run(output))
+
+The resulting value `output` would look like this:
+
+    [1, 13, 3, 14, 14, 6, 7, 20]
+
+See @{tf.scatter_nd} for more details about how to make updates to slices.
+
+input: A Tensor.
+indices: A Tensor. Must be one of the following types: `int32`, `int64`.
+  A tensor of indices into `input`.
+updates: A Tensor. Must have the same type as `input`. A tensor of updated
+  values to add to `input`.
+output: A `Tensor` with the same shape as `input`, containing values of `input`
+  updated with `updates`.
+)doc");

 REGISTER_OP("FakeQuantWithMinMaxArgs")
     .Attr("min: float = -6.0")
@@ -2684,7 +5482,18 @@
     .Attr("narrow_range: bool = false")
     .Input("inputs: float")
     .Output("outputs: float")
-    .SetShapeFn(shape_inference::UnchangedShape);
+    .SetShapeFn(shape_inference::UnchangedShape)
+    .Doc(R"doc(
+Fake-quantize the 'inputs' tensor (type float) to an 'outputs' tensor of the
+same type.
+
+Attributes `[min; max]` define the clamping range for the `inputs` data.
+`inputs` values are quantized into the quantization range (`[0; 2^num_bits - 1]`
+when `narrow_range` is false and `[1; 2^num_bits - 1]` when it is true) and
+then de-quantized and output as floats in the `[min; max]` interval.
+`num_bits` is the bitwidth of the quantization; between 2 and 8, inclusive.
+
+Quantization is called fake since the output is still in floating point.
+)doc");

 REGISTER_OP("FakeQuantWithMinMaxArgsGradient")
     .Attr("min: float = -6.0")
@@ -2694,7 +5503,15 @@
     .Input("gradients: float")
     .Input("inputs: float")
     .Output("backprops: float")
-    .SetShapeFn(shape_inference::UnchangedShape);
+    .SetShapeFn(shape_inference::UnchangedShape)
+    .Doc(R"doc(
+Compute gradients for a FakeQuantWithMinMaxArgs operation.
+
+gradients: Backpropagated gradients above the FakeQuantWithMinMaxArgs operation.
+inputs: Values passed as inputs to the FakeQuantWithMinMaxArgs operation.
+backprops: Backpropagated gradients below the FakeQuantWithMinMaxArgs operation: + `gradients * (inputs >= min && inputs <= max)`. +)doc"); REGISTER_OP("FakeQuantWithMinMaxVars") .Attr("num_bits: int = 8") @@ -2709,7 +5526,20 @@ REGISTER_OP("FakeQuantWithMinMaxVars") TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); return Status::OK(); - }); + }) + .Doc(R"doc( +Fake-quantize the 'inputs' tensor of type float via global float scalars `min` +and `max` to 'outputs' tensor of same shape as `inputs`. + +`[min; max]` define the clamping range for the `inputs` data. +`inputs` values are quantized into the quantization range (`[0; 2^num_bits - 1]` +when `narrow_range` is false and `[1; 2^num_bits - 1]` when it is true) and +then de-quantized and output as floats in `[min; max]` interval. +`num_bits` is the bitwidth of the quantization; between 2 and 8, inclusive. + +This operation has a gradient and thus allows for training `min` and `max` +values. +)doc"); REGISTER_OP("FakeQuantWithMinMaxVarsGradient") .Attr("num_bits: int = 8") @@ -2735,7 +5565,22 @@ REGISTER_OP("FakeQuantWithMinMaxVarsGradient") c->set_output(1, min_max); c->set_output(2, min_max); return Status::OK(); - }); + }) + .Doc(R"doc( +Compute gradients for a FakeQuantWithMinMaxVars operation. + +gradients: Backpropagated gradients above the FakeQuantWithMinMaxVars operation. +inputs: Values passed as inputs to the FakeQuantWithMinMaxVars operation. +min, max: Quantization interval, scalar floats. +num_bits: The bitwidth of the quantization; between 2 and 8, inclusive. +narrow_range: Whether to quantize into 2^num_bits - 1 distinct values. +backprops_wrt_input: Backpropagated gradients w.r.t. inputs: + `gradients * (inputs >= min && inputs <= max)`. +backprop_wrt_min: Backpropagated gradients w.r.t. min parameter: + `sum(gradients * (inputs < min))`. +backprop_wrt_max: Backpropagated gradients w.r.t. max parameter: + `sum(gradients * (inputs > max))`. +)doc"); REGISTER_OP("FakeQuantWithMinMaxVarsPerChannel") .Attr("num_bits: int = 8") @@ -2757,7 +5602,21 @@ REGISTER_OP("FakeQuantWithMinMaxVarsPerChannel") c->set_output(0, input); return Status::OK(); - }); + }) + .Doc(R"doc( +Fake-quantize the 'inputs' tensor of type float and one of the shapes: `[d]`, +`[b, d]` `[b, h, w, d]` via per-channel floats `min` and `max` of shape `[d]` +to 'outputs' tensor of same shape as `inputs`. + +`[min; max]` define the clamping range for the `inputs` data. +`inputs` values are quantized into the quantization range (`[0; 2^num_bits - 1]` +when `narrow_range` is false and `[1; 2^num_bits - 1]` when it is true) and +then de-quantized and output as floats in `[min; max]` interval. +`num_bits` is the bitwidth of the quantization; between 2 and 8, inclusive. + +This operation has a gradient and thus allows for training `min` and `max` +values. +)doc"); REGISTER_OP("FakeQuantWithMinMaxVarsPerChannelGradient") .Attr("num_bits: int = 8") @@ -2786,7 +5645,25 @@ REGISTER_OP("FakeQuantWithMinMaxVarsPerChannelGradient") c->set_output(1, min_max); c->set_output(2, min_max); return Status::OK(); - }); + }) + .Doc(R"doc( +Compute gradients for a FakeQuantWithMinMaxVarsPerChannel operation. + +gradients: Backpropagated gradients above the FakeQuantWithMinMaxVars operation, + shape one of: `[d]`, `[b, d]`, `[b, h, w, d]`. +inputs: Values passed as inputs to the FakeQuantWithMinMaxVars operation, shape + same as `gradients`. +min, max: Quantization interval, floats of shape `[d]`. 
+num_bits: The bitwidth of the quantization; between 2 and 8, inclusive. +narrow_range: Whether to quantize into 2^num_bits - 1 distinct values. +backprops_wrt_input: Backpropagated gradients w.r.t. inputs, shape same as + `inputs`: + `gradients * (inputs >= min && inputs <= max)`. +backprop_wrt_min: Backpropagated gradients w.r.t. min parameter, shape `[d]`: + `sum_per_d(gradients * (inputs < min))`. +backprop_wrt_max: Backpropagated gradients w.r.t. max parameter, shape `[d]`: + `sum_per_d(gradients * (inputs > max))`. +)doc"); #ifdef INTEL_MKL REGISTER_OP("_MklConcat") diff --git a/tensorflow/core/ops/audio_ops.cc b/tensorflow/core/ops/audio_ops.cc index bcc46761c1..d944e385a8 100644 --- a/tensorflow/core/ops/audio_ops.cc +++ b/tensorflow/core/ops/audio_ops.cc @@ -128,13 +128,52 @@ REGISTER_OP("DecodeWav") .Attr("desired_samples: int = -1") .Output("audio: float") .Output("sample_rate: int32") - .SetShapeFn(DecodeWavShapeFn); + .SetShapeFn(DecodeWavShapeFn) + .Doc(R"doc( +Decode a 16-bit PCM WAV file to a float tensor. + +The -32768 to 32767 signed 16-bit values will be scaled to -1.0 to 1.0 in float. + +When desired_channels is set, if the input contains fewer channels than this +then the last channel will be duplicated to give the requested number, else if +the input has more channels than requested then the additional channels will be +ignored. + +If desired_samples is set, then the audio will be cropped or padded with zeroes +to the requested length. + +The first output contains a Tensor with the content of the audio samples. The +lowest dimension will be the number of channels, and the second will be the +number of samples. For example, a ten-sample-long stereo WAV file should give an +output shape of [10, 2]. + +contents: The WAV-encoded audio, usually from a file. +desired_channels: Number of sample channels wanted. +desired_samples: Length of audio requested. +audio: 2-D with shape `[length, channels]`. +sample_rate: Scalar holding the sample rate found in the WAV header. +)doc"); REGISTER_OP("EncodeWav") .Input("audio: float") .Input("sample_rate: int32") .Output("contents: string") - .SetShapeFn(EncodeWavShapeFn); + .SetShapeFn(EncodeWavShapeFn) + .Doc(R"doc( +Encode audio data using the WAV file format. + +This operation will generate a string suitable to be saved out to create a .wav +audio file. It will be encoded in the 16-bit PCM format. It takes in float +values in the range -1.0f to 1.0f, and any outside that value will be clamped to +that range. + +`audio` is a 2-D float Tensor of shape `[length, channels]`. +`sample_rate` is a scalar Tensor holding the rate to use (e.g. 44100). + +audio: 2-D with shape `[length, channels]`. +sample_rate: Scalar containing the sample frequency. +contents: 0-D. WAV-encoded file contents. +)doc"); REGISTER_OP("AudioSpectrogram") .Input("input: float") @@ -142,7 +181,44 @@ REGISTER_OP("AudioSpectrogram") .Attr("stride: int") .Attr("magnitude_squared: bool = false") .Output("spectrogram: float") - .SetShapeFn(SpectrogramShapeFn); + .SetShapeFn(SpectrogramShapeFn) + .Doc(R"doc( +Produces a visualization of audio data over time. + +Spectrograms are a standard way of representing audio information as a series of +slices of frequency information, one slice for each window of time. By joining +these together into a sequence, they form a distinctive fingerprint of the sound +over time. 
+
+This op expects to receive audio data as an input, stored as floats in the range
+-1 to 1, together with a window width in samples, and a stride specifying how
+far to move the window between slices. From this it generates a
+three-dimensional output. The lowest dimension has an amplitude value for each
+frequency during that time slice. The next dimension is time, with successive
+frequency slices. The final dimension is for the channels in the input, so a
+stereo audio input would have two here for example.
+
+This means the layout when converted and saved as an image is rotated 90 degrees
+clockwise from a typical spectrogram. Time descends down the Y axis, and
+the frequency decreases from left to right.
+
+Each value in the result represents the square root of the sum of the squares of
+the real and imaginary parts of an FFT on the current window of samples. In this
+way, the lowest dimension represents the power of each frequency in the current
+window, and adjacent windows are concatenated in the next dimension.
+
+To get a more intuitive and visual look at what this operation does, you can run
+tensorflow/examples/wav_to_spectrogram to read in an audio file and save out the
+resulting spectrogram as a PNG image.
+
+input: Float representation of audio data.
+window_size: How wide the input window is in samples. For the highest efficiency
+  this should be a power of two, but other values are accepted.
+stride: How widely apart the center of adjacent sample windows should be.
+magnitude_squared: Whether to return the squared magnitude or just the
+  magnitude. Using squared magnitude can avoid extra calculations.
+spectrogram: 3D representation of the audio frequencies as an image.
+)doc");

 REGISTER_OP("Mfcc")
     .Input("spectrogram: float")
@@ -152,6 +228,26 @@
     .Attr("filterbank_channel_count: int = 40")
     .Attr("dct_coefficient_count: int = 13")
     .Output("output: float")
-    .SetShapeFn(MfccShapeFn);
+    .SetShapeFn(MfccShapeFn)
+    .Doc(R"doc(
+Transforms a spectrogram into a form that's useful for speech recognition.
+
+Mel Frequency Cepstral Coefficients are a way of representing audio data that's
+been effective as an input feature for machine learning. They are created by
+taking the spectrum of a spectrogram (a 'cepstrum'), and discarding some of the
+higher frequencies that are less significant to the human ear. They have a long
+history in the speech recognition world, and https://en.wikipedia.org/wiki/Mel-frequency_cepstrum
+is a good resource to learn more.
+
+spectrogram: Typically produced by the Spectrogram op, with magnitude_squared
+  set to true.
+sample_rate: How many samples per second the source audio used.
+upper_frequency_limit: The highest frequency to use when calculating the
+  cepstrum.
+lower_frequency_limit: The lowest frequency to use when calculating the
+  cepstrum.
+filterbank_channel_count: Resolution of the Mel bank used internally.
+dct_coefficient_count: How many output channels to produce per time slice.
+)doc");

 } // namespace tensorflow
diff --git a/tensorflow/core/ops/bitwise_ops.cc b/tensorflow/core/ops/bitwise_ops.cc
index 694e79b13e..2889953bdb 100644
--- a/tensorflow/core/ops/bitwise_ops.cc
+++ b/tensorflow/core/ops/bitwise_ops.cc
@@ -24,7 +24,13 @@
 REGISTER_OP("Invert")
     .Input("x: T")
     .Output("y: T")
     .Attr("T: {int8, int16, int32, int64, uint8, uint16, uint32, uint64}")
-    .SetShapeFn(shape_inference::UnchangedShape);
+    .SetShapeFn(shape_inference::UnchangedShape)
+    .Doc(R"doc(
+Flips all bits elementwise.
+
+The result will have exactly those bits set that are not set in `x`. The
+computation is performed on the underlying representation of `x`.
+)doc");

 #define BINARY_BITWISE() \
   Input("x: T") \
@@ -38,16 +44,64 @@
 REGISTER_OP("PopulationCount")
     .Input("x: T")
     .Output("y: uint8")
     .Attr("T: {int8, int16, int32, int64, uint8, uint16, uint32, uint64}")
-    .SetShapeFn(shape_inference::UnchangedShape);
+    .SetShapeFn(shape_inference::UnchangedShape)
+    .Doc(R"doc(
+Computes element-wise population count (a.k.a. popcount, bitsum, bitcount).
+
+For each entry in `x`, calculates the number of `1` (on) bits in the binary
+representation of that entry.
+
+**NOTE**: It is more efficient to first `tf.bitcast` your tensors into
+`int32` or `int64` and perform the bitcount on the result, than to feed in
+8- or 16-bit inputs and then aggregate the resulting counts.
+)doc");
+
+REGISTER_OP("BitwiseAnd")
+    .BINARY_BITWISE()
+    .Doc(R"doc(
+Elementwise computes the bitwise AND of `x` and `y`.
+
+The result will have those bits set that are set in both `x` and `y`. The
+computation is performed on the underlying representations of `x` and `y`.
+)doc");
+
+REGISTER_OP("BitwiseOr")
+    .BINARY_BITWISE()
+    .Doc(R"doc(
+Elementwise computes the bitwise OR of `x` and `y`.
+
+The result will have those bits set that are set in `x`, `y`, or both. The
+computation is performed on the underlying representations of `x` and `y`.
+)doc");
+
+REGISTER_OP("BitwiseXor")
+    .BINARY_BITWISE()
+    .Doc(R"doc(
+Elementwise computes the bitwise XOR of `x` and `y`.
+
+The result will have those bits set that are different in `x` and `y`. The
+computation is performed on the underlying representations of `x` and `y`.
+)doc");

-REGISTER_OP("BitwiseAnd").BINARY_BITWISE();
+REGISTER_OP("LeftShift")
+    .BINARY_BITWISE()
+    .Doc(R"doc(
+Elementwise computes the bitwise left-shift of `x` and `y`.

-REGISTER_OP("BitwiseOr").BINARY_BITWISE();
+If `y` is negative, or greater than or equal to the width of `x` in bits, the
+result is implementation defined.
+)doc");

-REGISTER_OP("BitwiseXor").BINARY_BITWISE();
+REGISTER_OP("RightShift")
+    .BINARY_BITWISE()
+    .Doc(R"doc(
+Elementwise computes the bitwise right-shift of `x` and `y`.

-REGISTER_OP("LeftShift").BINARY_BITWISE();
+Performs a logical shift for unsigned integer types, and an arithmetic shift
+for signed integer types.

-REGISTER_OP("RightShift").BINARY_BITWISE();
+If `y` is negative, or greater than or equal to the width of `x` in bits,
+the result is implementation defined.
+)doc");

 } // namespace tensorflow
diff --git a/tensorflow/core/ops/candidate_sampling_ops.cc b/tensorflow/core/ops/candidate_sampling_ops.cc
index 6e4d100b04..18700be67a 100644
--- a/tensorflow/core/ops/candidate_sampling_ops.cc
+++ b/tensorflow/core/ops/candidate_sampling_ops.cc
@@ -55,7 +55,42 @@ REGISTER_OP("UniformCandidateSampler")
     .Attr("seed: int = 0")
     .Attr("seed2: int = 0")
     .SetShapeFn(CandidateSamplerShapeFn)
-    .SetIsStateful();
+    .SetIsStateful()
+    .Doc(R"doc(
+Generates labels for candidate sampling with a uniform distribution.
+
+See explanations of candidate sampling and the data formats at
+go/candidate-sampling.
+
+For each batch, this op picks a single set of sampled candidate labels.
+
+The advantages of sampling candidates per-batch are simplicity and the
+possibility of efficient dense matrix multiplication. The disadvantage is that
+the sampled candidates must be chosen independently of the context and of the
+true labels.
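+
+As an editorial illustration (this sketch is not from the original changelist;
+it assumes the Python wrapper `tf.nn.uniform_candidate_sampler`, which emits
+this op), a typical call looks like:
+
+```python
+import tensorflow as tf
+
+# One true class per example, batch of 4.
+true_classes = tf.constant([[1], [7], [3], [42]], dtype=tf.int64)
+sampled, true_expected, sampled_expected = tf.nn.uniform_candidate_sampler(
+    true_classes=true_classes, num_true=1, num_sampled=10,
+    unique=True, range_max=1000)
+with tf.Session() as sess:
+  print(sess.run(sampled))  # 10 candidate IDs drawn uniformly from [0, 1000)
+```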
+ +true_classes: A batch_size * num_true matrix, in which each row contains the + IDs of the num_true target_classes in the corresponding original label. +sampled_candidates: A vector of length num_sampled, in which each element is + the ID of a sampled candidate. +true_expected_count: A batch_size * num_true matrix, representing + the number of times each candidate is expected to occur in a batch + of sampled candidates. If unique=true, then this is a probability. +sampled_expected_count: A vector of length num_sampled, for each sampled + candidate representing the number of times the candidate is expected + to occur in a batch of sampled candidates. If unique=true, then this is a + probability. +num_true: Number of true labels per context. +num_sampled: Number of candidates to randomly sample. +unique: If unique is true, we sample with rejection, so that all sampled + candidates in a batch are unique. This requires some approximation to + estimate the post-rejection sampling probabilities. +range_max: The sampler will sample integers from the interval [0, range_max). +seed: If either seed or seed2 are set to be non-zero, the random number + generator is seeded by the given seed. Otherwise, it is seeded by a + random seed. +seed2: An second seed to avoid seed collision. +)doc"); REGISTER_OP("LogUniformCandidateSampler") .Input("true_classes: int64") @@ -69,7 +104,43 @@ REGISTER_OP("LogUniformCandidateSampler") .Attr("seed: int = 0") .Attr("seed2: int = 0") .SetShapeFn(CandidateSamplerShapeFn) - .SetIsStateful(); + .SetIsStateful() + .Doc(R"doc( +Generates labels for candidate sampling with a log-uniform distribution. + +See explanations of candidate sampling and the data formats at +go/candidate-sampling. + +For each batch, this op picks a single set of sampled candidate labels. + +The advantages of sampling candidates per-batch are simplicity and the +possibility of efficient dense matrix multiplication. The disadvantage is that +the sampled candidates must be chosen independently of the context and of the +true labels. + + +true_classes: A batch_size * num_true matrix, in which each row contains the + IDs of the num_true target_classes in the corresponding original label. +sampled_candidates: A vector of length num_sampled, in which each element is + the ID of a sampled candidate. +true_expected_count: A batch_size * num_true matrix, representing + the number of times each candidate is expected to occur in a batch + of sampled candidates. If unique=true, then this is a probability. +sampled_expected_count: A vector of length num_sampled, for each sampled + candidate representing the number of times the candidate is expected + to occur in a batch of sampled candidates. If unique=true, then this is a + probability. +num_true: Number of true labels per context. +num_sampled: Number of candidates to randomly sample. +unique: If unique is true, we sample with rejection, so that all sampled + candidates in a batch are unique. This requires some approximation to + estimate the post-rejection sampling probabilities. +range_max: The sampler will sample integers from the interval [0, range_max). +seed: If either seed or seed2 are set to be non-zero, the random number + generator is seeded by the given seed. Otherwise, it is seeded by a + random seed. +seed2: An second seed to avoid seed collision. 
+)doc"); REGISTER_OP("LearnedUnigramCandidateSampler") .Input("true_classes: int64") @@ -83,7 +154,42 @@ REGISTER_OP("LearnedUnigramCandidateSampler") .Attr("seed: int = 0") .Attr("seed2: int = 0") .SetShapeFn(CandidateSamplerShapeFn) - .SetIsStateful(); + .SetIsStateful() + .Doc(R"doc( +Generates labels for candidate sampling with a learned unigram distribution. + +See explanations of candidate sampling and the data formats at +go/candidate-sampling. + +For each batch, this op picks a single set of sampled candidate labels. + +The advantages of sampling candidates per-batch are simplicity and the +possibility of efficient dense matrix multiplication. The disadvantage is that +the sampled candidates must be chosen independently of the context and of the +true labels. + +true_classes: A batch_size * num_true matrix, in which each row contains the + IDs of the num_true target_classes in the corresponding original label. +sampled_candidates: A vector of length num_sampled, in which each element is + the ID of a sampled candidate. +true_expected_count: A batch_size * num_true matrix, representing + the number of times each candidate is expected to occur in a batch + of sampled candidates. If unique=true, then this is a probability. +sampled_expected_count: A vector of length num_sampled, for each sampled + candidate representing the number of times the candidate is expected + to occur in a batch of sampled candidates. If unique=true, then this is a + probability. +num_true: Number of true labels per context. +num_sampled: Number of candidates to randomly sample. +unique: If unique is true, we sample with rejection, so that all sampled + candidates in a batch are unique. This requires some approximation to + estimate the post-rejection sampling probabilities. +range_max: The sampler will sample integers from the interval [0, range_max). +seed: If either seed or seed2 are set to be non-zero, the random number + generator is seeded by the given seed. Otherwise, it is seeded by a + random seed. +seed2: An second seed to avoid seed collision. +)doc"); REGISTER_OP("ThreadUnsafeUnigramCandidateSampler") .Input("true_classes: int64") @@ -97,7 +203,42 @@ REGISTER_OP("ThreadUnsafeUnigramCandidateSampler") .Attr("seed: int = 0") .Attr("seed2: int = 0") .SetShapeFn(CandidateSamplerShapeFn) - .SetIsStateful(); + .SetIsStateful() + .Doc(R"doc( +Generates labels for candidate sampling with a learned unigram distribution. + +See explanations of candidate sampling and the data formats at +go/candidate-sampling. + +For each batch, this op picks a single set of sampled candidate labels. + +The advantages of sampling candidates per-batch are simplicity and the +possibility of efficient dense matrix multiplication. The disadvantage is that +the sampled candidates must be chosen independently of the context and of the +true labels. + +true_classes: A batch_size * num_true matrix, in which each row contains the + IDs of the num_true target_classes in the corresponding original label. +sampled_candidates: A vector of length num_sampled, in which each element is + the ID of a sampled candidate. +true_expected_count: A batch_size * num_true matrix, representing + the number of times each candidate is expected to occur in a batch + of sampled candidates. If unique=true, then this is a probability. +sampled_expected_count: A vector of length num_sampled, for each sampled + candidate representing the number of times the candidate is expected + to occur in a batch of sampled candidates. 
If unique=true, then this is a + probability. +num_true: Number of true labels per context. +num_sampled: Number of candidates to randomly sample. +unique: If unique is true, we sample with rejection, so that all sampled + candidates in a batch are unique. This requires some approximation to + estimate the post-rejection sampling probabilities. +range_max: The sampler will sample integers from the interval [0, range_max). +seed: If either seed or seed2 are set to be non-zero, the random number + generator is seeded by the given seed. Otherwise, it is seeded by a + random seed. +seed2: An second seed to avoid seed collision. +)doc"); REGISTER_OP("FixedUnigramCandidateSampler") .Input("true_classes: int64") @@ -117,7 +258,70 @@ REGISTER_OP("FixedUnigramCandidateSampler") .Attr("seed: int = 0") .Attr("seed2: int = 0") .SetShapeFn(CandidateSamplerShapeFn) - .SetIsStateful(); + .SetIsStateful() + .Doc(R"doc( +Generates labels for candidate sampling with a learned unigram distribution. + +A unigram sampler could use a fixed unigram distribution read from a +file or passed in as an in-memory array instead of building up the distribution +from data on the fly. There is also an option to skew the distribution by +applying a distortion power to the weights. + +The vocabulary file should be in CSV-like format, with the last field +being the weight associated with the word. + +For each batch, this op picks a single set of sampled candidate labels. + +The advantages of sampling candidates per-batch are simplicity and the +possibility of efficient dense matrix multiplication. The disadvantage is that +the sampled candidates must be chosen independently of the context and of the +true labels. + +true_classes: A batch_size * num_true matrix, in which each row contains the + IDs of the num_true target_classes in the corresponding original label. +sampled_candidates: A vector of length num_sampled, in which each element is + the ID of a sampled candidate. +true_expected_count: A batch_size * num_true matrix, representing + the number of times each candidate is expected to occur in a batch + of sampled candidates. If unique=true, then this is a probability. +sampled_expected_count: A vector of length num_sampled, for each sampled + candidate representing the number of times the candidate is expected + to occur in a batch of sampled candidates. If unique=true, then this is a + probability. +num_true: Number of true labels per context. +num_sampled: Number of candidates to randomly sample. +unique: If unique is true, we sample with rejection, so that all sampled + candidates in a batch are unique. This requires some approximation to + estimate the post-rejection sampling probabilities. +range_max: The sampler will sample integers from the interval [0, range_max). +vocab_file: Each valid line in this file (which should have a CSV-like format) + corresponds to a valid word ID. IDs are in sequential order, starting from + num_reserved_ids. The last entry in each line is expected to be a value + corresponding to the count or relative probability. Exactly one of vocab_file + and unigrams needs to be passed to this op. +distortion: The distortion is used to skew the unigram probability distribution. + Each weight is first raised to the distortion's power before adding to the + internal unigram distribution. As a result, distortion = 1.0 gives regular + unigram sampling (as defined by the vocab file), and distortion = 0.0 gives + a uniform distribution. 
+num_reserved_ids: Optionally some reserved IDs can be added in the range [0, + ..., num_reserved_ids) by the users. One use case is that a special unknown + word token is used as ID 0. These IDs will have a sampling probability of 0. +num_shards: A sampler can be used to sample from a subset of the original range + in order to speed up the whole computation through parallelism. This parameter + (together with 'shard') indicates the number of partitions that are being + used in the overall computation. +shard: A sampler can be used to sample from a subset of the original range + in order to speed up the whole computation through parallelism. This parameter + (together with 'num_shards') indicates the particular partition number of a + sampler op, when partitioning is being used. +unigrams: A list of unigram counts or probabilities, one per ID in sequential + order. Exactly one of vocab_file and unigrams should be passed to this op. +seed: If either seed or seed2 are set to be non-zero, the random number + generator is seeded by the given seed. Otherwise, it is seeded by a + random seed. +seed2: An second seed to avoid seed collision. +)doc"); REGISTER_OP("AllCandidateSampler") .Input("true_classes: int64") @@ -130,7 +334,41 @@ REGISTER_OP("AllCandidateSampler") .Attr("seed: int = 0") .Attr("seed2: int = 0") .SetShapeFn(CandidateSamplerShapeFn) - .SetIsStateful(); + .SetIsStateful() + .Doc(R"doc( +Generates labels for candidate sampling with a learned unigram distribution. + +See explanations of candidate sampling and the data formats at +go/candidate-sampling. + +For each batch, this op picks a single set of sampled candidate labels. + +The advantages of sampling candidates per-batch are simplicity and the +possibility of efficient dense matrix multiplication. The disadvantage is that +the sampled candidates must be chosen independently of the context and of the +true labels. + +true_classes: A batch_size * num_true matrix, in which each row contains the + IDs of the num_true target_classes in the corresponding original label. +sampled_candidates: A vector of length num_sampled, in which each element is + the ID of a sampled candidate. +true_expected_count: A batch_size * num_true matrix, representing + the number of times each candidate is expected to occur in a batch + of sampled candidates. If unique=true, then this is a probability. +sampled_expected_count: A vector of length num_sampled, for each sampled + candidate representing the number of times the candidate is expected + to occur in a batch of sampled candidates. If unique=true, then this is a + probability. +num_true: Number of true labels per context. +num_sampled: Number of candidates to produce. +unique: If unique is true, we sample with rejection, so that all sampled + candidates in a batch are unique. This requires some approximation to + estimate the post-rejection sampling probabilities. +seed: If either seed or seed2 are set to be non-zero, the random number + generator is seeded by the given seed. Otherwise, it is seeded by a + random seed. +seed2: An second seed to avoid seed collision. +)doc"); REGISTER_OP("ComputeAccidentalHits") .Input("true_classes: int64") @@ -158,6 +396,27 @@ REGISTER_OP("ComputeAccidentalHits") c->set_output(1, v); c->set_output(2, v); return Status::OK(); - }); + }) + .Doc(R"doc( +Computes the ids of the positions in sampled_candidates that match true_labels. 
+ +When doing log-odds NCE, the result of this op should be passed through a +SparseToDense op, then added to the logits of the sampled candidates. This has +the effect of 'removing' the sampled labels that match the true labels by +making the classifier sure that they are sampled labels. + +true_classes: The true_classes output of UnpackSparseLabels. +sampled_candidates: The sampled_candidates output of CandidateSampler. +indices: A vector of indices corresponding to rows of true_candidates. +ids: A vector of IDs of positions in sampled_candidates that match a true_label + for the row with the corresponding index in indices. +weights: A vector of the same length as indices and ids, in which each element + is -FLOAT_MAX. +num_true: Number of true labels per context. +seed: If either seed or seed2 are set to be non-zero, the random number + generator is seeded by the given seed. Otherwise, it is seeded by a + random seed. +seed2: An second seed to avoid seed collision. +)doc"); } // namespace tensorflow diff --git a/tensorflow/core/ops/checkpoint_ops.cc b/tensorflow/core/ops/checkpoint_ops.cc index 5fe82e1653..08b00c8255 100644 --- a/tensorflow/core/ops/checkpoint_ops.cc +++ b/tensorflow/core/ops/checkpoint_ops.cc @@ -38,7 +38,49 @@ REGISTER_OP("GenerateVocabRemapping") c->set_output(0, c->Vector(num_new_vocab)); c->set_output(1, c->Scalar()); return Status::OK(); - }); + }) + .Doc(R"doc( +Given a path to new and old vocabulary files, returns a remapping Tensor of +length `num_new_vocab`, where `remapping[i]` contains the row number in the old +vocabulary that corresponds to row `i` in the new vocabulary (starting at line +`new_vocab_offset` and up to `num_new_vocab` entities), or `-1` if entry `i` +in the new vocabulary is not in the old vocabulary. The old vocabulary is +constrained to the first `old_vocab_size` entries if `old_vocab_size` is not the +default value of -1. + +`num_vocab_offset` enables +use in the partitioned variable case, and should generally be set through +examining partitioning info. The format of the files should be a text file, +with each line containing a single entity within the vocabulary. + +For example, with `new_vocab_file` a text file containing each of the following +elements on a single line: `[f0, f1, f2, f3]`, old_vocab_file = [f1, f0, f3], +`num_new_vocab = 3, new_vocab_offset = 1`, the returned remapping would be +`[0, -1, 2]`. + +The op also returns a count of how many entries in the new vocabulary +were present in the old vocabulary, which is used to calculate the number of +values to initialize in a weight matrix remapping + +This functionality can be used to remap both row vocabularies (typically, +features) and column vocabularies (typically, classes) from TensorFlow +checkpoints. Note that the partitioning logic relies on contiguous vocabularies +corresponding to div-partitioned variables. Moreover, the underlying remapping +uses an IndexTable (as opposed to an inexact CuckooTable), so client code should +use the corresponding index_table_from_file() as the FeatureColumn framework +does (as opposed to tf.feature_to_id(), which uses a CuckooTable). + +new_vocab_file: Path to the new vocab file. +old_vocab_file: Path to the old vocab file. +new_vocab_offset: How many entries into the new vocab file to start reading. +num_new_vocab: Number of entries in the new vocab file to remap. +old_vocab_size: Number of entries in the old vocab file to consider. If -1, + use the entire old vocabulary. 
+remapping: A Tensor of length num_new_vocab where the element at index i + is equal to the old ID that maps to the new ID i. This element is -1 for any + new ID that is not found in the old vocabulary. +num_present: Number of new vocab entries found in old vocab. +)doc"); REGISTER_OP("LoadAndRemapMatrix") .Input("ckpt_path: string") @@ -67,5 +109,63 @@ REGISTER_OP("LoadAndRemapMatrix") c->set_output(0, c->Matrix(num_rows, num_cols)); return Status::OK(); - }); + }) + .Doc(R"doc( +Loads a 2-D (matrix) `Tensor` with name `old_tensor_name` from the checkpoint +at `ckpt_path` and potentially reorders its rows and columns using the +specified remappings. + +Most users should use one of the wrapper initializers (such as +`tf.contrib.framework.load_and_remap_matrix_initializer`) instead of this +function directly. + +The remappings are 1-D tensors with the following properties: + +* `row_remapping` must have exactly `num_rows` entries. Row `i` of the output + matrix will be initialized from the row corresponding to index + `row_remapping[i]` in the old `Tensor` from the checkpoint. +* `col_remapping` must have either 0 entries (indicating that no column + reordering is needed) or `num_cols` entries. If specified, column `j` of the + output matrix will be initialized from the column corresponding to index + `col_remapping[j]` in the old `Tensor` from the checkpoint. +* A value of -1 in either of the remappings signifies a "missing" entry. In that + case, values from the `initializing_values` tensor will be used to fill that + missing row or column. If `row_remapping` has `r` missing entries and + `col_remapping` has `c` missing entries, then the following condition must be + true: + +`(r * num_cols) + (c * num_rows) - (r * c) == len(initializing_values)` + +The remapping tensors can be generated using the GenerateVocabRemapping op. + +As an example, with row_remapping = [1, 0, -1], col_remapping = [0, 2, -1], +initializing_values = [0.5, -0.5, 0.25, -0.25, 42], and w(i, j) representing +the value from row i, column j of the old tensor in the checkpoint, the output +matrix will look like the following: + +[[w(1, 0), w(1, 2), 0.5], + [w(0, 0), w(0, 2), -0.5], + [0.25, -0.25, 42]] + +ckpt_path: Path to the TensorFlow checkpoint (version 2, `TensorBundle`) from + which the old matrix `Tensor` will be loaded. +old_tensor_name: Name of the 2-D `Tensor` to load from checkpoint. +row_remapping: An int `Tensor` of row remappings (generally created by + `generate_vocab_remapping`). Even if no row remapping is needed, this must + still be an index-valued Tensor (e.g. [0, 1, 2, ...]), or a shifted + index-valued `Tensor` (e.g. [8, 9, 10, ...], for partitioned `Variables`). +col_remapping: An int `Tensor` of column remappings (generally created by + `generate_vocab_remapping`). May be a size-0 `Tensor` if only row remapping + is to be done (e.g. column ordering is the same). +initializing_values: A float `Tensor` containing values to fill in for cells + in the output matrix that are not loaded from the checkpoint. Length must be + exactly the same as the number of missing / new cells. +num_rows: Number of rows (length of the 1st dimension) in the output matrix. +num_cols: Number of columns (length of the 2nd dimension) in the output matrix. +max_rows_in_memory: The maximum number of rows to load from the checkpoint at + once. If less than or equal to 0, the entire matrix will be loaded into + memory. Setting this arg trades increased disk reads for lower memory usage. 
+output_matrix: Output matrix containing existing values loaded from the + checkpoint, and with any missing values filled in from initializing_values. +)doc"); } // namespace tensorflow diff --git a/tensorflow/core/ops/control_flow_ops.cc b/tensorflow/core/ops/control_flow_ops.cc index 81e9fcfa95..61089658d7 100644 --- a/tensorflow/core/ops/control_flow_ops.cc +++ b/tensorflow/core/ops/control_flow_ops.cc @@ -47,7 +47,20 @@ REGISTER_OP("Switch") .Output("output_false: T") .Output("output_true: T") .Attr("T: type") - .SetShapeFn(SwitchShape); + .SetShapeFn(SwitchShape) + .Doc(R"doc( +Forwards `data` to the output port determined by `pred`. + +If `pred` is true, the `data` input is forwarded to `output_true`. Otherwise, +the data goes to `output_false`. + +See also `RefSwitch` and `Merge`. + +data: The tensor to be forwarded to the appropriate output. +pred: A scalar that specifies which output port will receive data. +output_false: If `pred` is false, data will be forwarded to this output. +output_true: If `pred` is true, data will be forwarded to this output. +)doc"); REGISTER_OP("RefSwitch") .Input("data: Ref(T)") @@ -56,7 +69,20 @@ REGISTER_OP("RefSwitch") .Output("output_true: Ref(T)") .Attr("T: type") .SetAllowsUninitializedInput() - .SetShapeFn(SwitchShape); + .SetShapeFn(SwitchShape) + .Doc(R"doc( +Forwards the ref tensor `data` to the output port determined by `pred`. + +If `pred` is true, the `data` input is forwarded to `output_true`. Otherwise, +the data goes to `output_false`. + +See also `Switch` and `Merge`. + +data: The ref tensor to be forwarded to the appropriate output. +pred: A scalar that specifies which output port will receive data. +output_false: If `pred` is false, data will be forwarded to this output. +output_true: If `pred` is true, data will be forwarded to this output. +)doc"); // -------------------------------------------------------------------------- REGISTER_OP("RefSelect") @@ -84,7 +110,14 @@ REGISTER_OP("RefSelect") } c->set_output(0, first_input); return Status::OK(); - }); + }) + .Doc(R"doc( +Forwards the `index`th element of `inputs` to `output`. + +index: A scalar that determines the input that gets selected. +inputs: A list of ref tensors, one of which will be forwarded to `output`. +output: The forwarded tensor. +)doc"); // -------------------------------------------------------------------------- namespace { @@ -120,7 +153,20 @@ REGISTER_OP("Merge") .Output("value_index: int32") .Attr("T: type") .Attr("N: int >= 1") - .SetShapeFn(MergeShape); + .SetShapeFn(MergeShape) + .Doc(R"doc( +Forwards the value of an available tensor from `inputs` to `output`. + +`Merge` waits for at least one of the tensors in `inputs` to become available. +It is usually combined with `Switch` to implement branching. + +`Merge` forwards the first tensor to become available to `output`, and sets +`value_index` to its index in `inputs`. + +inputs: The input tensors, exactly one of which will become available. +output: Will be set to the available input tensor. +value_index: The index of the chosen input tensor in `inputs`. +)doc"); REGISTER_OP("RefMerge") .Input("inputs: Ref(N * T)") @@ -128,7 +174,20 @@ REGISTER_OP("RefMerge") .Output("value_index: int32") .Attr("T: type") .Attr("N: int >= 1") - .SetShapeFn(MergeShape); + .SetShapeFn(MergeShape) + .Doc(R"doc( +Forwards the value of an available tensor from `inputs` to `output`. + +`Merge` waits for at least one of the tensors in `inputs` to become available. +It is usually combined with `Switch` to implement branching. 
+
+`Merge` forwards the first tensor to become available to `output`, and sets
+`value_index` to its index in `inputs`.
+
+inputs: The input tensors, exactly one of which will become available.
+output: Will be set to the available input tensor.
+value_index: The index of the chosen input tensor in `inputs`.
+)doc");

 // --------------------------------------------------------------------------
 REGISTER_OP("Enter")
@@ -155,7 +214,22 @@
     }

     return Status::OK();
-    });
+    })
+    .Doc(R"doc(
+Creates or finds a child frame, and makes `data` available to the child frame.
+
+This op is used together with `Exit` to create loops in the graph.
+The unique `frame_name` is used by the `Executor` to identify frames. If
+`is_constant` is true, `output` is a constant in the child frame; otherwise
+it may be changed in the child frame. At most `parallel_iterations` iterations
+are run in parallel in the child frame.
+
+data: The tensor to be made available to the child frame.
+frame_name: The name of the child frame.
+is_constant: If true, the output is constant within the child frame.
+parallel_iterations: The number of iterations allowed to run in parallel.
+output: The same tensor as `data`.
+)doc");

 // --------------------------------------------------------------------------
 REGISTER_OP("RefEnter")
@@ -165,33 +239,75 @@
     .Attr("frame_name: string")
     .Attr("is_constant: bool = false")
     .Attr("parallel_iterations: int = 10")
-    .SetShapeFn(shape_inference::UnchangedShape);
+    .SetShapeFn(shape_inference::UnchangedShape)
+    .Doc(R"doc(
+Creates or finds a child frame, and makes `data` available to the child frame.
+
+The unique `frame_name` is used by the `Executor` to identify frames. If
+`is_constant` is true, `output` is a constant in the child frame; otherwise
+it may be changed in the child frame. At most `parallel_iterations` iterations
+are run in parallel in the child frame.
+
+data: The tensor to be made available to the child frame.
+frame_name: The name of the child frame.
+is_constant: If true, the output is constant within the child frame.
+parallel_iterations: The number of iterations allowed to run in parallel.
+output: The same tensor as `data`.
+)doc");

 // --------------------------------------------------------------------------
 REGISTER_OP("Exit")
     .Input("data: T")
     .Output("output: T")
     .Attr("T: type")
-    .SetShapeFn(shape_inference::UnchangedShape);
+    .SetShapeFn(shape_inference::UnchangedShape)
+    .Doc(R"doc(
+Exits the current frame to its parent frame.
+
+Exit makes its input `data` available to the parent frame.
+
+data: The tensor to be made available to the parent frame.
+output: The same tensor as `data`.
+)doc");

 REGISTER_OP("RefExit")
     .Input("data: Ref(T)")
     .Output("output: Ref(T)")
     .Attr("T: type")
-    .SetShapeFn(shape_inference::UnchangedShape);
+    .SetShapeFn(shape_inference::UnchangedShape)
+    .Doc(R"doc(
+Exits the current frame to its parent frame.
+
+Exit makes its input `data` available to the parent frame.
+
+data: The tensor to be made available to the parent frame.
+output: The same tensor as `data`.
+)doc");

 // --------------------------------------------------------------------------
 REGISTER_OP("NextIteration")
     .Input("data: T")
     .Output("output: T")
     .Attr("T: type")
-    .SetShapeFn(shape_inference::UnchangedShape);
+    .SetShapeFn(shape_inference::UnchangedShape)
+    .Doc(R"doc(
+Makes its input available to the next iteration.
+
+data: The tensor to be made available to the next iteration.
+output: The same tensor as `data`.
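+
+These low-level frame ops (`Enter`, `Exit`, `NextIteration`, together with
+`Switch` and `Merge`) are normally emitted by higher-level constructs rather
+than used directly. As an editorial illustration (not from the original
+changelist), a `tf.while_loop` such as the following lowers to a graph built
+from them:
+
+```python
+import tensorflow as tf
+
+i = tf.constant(0)
+# The resulting graph wires i through Enter -> Merge -> Switch ->
+# NextIteration nodes and returns the final value through Exit.
+result = tf.while_loop(cond=lambda i: i < 10, body=lambda i: i + 1,
+                       loop_vars=[i])
+with tf.Session() as sess:
+  print(sess.run(result))  # 10
+```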
+)doc"); REGISTER_OP("RefNextIteration") .Input("data: Ref(T)") .Output("output: Ref(T)") .Attr("T: type") - .SetShapeFn(shape_inference::UnchangedShape); + .SetShapeFn(shape_inference::UnchangedShape) + .Doc(R"doc( +Makes its input available to the next iteration. + +data: The tensor to be made available to the next iteration. +output: The same tensor as `data`. +)doc"); // -------------------------------------------------------------------------- REGISTER_OP("LoopCond") @@ -199,15 +315,40 @@ REGISTER_OP("LoopCond") .Output("output: bool") .SetShapeFn([](InferenceContext* c) { return shape_inference::UnchangedShapeWithRank(c, 0); - }); + }) + .Doc(R"doc( +Forwards the input to the output. + +This operator represents the loop termination condition used by the +"pivot" switches of a loop. + +input: A boolean scalar, representing the branch predicate of the Switch op. +output: The same tensor as `input`. +)doc"); // -------------------------------------------------------------------------- -REGISTER_OP("ControlTrigger").SetShapeFn(shape_inference::NoOutputs); +REGISTER_OP("ControlTrigger") + .SetShapeFn(shape_inference::NoOutputs) + .Doc(R"docstring( +Does nothing. Serves as a control trigger for scheduling. + +Only useful as a placeholder for control edges. +)docstring"); // -------------------------------------------------------------------------- REGISTER_OP("Abort") .Attr("error_msg: string = ''") .Attr("exit_without_error: bool = false") - .SetShapeFn(shape_inference::NoOutputs); + .SetShapeFn(shape_inference::NoOutputs) + .Doc(R"doc( +Raise a exception to abort the process when called. + +If exit_without_error is true, the process will exit normally, +otherwise it will exit with a SIGABORT signal. + +Returns nothing but an exception. + +error_msg: A string which is the message associated with the exception. +)doc"); } // namespace tensorflow diff --git a/tensorflow/core/ops/ctc_ops.cc b/tensorflow/core/ops/ctc_ops.cc index f2322c730b..1a69106d80 100644 --- a/tensorflow/core/ops/ctc_ops.cc +++ b/tensorflow/core/ops/ctc_ops.cc @@ -59,7 +59,30 @@ REGISTER_OP("CTCLoss") c->set_output(0, c->Vector(batch_size)); c->set_output(1, inputs); return Status::OK(); - }); + }) + .Doc(R"doc( +Calculates the CTC Loss (log probability) for each batch entry. Also calculates +the gradient. This class performs the softmax operation for you, so inputs +should be e.g. linear projections of outputs by an LSTM. + +inputs: 3-D, shape: `(max_time x batch_size x num_classes)`, the logits. +labels_indices: The indices of a `SparseTensor<int32, 2>`. + `labels_indices(i, :) == [b, t]` means `labels_values(i)` stores the id for + `(batch b, time t)`. +labels_values: The values (labels) associated with the given batch and time. +sequence_length: A vector containing sequence lengths (batch). +preprocess_collapse_repeated: Scalar, if true then repeated labels are + collapsed prior to the CTC calculation. +ctc_merge_repeated: Scalar. If set to false, *during* CTC calculation + repeated non-blank labels will not be merged and are interpreted as + individual labels. This is a simplified version of CTC. +ignore_longer_outputs_than_inputs: Scalar. If set to true, during CTC + calculation, items that have longer output sequences than input sequences + are skipped: they don't contribute to the loss term and have zero-gradient. +loss: A vector (batch) containing log-probabilities. +gradient: The gradient of `loss`. 3-D, shape: + `(max_time x batch_size x num_classes)`. 
+)doc"); REGISTER_OP("CTCGreedyDecoder") .Input("inputs: float") @@ -87,7 +110,32 @@ REGISTER_OP("CTCGreedyDecoder") c->set_output(2, c->Vector(2)); c->set_output(3, c->Matrix(batch_size, 1)); return Status::OK(); - }); + }) + .Doc(R"doc( +Performs greedy decoding on the logits given in inputs. + +A note about the attribute merge_repeated: if enabled, when +consecutive logits' maximum indices are the same, only the first of +these is emitted. Labeling the blank '*', the sequence "A B B * B B" +becomes "A B B" if merge_repeated = True and "A B B B B" if +merge_repeated = False. + +Regardless of the value of merge_repeated, if the maximum index of a given +time and batch corresponds to the blank, index `(num_classes - 1)`, no new +element is emitted. + +inputs: 3-D, shape: `(max_time x batch_size x num_classes)`, the logits. +sequence_length: A vector containing sequence lengths, size `(batch_size)`. +merge_repeated: If True, merge repeated classes in output. +decoded_indices: Indices matrix, size `(total_decoded_outputs x 2)`, + of a `SparseTensor<int64, 2>`. The rows store: [batch, time]. +decoded_values: Values vector, size: `(total_decoded_outputs)`, + of a `SparseTensor<int64, 2>`. The vector stores the decoded classes. +decoded_shape: Shape vector, size `(2)`, of the decoded SparseTensor. + Values are: `[batch_size, max_decoded_length]`. +log_probability: Matrix, size `(batch_size x 1)`, containing sequence + log-probabilities. +)doc"); REGISTER_OP("CTCBeamSearchDecoder") .Input("inputs: float") @@ -128,6 +176,32 @@ REGISTER_OP("CTCBeamSearchDecoder") } c->set_output(out_idx++, c->Matrix(batch_size, top_paths)); return Status::OK(); - }); + }) + .Doc(R"doc( +Performs beam search decoding on the logits given in input. + +A note about the attribute merge_repeated: For the beam search decoder, +this means that if consecutive entries in a beam are the same, only +the first of these is emitted. That is, when the top path is "A B B B B", +"A B" is returned if merge_repeated = True but "A B B B B" is +returned if merge_repeated = False. + +inputs: 3-D, shape: `(max_time x batch_size x num_classes)`, the logits. +sequence_length: A vector containing sequence lengths, size `(batch)`. +beam_width: A scalar >= 0 (beam search beam width). +top_paths: A scalar >= 0, <= beam_width (controls output size). +merge_repeated: If true, merge repeated classes in output. +decoded_indices: A list (length: top_paths) of indices matrices. Matrix j, + size `(total_decoded_outputs[j] x 2)`, has indices of a + `SparseTensor<int64, 2>`. The rows store: [batch, time]. +decoded_values: A list (length: top_paths) of values vectors. Vector j, + size `(length total_decoded_outputs[j])`, has the values of a + `SparseTensor<int64, 2>`. The vector stores the decoded classes for beam j. +decoded_shape: A list (length: top_paths) of shape vector. Vector j, + size `(2)`, stores the shape of the decoded `SparseTensor[j]`. + Its values are: `[batch_size, max_decoded_length[j]]`. +log_probability: A matrix, shaped: `(batch_size x top_paths)`. The + sequence log-probabilities. +)doc"); } // namespace tensorflow diff --git a/tensorflow/core/ops/data_flow_ops.cc b/tensorflow/core/ops/data_flow_ops.cc index c7a03bed19..cc0ea38b9a 100644 --- a/tensorflow/core/ops/data_flow_ops.cc +++ b/tensorflow/core/ops/data_flow_ops.cc @@ -84,7 +84,51 @@ REGISTER_OP("DynamicPartition") } return Status::OK(); - }); + }) + .Doc(R"doc( +Partitions `data` into `num_partitions` tensors using indices from `partitions`. 
+
+For each index tuple `js` of size `partitions.ndim`, the slice `data[js, ...]`
+becomes part of `outputs[partitions[js]]`. The slices with `partitions[js] = i`
+are placed in `outputs[i]` in lexicographic order of `js`, and the first
+dimension of `outputs[i]` is the number of entries in `partitions` equal to `i`.
+In detail,
+
+```python
+    outputs[i].shape = [sum(partitions == i)] + data.shape[partitions.ndim:]
+
+    outputs[i] = pack([data[js, ...] for js if partitions[js] == i])
+```
+
+`data.shape` must start with `partitions.shape`.
+
+For example:
+
+```python
+    # Scalar partitions.
+    partitions = 1
+    num_partitions = 2
+    data = [10, 20]
+    outputs[0] = []  # Empty with shape [0, 2]
+    outputs[1] = [[10, 20]]
+
+    # Vector partitions.
+    partitions = [0, 0, 1, 1, 0]
+    num_partitions = 2
+    data = [10, 20, 30, 40, 50]
+    outputs[0] = [10, 20, 50]
+    outputs[1] = [30, 40]
+```
+
+See `dynamic_stitch` for an example on how to merge partitions back.
+
+<div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="width:100%" src="https://www.tensorflow.org/images/DynamicPartition.png" alt>
+</div>
+
+partitions: Any shape. Indices in the range `[0, num_partitions)`.
+num_partitions: The number of partitions to output.
+)doc");

 namespace {
@@ -145,7 +189,73 @@ REGISTER_OP("DynamicStitch")
     .Output("merged: T")
     .Attr("N : int >= 1")
     .Attr("T : type")
-    .SetShapeFn(DynamicStitchShapeFunction);
+    .SetShapeFn(DynamicStitchShapeFunction)
+    .Doc(R"doc(
+Interleave the values from the `data` tensors into a single tensor.
+
+Builds a merged tensor such that
+
+```python
+    merged[indices[m][i, ..., j], ...] = data[m][i, ..., j, ...]
+```
+
+For example, if each `indices[m]` is scalar or vector, we have
+
+```python
+    # Scalar indices:
+    merged[indices[m], ...] = data[m][...]
+
+    # Vector indices:
+    merged[indices[m][i], ...] = data[m][i, ...]
+```
+
+Each `data[i].shape` must start with the corresponding `indices[i].shape`,
+and the rest of `data[i].shape` must be constant w.r.t. `i`. That is, we
+must have `data[i].shape = indices[i].shape + constant`. In terms of this
+`constant`, the output shape is
+
+    merged.shape = [max(indices)] + constant
+
+Values are merged in order, so if an index appears in both `indices[m][i]` and
+`indices[n][j]` for `(m,i) < (n,j)` the slice `data[n][j]` will appear in the
+merged result. If you do not need this guarantee, ParallelDynamicStitch might
+perform better on some devices.
+
+For example:
+
+```python
+    indices[0] = 6
+    indices[1] = [4, 1]
+    indices[2] = [[5, 2], [0, 3]]
+    data[0] = [61, 62]
+    data[1] = [[41, 42], [11, 12]]
+    data[2] = [[[51, 52], [21, 22]], [[1, 2], [31, 32]]]
+    merged = [[1, 2], [11, 12], [21, 22], [31, 32], [41, 42],
+              [51, 52], [61, 62]]
+```
+
+This method can be used to merge partitions created by `dynamic_partition`
+as illustrated in the following example:
+
+```python
+    # Apply a function (which increments x_i) to elements for which a certain
+    # condition applies (x_i != -1 in this example).
+    x = tf.constant([0.1, -1., 5.2, 4.3, -1., 7.4])
+    condition_mask = tf.not_equal(x, tf.constant(-1.))
+    partitioned_data = tf.dynamic_partition(
+        x, tf.cast(condition_mask, tf.int32), 2)
+    partitioned_data[1] = partitioned_data[1] + 1.0
+    condition_indices = tf.dynamic_partition(
+        tf.range(tf.shape(x)[0]), tf.cast(condition_mask, tf.int32), 2)
+    x = tf.dynamic_stitch(condition_indices, partitioned_data)
+    # Here x=[1.1, -1., 6.2, 5.3, -1, 8.4], the -1. values remain
+    # unchanged.
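+    # (Editor's note, added for clarity: the stitch inverts the partition
+    # here because condition_indices spreads tf.range(tf.shape(x)[0]) across
+    # the two partitions, so every original position appears exactly once.)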
+```
+
+<div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="width:100%" src="https://www.tensorflow.org/images/DynamicStitch.png" alt>
+</div>
+)doc");

 REGISTER_OP("ParallelDynamicStitch")
     .Input("indices: N * int32")
@@ -153,7 +263,72 @@ REGISTER_OP("ParallelDynamicStitch")
     .Input("data: N * T")
     .Output("merged: T")
     .Attr("N : int >= 1")
     .Attr("T : type")
-    .SetShapeFn(DynamicStitchShapeFunction);
+    .SetShapeFn(DynamicStitchShapeFunction)
+    .Doc(R"doc(
+Interleave the values from the `data` tensors into a single tensor.
+
+Builds a merged tensor such that
+
+```python
+    merged[indices[m][i, ..., j], ...] = data[m][i, ..., j, ...]
+```
+
+For example, if each `indices[m]` is scalar or vector, we have
+
+```python
+    # Scalar indices:
+    merged[indices[m], ...] = data[m][...]
+
+    # Vector indices:
+    merged[indices[m][i], ...] = data[m][i, ...]
+```
+
+Each `data[i].shape` must start with the corresponding `indices[i].shape`,
+and the rest of `data[i].shape` must be constant w.r.t. `i`. That is, we
+must have `data[i].shape = indices[i].shape + constant`. In terms of this
+`constant`, the output shape is
+
+    merged.shape = [max(indices)] + constant
+
+Values may be merged in parallel, so if an index appears in both `indices[m][i]`
+and `indices[n][j]`, the result may be invalid. This differs from the normal
+DynamicStitch operator that defines the behavior in that case.
+
+For example:
+
+```python
+    indices[0] = 6
+    indices[1] = [4, 1]
+    indices[2] = [[5, 2], [0, 3]]
+    data[0] = [61, 62]
+    data[1] = [[41, 42], [11, 12]]
+    data[2] = [[[51, 52], [21, 22]], [[1, 2], [31, 32]]]
+    merged = [[1, 2], [11, 12], [21, 22], [31, 32], [41, 42],
+              [51, 52], [61, 62]]
+```
+
+This method can be used to merge partitions created by `dynamic_partition`
+as illustrated in the following example:
+
+```python
+    # Apply a function (which increments x_i) to elements for which a certain
+    # condition applies (x_i != -1 in this example).
+    x = tf.constant([0.1, -1., 5.2, 4.3, -1., 7.4])
+    condition_mask = tf.not_equal(x, tf.constant(-1.))
+    partitioned_data = tf.dynamic_partition(
+        x, tf.cast(condition_mask, tf.int32), 2)
+    partitioned_data[1] = partitioned_data[1] + 1.0
+    condition_indices = tf.dynamic_partition(
+        tf.range(tf.shape(x)[0]), tf.cast(condition_mask, tf.int32), 2)
+    x = tf.dynamic_stitch(condition_indices, partitioned_data)
+    # Here x=[1.1, -1., 6.2, 5.3, -1, 8.4], the -1. values remain
+    # unchanged.
+```
+
+<div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="width:100%" src="https://www.tensorflow.org/images/DynamicStitch.png" alt>
+</div>
+)doc");

 // --------------------------------------------------------------------------
@@ -207,7 +382,29 @@ REGISTER_OP("RandomShuffleQueue")
     .Attr("container: string = ''")
     .Attr("shared_name: string = ''")
     .SetIsStateful()
-    .SetShapeFn(TwoElementOutput);
+    .SetShapeFn(TwoElementOutput)
+    .Doc(R"doc(
+A queue that randomizes the order of elements.
+
+handle: The handle to the queue.
+component_types: The type of each component in a value.
+shapes: The shape of each component in a value. The length of this attr must
+  be either 0 or the same as the length of component_types. If the length of
+  this attr is 0, the shapes of queue elements are not constrained, and
+  only one element may be dequeued at a time.
+capacity: The upper bound on the number of elements in this queue.
+  Negative numbers mean no limit.
+min_after_dequeue: Dequeue will block unless there would be this + many elements after the dequeue or the queue is closed. This + ensures a minimum level of mixing of elements. +seed: If either seed or seed2 is set to be non-zero, the random number + generator is seeded by the given seed. Otherwise, a random seed is used. +seed2: A second seed to avoid seed collision. +container: If non-empty, this queue is placed in the given container. + Otherwise, a default container is used. +shared_name: If non-empty, this queue will be shared under the given name + across multiple sessions. +)doc"); REGISTER_OP("RandomShuffleQueueV2") .Output("handle: resource") @@ -220,7 +417,29 @@ REGISTER_OP("RandomShuffleQueueV2") .Attr("container: string = ''") .Attr("shared_name: string = ''") .SetIsStateful() - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +A queue that randomizes the order of elements. + +handle: The handle to the queue. +component_types: The type of each component in a value. +shapes: The shape of each component in a value. The length of this attr must + be either 0 or the same as the length of component_types. If the length of + this attr is 0, the shapes of queue elements are not constrained, and + only one element may be dequeued at a time. +capacity: The upper bound on the number of elements in this queue. + Negative numbers mean no limit. +min_after_dequeue: Dequeue will block unless there would be this + many elements after the dequeue or the queue is closed. This + ensures a minimum level of mixing of elements. +seed: If either seed or seed2 is set to be non-zero, the random number + generator is seeded by the given seed. Otherwise, a random seed is used. +seed2: A second seed to avoid seed collision. +container: If non-empty, this queue is placed in the given container. + Otherwise, a default container is used. +shared_name: If non-empty, this queue will be shared under the given name + across multiple sessions. +)doc"); REGISTER_OP("FIFOQueue") .Output("handle: Ref(string)") @@ -230,7 +449,23 @@ REGISTER_OP("FIFOQueue") .Attr("container: string = ''") .Attr("shared_name: string = ''") .SetIsStateful() - .SetShapeFn(TwoElementOutput); + .SetShapeFn(TwoElementOutput) + .Doc(R"doc( +A queue that produces elements in first-in first-out order. + +handle: The handle to the queue. +component_types: The type of each component in a value. +shapes: The shape of each component in a value. The length of this attr must + be either 0 or the same as the length of component_types. If the length of + this attr is 0, the shapes of queue elements are not constrained, and + only one element may be dequeued at a time. +capacity: The upper bound on the number of elements in this queue. + Negative numbers mean no limit. +container: If non-empty, this queue is placed in the given container. + Otherwise, a default container is used. +shared_name: If non-empty, this queue will be shared under the given name + across multiple sessions. +)doc"); REGISTER_OP("FIFOQueueV2") .Output("handle: resource") @@ -240,7 +475,23 @@ REGISTER_OP("FIFOQueueV2") .Attr("container: string = ''") .Attr("shared_name: string = ''") .SetIsStateful() - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +A queue that produces elements in first-in first-out order. + +handle: The handle to the queue. +component_types: The type of each component in a value. +shapes: The shape of each component in a value. 
The length of this attr must + be either 0 or the same as the length of component_types. If the length of + this attr is 0, the shapes of queue elements are not constrained, and + only one element may be dequeued at a time. +capacity: The upper bound on the number of elements in this queue. + Negative numbers mean no limit. +container: If non-empty, this queue is placed in the given container. + Otherwise, a default container is used. +shared_name: If non-empty, this queue will be shared under the given name + across multiple sessions. +)doc"); REGISTER_OP("PaddingFIFOQueue") .Output("handle: Ref(string)") @@ -250,7 +501,31 @@ REGISTER_OP("PaddingFIFOQueue") .Attr("container: string = ''") .Attr("shared_name: string = ''") .SetIsStateful() - .SetShapeFn(TwoElementOutput); + .SetShapeFn(TwoElementOutput) + .Doc(R"doc( +A queue that produces elements in first-in first-out order. + +Variable-size shapes are allowed by setting the corresponding shape dimensions +to 0 in the shape attr. In this case DequeueMany will pad up to the maximum +size of any given element in the minibatch. See below for details. + +handle: The handle to the queue. +component_types: The type of each component in a value. +shapes: The shape of each component in a value. The length of this attr must + be either 0 or the same as the length of component_types. + Shapes of fixed rank but variable size are allowed by setting + any shape dimension to -1. In this case, the inputs' shape may vary along + the given dimension, and DequeueMany will pad the given dimension with + zeros up to the maximum shape of all elements in the given batch. + If the length of this attr is 0, different queue elements may have + different ranks and shapes, but only one element may be dequeued at a time. +capacity: The upper bound on the number of elements in this queue. + Negative numbers mean no limit. +container: If non-empty, this queue is placed in the given container. + Otherwise, a default container is used. +shared_name: If non-empty, this queue will be shared under the given name + across multiple sessions. +)doc"); REGISTER_OP("PaddingFIFOQueueV2") .Output("handle: resource") @@ -260,7 +535,31 @@ REGISTER_OP("PaddingFIFOQueueV2") .Attr("container: string = ''") .Attr("shared_name: string = ''") .SetIsStateful() - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +A queue that produces elements in first-in first-out order. + +Variable-size shapes are allowed by setting the corresponding shape dimensions +to 0 in the shape attr. In this case DequeueMany will pad up to the maximum +size of any given element in the minibatch. See below for details. + +handle: The handle to the queue. +component_types: The type of each component in a value. +shapes: The shape of each component in a value. The length of this attr must + be either 0 or the same as the length of component_types. + Shapes of fixed rank but variable size are allowed by setting + any shape dimension to -1. In this case, the inputs' shape may vary along + the given dimension, and DequeueMany will pad the given dimension with + zeros up to the maximum shape of all elements in the given batch. + If the length of this attr is 0, different queue elements may have + different ranks and shapes, but only one element may be dequeued at a time. +capacity: The upper bound on the number of elements in this queue. + Negative numbers mean no limit. +container: If non-empty, this queue is placed in the given container. 
+ Otherwise, a default container is used. +shared_name: If non-empty, this queue will be shared under the given name + across multiple sessions. +)doc"); REGISTER_OP("PriorityQueue") .Output("handle: Ref(string)") @@ -270,7 +569,29 @@ REGISTER_OP("PriorityQueue") .Attr("container: string = ''") .Attr("shared_name: string = ''") .SetIsStateful() - .SetShapeFn(TwoElementOutput); + .SetShapeFn(TwoElementOutput) + .Doc(R"doc( +A queue that produces elements sorted by the first component value. + +Note that the PriorityQueue requires the first component of any element +to be a scalar int64, in addition to the other elements declared by +component_types. Therefore calls to Enqueue and EnqueueMany (resp. Dequeue +and DequeueMany) on a PriorityQueue will all require (resp. output) one extra +entry in their input (resp. output) lists. + +handle: The handle to the queue. +component_types: The type of each component in a value. +shapes: The shape of each component in a value. The length of this attr must + be either 0 or the same as the length of component_types. If the length of + this attr is 0, the shapes of queue elements are not constrained, and + only one element may be dequeued at a time. +capacity: The upper bound on the number of elements in this queue. + Negative numbers mean no limit. +container: If non-empty, this queue is placed in the given container. + Otherwise, a default container is used. +shared_name: If non-empty, this queue will be shared under the given name + across multiple sessions. +)doc"); REGISTER_OP("PriorityQueueV2") .Output("handle: resource") @@ -280,48 +601,158 @@ REGISTER_OP("PriorityQueueV2") .Attr("container: string = ''") .Attr("shared_name: string = ''") .SetIsStateful() - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +A queue that produces elements sorted by the first component value. + +Note that the PriorityQueue requires the first component of any element +to be a scalar int64, in addition to the other elements declared by +component_types. Therefore calls to Enqueue and EnqueueMany (resp. Dequeue +and DequeueMany) on a PriorityQueue will all require (resp. output) one extra +entry in their input (resp. output) lists. + +handle: The handle to the queue. +component_types: The type of each component in a value. +shapes: The shape of each component in a value. The length of this attr must + be either 0 or the same as the length of component_types. If the length of + this attr is 0, the shapes of queue elements are not constrained, and + only one element may be dequeued at a time. +capacity: The upper bound on the number of elements in this queue. + Negative numbers mean no limit. +container: If non-empty, this queue is placed in the given container. + Otherwise, a default container is used. +shared_name: If non-empty, this queue will be shared under the given name + across multiple sessions. +)doc"); REGISTER_OP("FakeQueue") .Input("resource: resource") .Output("handle: Ref(string)") .SetIsStateful() - .SetShapeFn(TwoElementOutput); + .SetShapeFn(TwoElementOutput) + .Doc("Deprecated. Do not use."); REGISTER_OP("QueueEnqueue") .Input("handle: Ref(string)") .Input("components: Tcomponents") .Attr("Tcomponents: list(type) >= 1") .Attr("timeout_ms: int = -1") - .SetShapeFn(shape_inference::UnknownShape); + .SetShapeFn(shape_inference::UnknownShape) + .Doc(R"doc( +Enqueues a tuple of one or more tensors in the given queue. 
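+
+For example (editor's note: a minimal sketch, assuming the TF 1.x Python
+wrapper `tf.FIFOQueue`, whose `enqueue`/`dequeue` methods emit the queue ops
+documented in this file):
+
+```python
+import tensorflow as tf
+
+q = tf.FIFOQueue(capacity=10, dtypes=[tf.int32])
+enqueue = q.enqueue([1])   # QueueEnqueue(V2): one tuple with one component
+dequeue = q.dequeue()      # QueueDequeue(V2): blocks until data is available
+
+with tf.Session() as sess:
+    sess.run(enqueue)
+    print(sess.run(dequeue))  # 1
+```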
+ +The components input has k elements, which correspond to the components of +tuples stored in the given queue. + +N.B. If the queue is full, this operation will block until the given +element has been enqueued (or 'timeout_ms' elapses, if specified). + +handle: The handle to a queue. +components: One or more tensors from which the enqueued tensors should be taken. +timeout_ms: If the queue is full, this operation will block for up to + timeout_ms milliseconds. + Note: This option is not supported yet. +)doc"); REGISTER_OP("QueueEnqueueV2") .Input("handle: resource") .Input("components: Tcomponents") .Attr("Tcomponents: list(type) >= 1") .Attr("timeout_ms: int = -1") - .SetShapeFn(shape_inference::UnknownShape); + .SetShapeFn(shape_inference::UnknownShape) + .Doc(R"doc( +Enqueues a tuple of one or more tensors in the given queue. + +The components input has k elements, which correspond to the components of +tuples stored in the given queue. + +N.B. If the queue is full, this operation will block until the given +element has been enqueued (or 'timeout_ms' elapses, if specified). + +handle: The handle to a queue. +components: One or more tensors from which the enqueued tensors should be taken. +timeout_ms: If the queue is full, this operation will block for up to + timeout_ms milliseconds. + Note: This option is not supported yet. +)doc"); REGISTER_OP("QueueEnqueueMany") .Input("handle: Ref(string)") .Input("components: Tcomponents") .Attr("Tcomponents: list(type) >= 1") .Attr("timeout_ms: int = -1") - .SetShapeFn(shape_inference::UnknownShape); + .SetShapeFn(shape_inference::UnknownShape) + .Doc(R"doc( +Enqueues zero or more tuples of one or more tensors in the given queue. + +This operation slices each component tensor along the 0th dimension to +make multiple queue elements. All of the tuple components must have the +same size in the 0th dimension. + +The components input has k elements, which correspond to the components of +tuples stored in the given queue. + +N.B. If the queue is full, this operation will block until the given +elements have been enqueued (or 'timeout_ms' elapses, if specified). + +handle: The handle to a queue. +components: One or more tensors from which the enqueued tensors should + be taken. +timeout_ms: If the queue is too full, this operation will block for up + to timeout_ms milliseconds. + Note: This option is not supported yet. +)doc"); REGISTER_OP("QueueEnqueueManyV2") .Input("handle: resource") .Input("components: Tcomponents") .Attr("Tcomponents: list(type) >= 1") .Attr("timeout_ms: int = -1") - .SetShapeFn(shape_inference::UnknownShape); + .SetShapeFn(shape_inference::UnknownShape) + .Doc(R"doc( +Enqueues zero or more tuples of one or more tensors in the given queue. + +This operation slices each component tensor along the 0th dimension to +make multiple queue elements. All of the tuple components must have the +same size in the 0th dimension. + +The components input has k elements, which correspond to the components of +tuples stored in the given queue. + +N.B. If the queue is full, this operation will block until the given +elements have been enqueued (or 'timeout_ms' elapses, if specified). + +handle: The handle to a queue. +components: One or more tensors from which the enqueued tensors should + be taken. +timeout_ms: If the queue is too full, this operation will block for up + to timeout_ms milliseconds. + Note: This option is not supported yet. 
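+
+For example (editor's note: a minimal sketch, assuming the TF 1.x Python
+wrapper; `enqueue_many` slices its input along dimension 0 into separate
+queue elements, as described above):
+
+```python
+import tensorflow as tf
+
+q = tf.FIFOQueue(capacity=10, dtypes=[tf.int32])
+enqueue = q.enqueue_many([[1, 2, 3]])  # three elements: 1, 2, 3
+dequeue = q.dequeue()
+
+with tf.Session() as sess:
+    sess.run(enqueue)
+    print(sess.run(dequeue))  # 1
+```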
+)doc"); REGISTER_OP("QueueDequeue") .Input("handle: Ref(string)") .Output("components: component_types") .Attr("component_types: list(type) >= 1") .Attr("timeout_ms: int = -1") - .SetShapeFn(shape_inference::UnknownShape); + .SetShapeFn(shape_inference::UnknownShape) + .Doc(R"doc( +Dequeues a tuple of one or more tensors from the given queue. + +This operation has k outputs, where k is the number of components +in the tuples stored in the given queue, and output i is the ith +component of the dequeued tuple. + +N.B. If the queue is empty, this operation will block until an element +has been dequeued (or 'timeout_ms' elapses, if specified). + +handle: The handle to a queue. +components: One or more tensors that were dequeued as a tuple. +component_types: The type of each component in a tuple. +timeout_ms: If the queue is empty, this operation will block for up to + timeout_ms milliseconds. + Note: This option is not supported yet. +)doc"); REGISTER_OP("QueueDequeueV2") .Input("handle: resource") @@ -338,7 +769,24 @@ REGISTER_OP("QueueDequeueV2") } else { return shape_inference::UnknownShape(c); } - }); + }) + .Doc(R"doc( +Dequeues a tuple of one or more tensors from the given queue. + +This operation has k outputs, where k is the number of components +in the tuples stored in the given queue, and output i is the ith +component of the dequeued tuple. + +N.B. If the queue is empty, this operation will block until an element +has been dequeued (or 'timeout_ms' elapses, if specified). + +handle: The handle to a queue. +components: One or more tensors that were dequeued as a tuple. +component_types: The type of each component in a tuple. +timeout_ms: If the queue is empty, this operation will block for up to + timeout_ms milliseconds. + Note: This option is not supported yet. +)doc"); REGISTER_OP("QueueDequeueMany") .Input("handle: Ref(string)") @@ -346,7 +794,32 @@ REGISTER_OP("QueueDequeueMany") .Output("components: component_types") .Attr("component_types: list(type) >= 1") .Attr("timeout_ms: int = -1") - .SetShapeFn(shape_inference::UnknownShape); + .SetShapeFn(shape_inference::UnknownShape) + .Doc(R"doc( +Dequeues `n` tuples of one or more tensors from the given queue. + +If the queue is closed and there are fewer than `n` elements, then an +OutOfRange error is returned. + +This operation concatenates queue-element component tensors along the +0th dimension to make a single component tensor. All of the components +in the dequeued tuple will have size `n` in the 0th dimension. + +This operation has `k` outputs, where `k` is the number of components in +the tuples stored in the given queue, and output `i` is the ith +component of the dequeued tuple. + +N.B. If the queue is empty, this operation will block until `n` elements +have been dequeued (or 'timeout_ms' elapses, if specified). + +handle: The handle to a queue. +n: The number of tuples to dequeue. +components: One or more tensors that were dequeued as a tuple. +component_types: The type of each component in a tuple. +timeout_ms: If the queue has fewer than n elements, this operation + will block for up to timeout_ms milliseconds. + Note: This option is not supported yet. +)doc"); REGISTER_OP("QueueDequeueManyV2") .Input("handle: resource") @@ -366,7 +839,32 @@ REGISTER_OP("QueueDequeueManyV2") n_shape = c->Vector(n); } return DequeueManyV2Shape(c, n_shape); - }); + }) + .Doc(R"doc( +Dequeues `n` tuples of one or more tensors from the given queue. 
+ +If the queue is closed and there are fewer than `n` elements, then an +OutOfRange error is returned. + +This operation concatenates queue-element component tensors along the +0th dimension to make a single component tensor. All of the components +in the dequeued tuple will have size `n` in the 0th dimension. + +This operation has `k` outputs, where `k` is the number of components in +the tuples stored in the given queue, and output `i` is the ith +component of the dequeued tuple. + +N.B. If the queue is empty, this operation will block until `n` elements +have been dequeued (or 'timeout_ms' elapses, if specified). + +handle: The handle to a queue. +n: The number of tuples to dequeue. +components: One or more tensors that were dequeued as a tuple. +component_types: The type of each component in a tuple. +timeout_ms: If the queue has fewer than n elements, this operation + will block for up to timeout_ms milliseconds. + Note: This option is not supported yet. +)doc"); REGISTER_OP("QueueDequeueUpTo") .Input("handle: Ref(string)") @@ -374,7 +872,36 @@ REGISTER_OP("QueueDequeueUpTo") .Output("components: component_types") .Attr("component_types: list(type) >= 1") .Attr("timeout_ms: int = -1") - .SetShapeFn(shape_inference::UnknownShape); + .SetShapeFn(shape_inference::UnknownShape) + .Doc(R"doc( +Dequeues `n` tuples of one or more tensors from the given queue. + +This operation is not supported by all queues. If a queue does not support +DequeueUpTo, then an Unimplemented error is returned. + +If the queue is closed and there are more than 0 but less than `n` +elements remaining, then instead of returning an OutOfRange error like +QueueDequeueMany, less than `n` elements are returned immediately. If +the queue is closed and there are 0 elements left in the queue, then +an OutOfRange error is returned just like in QueueDequeueMany. +Otherwise the behavior is identical to QueueDequeueMany: + +This operation concatenates queue-element component tensors along the +0th dimension to make a single component tensor. All of the components +in the dequeued tuple will have size `n` in the 0th dimension. + +This operation has k outputs, where `k` is the number of components in +the tuples stored in the given queue, and output `i` is the ith +component of the dequeued tuple. + +handle: The handle to a queue. +n: The number of tuples to dequeue. +components: One or more tensors that were dequeued as a tuple. +component_types: The type of each component in a tuple. +timeout_ms: If the queue has fewer than n elements, this operation + will block for up to timeout_ms milliseconds. + Note: This option is not supported yet. +)doc"); REGISTER_OP("QueueDequeueUpToV2") .Input("handle: resource") @@ -384,44 +911,133 @@ REGISTER_OP("QueueDequeueUpToV2") .Attr("timeout_ms: int = -1") .SetShapeFn([](InferenceContext* c) { return DequeueManyV2Shape(c, c->Vector(InferenceContext::kUnknownDim)); - }); + }) + .Doc(R"doc( +Dequeues `n` tuples of one or more tensors from the given queue. + +This operation is not supported by all queues. If a queue does not support +DequeueUpTo, then an Unimplemented error is returned. + +If the queue is closed and there are more than 0 but less than `n` +elements remaining, then instead of returning an OutOfRange error like +QueueDequeueMany, less than `n` elements are returned immediately. If +the queue is closed and there are 0 elements left in the queue, then +an OutOfRange error is returned just like in QueueDequeueMany. 
+Otherwise the behavior is identical to QueueDequeueMany: + +This operation concatenates queue-element component tensors along the +0th dimension to make a single component tensor. All of the components +in the dequeued tuple will have size n in the 0th dimension. + +This operation has `k` outputs, where `k` is the number of components in +the tuples stored in the given queue, and output `i` is the ith +component of the dequeued tuple. + +handle: The handle to a queue. +n: The number of tuples to dequeue. +components: One or more tensors that were dequeued as a tuple. +component_types: The type of each component in a tuple. +timeout_ms: If the queue has fewer than n elements, this operation + will block for up to timeout_ms milliseconds. + Note: This option is not supported yet. +)doc"); REGISTER_OP("QueueClose") .Input("handle: Ref(string)") .SetShapeFn(TwoElementVectorInputsAndScalarOutputs) - .Attr("cancel_pending_enqueues: bool = false"); + .Attr("cancel_pending_enqueues: bool = false") + .Doc(R"doc( +Closes the given queue. + +This operation signals that no more elements will be enqueued in the +given queue. Subsequent Enqueue(Many) operations will fail. +Subsequent Dequeue(Many) operations will continue to succeed if +sufficient elements remain in the queue. Subsequent Dequeue(Many) +operations that would block will fail immediately. + +handle: The handle to a queue. +cancel_pending_enqueues: If true, all pending enqueue requests that are + blocked on the given queue will be canceled. +)doc"); REGISTER_OP("QueueCloseV2") .Input("handle: resource") .SetShapeFn(shape_inference::NoOutputs) - .Attr("cancel_pending_enqueues: bool = false"); + .Attr("cancel_pending_enqueues: bool = false") + .Doc(R"doc( +Closes the given queue. + +This operation signals that no more elements will be enqueued in the +given queue. Subsequent Enqueue(Many) operations will fail. +Subsequent Dequeue(Many) operations will continue to succeed if +sufficient elements remain in the queue. Subsequent Dequeue(Many) +operations that would block will fail immediately. + +handle: The handle to a queue. +cancel_pending_enqueues: If true, all pending enqueue requests that are + blocked on the given queue will be canceled. +)doc"); REGISTER_OP("QueueIsClosed") .Input("handle: Ref(string)") .Output("is_closed: bool") - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Returns true if queue is closed. + +This operation returns true if the queue is closed and false if the queue +is open. + +handle: The handle to a queue. +)doc"); REGISTER_OP("QueueIsClosedV2") .Input("handle: resource") .Output("is_closed: bool") - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Returns true if queue is closed. + +This operation returns true if the queue is closed and false if the queue +is open. + +handle: The handle to a queue. +)doc"); REGISTER_OP("QueueSize") .Input("handle: Ref(string)") .Output("size: int32") - .SetShapeFn(TwoElementVectorInputsAndScalarOutputs); + .SetShapeFn(TwoElementVectorInputsAndScalarOutputs) + .Doc(R"doc( +Computes the number of elements in the given queue. + +handle: The handle to a queue. +size: The number of elements in the given queue. +)doc"); REGISTER_OP("QueueSizeV2") .Input("handle: resource") .Output("size: int32") - .SetShapeFn(shape_inference::UnchangedShape); + .SetShapeFn(shape_inference::UnchangedShape) + .Doc(R"doc( +Computes the number of elements in the given queue. 
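+
+For example (editor's note: a minimal sketch, assuming the TF 1.x Python
+wrapper, where this op is exposed as the queue's `size()` method; `close()`
+and `dequeue_up_to()` likewise map onto the ops documented above):
+
+```python
+import tensorflow as tf
+
+q = tf.FIFOQueue(capacity=10, dtypes=[tf.int32])
+enqueue = q.enqueue_many([[1, 2, 3]])
+size = q.size()
+
+with tf.Session() as sess:
+    sess.run(enqueue)
+    print(sess.run(size))  # 3
+```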
+
+handle: The handle to a queue.
+size: The number of elements in the given queue.
+)doc");

 // --------------------------------------------------------------------------
 REGISTER_OP("AccumulatorNumAccumulated")
     .Input("handle: Ref(string)")
     .Output("num_accumulated: int32")
-    .SetShapeFn(shape_inference::ScalarShape);
+    .SetShapeFn(shape_inference::ScalarShape)
+    .Doc(R"doc(
+Returns the number of gradients aggregated in the given accumulator.
+
+handle: The handle to an accumulator.
+num_accumulated: The number of gradients aggregated in the given accumulator.
+)doc");

 REGISTER_OP("AccumulatorSetGlobalStep")
     .Input("handle: Ref(string)")
@@ -430,7 +1046,16 @@ REGISTER_OP("AccumulatorSetGlobalStep")
     ShapeHandle unused;
     TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
     return Status::OK();
-  });
+  })
+  .Doc(R"doc(
+Updates the accumulator with a new value for global_step.
+
+Logs a warning if the accumulator's value is already higher than
+new_global_step.
+
+handle: The handle to an accumulator.
+new_global_step: The new global_step value to set.
+)doc");

 REGISTER_OP("ConditionalAccumulator")
     .Output("handle: Ref(string)")
@@ -442,7 +1067,25 @@ REGISTER_OP("ConditionalAccumulator")
     .SetShapeFn([](InferenceContext* c) {
       c->set_output(0, c->Vector(2));
       return Status::OK();
-    });
+    })
+    .Doc(R"doc(
+A conditional accumulator for aggregating gradients.
+
+The accumulator accepts gradients marked with local_step greater than or
+equal to the most recent global_step known to the accumulator. The
+average can be extracted from the accumulator, provided sufficient
+gradients have been accumulated. Extracting the average automatically
+resets the aggregate to 0, and increments the global_step recorded by
+the accumulator.
+
+handle: The handle to the accumulator.
+dtype: The type of the value being accumulated.
+shape: The shape of the values, can be [], in which case shape is unknown.
+container: If non-empty, this accumulator is placed in the given container.
+  Otherwise, a default container is used.
+shared_name: If non-empty, this accumulator will be shared under the
+  given name across multiple sessions.
+)doc");

 REGISTER_OP("AccumulatorApplyGradient")
     .Input("handle: Ref(string)")
@@ -453,7 +1096,18 @@ REGISTER_OP("AccumulatorApplyGradient")
     ShapeHandle unused;
     TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
     return Status::OK();
-  });
+  })
+  .Doc(R"doc(
+Applies a gradient to a given accumulator.
+
+Does not add if local_step is less than the accumulator's global_step.
+
+handle: The handle to an accumulator.
+local_step: The local_step value at which the gradient was computed.
+gradient: A tensor of the gradient to be accumulated.
+dtype: The data type of accumulated gradients. Needs to correspond to the type
+  of the accumulator.
+)doc");

 REGISTER_OP("AccumulatorTakeGradient")
     .Input("handle: Ref(string)")
@@ -467,7 +1121,22 @@ REGISTER_OP("AccumulatorTakeGradient")
       // shape information.
       return shape_inference::UnknownShape(c);
     })
-    .Attr("dtype: numbertype");
+    .Attr("dtype: numbertype")
+    .Doc(R"doc(
+Extracts the average gradient in the given ConditionalAccumulator.
+
+The op blocks until sufficient (i.e., more than num_required)
+gradients have been accumulated. If the accumulator has already
+aggregated more than num_required gradients, it returns the average of
+the accumulated gradients. Also automatically increments the recorded
+global_step in the accumulator by 1, and resets the aggregate to 0.
+
+handle: The handle to an accumulator.
+num_required: Number of gradients required before we return an aggregate.
+average: The average of the accumulated gradients.
+dtype: The data type of accumulated gradients. Needs to correspond to the type
+  of the accumulator.
+)doc");

 REGISTER_OP("SparseConditionalAccumulator")
     .Output("handle: Ref(string)")
@@ -479,7 +1148,25 @@ REGISTER_OP("SparseConditionalAccumulator")
     .SetShapeFn([](InferenceContext* c) {
       c->set_output(0, c->Vector(2));
       return Status::OK();
-    });
+    })
+    .Doc(R"doc(
+A conditional accumulator for aggregating sparse gradients.
+
+The accumulator accepts gradients marked with local_step greater than or
+equal to the most recent global_step known to the accumulator. The
+average can be extracted from the accumulator, provided sufficient
+gradients have been accumulated. Extracting the average automatically
+resets the aggregate to 0, and increments the global_step recorded by
+the accumulator.
+
+handle: The handle to the accumulator.
+dtype: The type of the value being accumulated.
+shape: The shape of the values.
+container: If non-empty, this accumulator is placed in the given container.
+  Otherwise, a default container is used.
+shared_name: If non-empty, this accumulator will be shared under the given name
+  across multiple sessions.
+)doc");

 REGISTER_OP("SparseAccumulatorApplyGradient")
     .Input("handle: Ref(string)")
@@ -493,7 +1180,26 @@ REGISTER_OP("SparseAccumulatorApplyGradient")
     ShapeHandle unused;
     TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
     return Status::OK();
-  });
+  })
+  .Doc(R"doc(
+Applies a sparse gradient to a given accumulator.
+
+Does not add if local_step is smaller than the accumulator's
+global_step.
+
+handle: The handle to an accumulator.
+local_step: The local_step value at which the sparse gradient was computed.
+gradient_indices: Indices of the sparse gradient to be accumulated. Must be a
+  vector.
+gradient_values: Values are the non-zero slices of the gradient, and must have
+  the same first dimension as indices, i.e., the nnz represented by indices and
+  values must be consistent.
+gradient_shape: Shape of the sparse gradient to be accumulated.
+dtype: The data type of accumulated gradients. Needs to correspond to the type
+  of the accumulator.
+has_known_shape: Boolean indicating whether gradient_shape is unknown, in which
+  case the input is ignored during validation.
+)doc");

 REGISTER_OP("SparseAccumulatorTakeGradient")
     .Input("handle: Ref(string)")
@@ -509,7 +1215,25 @@ REGISTER_OP("SparseAccumulatorTakeGradient")
       // by 'handle', but which is not available here, so we lose
      // shape information.
       return shape_inference::UnknownShape(c);
-    });
+    })
+    .Doc(R"doc(
+Extracts the average sparse gradient in a SparseConditionalAccumulator.
+
+The op blocks until sufficient (i.e., more than num_required)
+gradients have been accumulated. If the accumulator has already
+aggregated more than num_required gradients, it returns the
+average of the accumulated gradients. Also automatically increments
+the recorded global_step in the accumulator by 1, and resets the
+aggregate to 0.
+
+handle: The handle to a SparseConditionalAccumulator.
+num_required: Number of gradients required before we return an aggregate.
+indices: Indices of the average of the accumulated sparse gradients.
+values: Values of the average of the accumulated sparse gradients.
+shape: Shape of the average of the accumulated sparse gradients.
+dtype: The data type of accumulated gradients. Needs to correspond to the type
+  of the accumulator.
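+
+For example (editor's note: a minimal sketch of the accumulator workflow,
+assuming the TF 1.x Python wrapper `tf.ConditionalAccumulator`; the sparse
+variant `tf.SparseConditionalAccumulator` mirrors it):
+
+```python
+import tensorflow as tf
+
+acc = tf.ConditionalAccumulator(dtype=tf.float32, shape=[2])
+apply_op = acc.apply_grad([1.0, 2.0], local_step=0)  # stale steps are dropped
+take_op = acc.take_grad(num_required=1)              # blocks until 1 gradient
+
+with tf.Session() as sess:
+    sess.run(apply_op)
+    print(sess.run(take_op))  # [1. 2.]; aggregate resets, global_step += 1
+```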
+)doc"); // -------------------------------------------------------------------------- @@ -519,7 +1243,17 @@ REGISTER_OP("StackV2") .Attr("elem_type: type") .Attr("stack_name: string = ''") .SetIsStateful() - .SetShapeFn(TwoElementOutput); + .SetShapeFn(TwoElementOutput) + .Doc(R"doc( +A stack that produces elements in first-in last-out order. + +max_size: The maximum size of the stack if non-negative. If negative, the stack + size is unlimited. +handle: The handle to the stack. +elem_type: The type of the elements on the stack. +stack_name: Overrides the name used for the temporary stack resource. Default +value is the name of the 'Stack' op (which is guaranteed unique). +)doc"); REGISTER_OP("StackPushV2") .Input("handle: resource") @@ -530,17 +1264,37 @@ REGISTER_OP("StackPushV2") .SetShapeFn([](shape_inference::InferenceContext* c) { c->set_output(0, c->input(1)); return Status::OK(); - }); + }) + .Doc(R"doc( +Push an element onto the stack. + +handle: The handle to a stack. +elem: The tensor to be pushed onto the stack. +output: The same tensor as the input 'elem'. +swap_memory: Swap `elem` to CPU. Default to false. +)doc"); REGISTER_OP("StackPopV2") .Input("handle: resource") .Output("elem: elem_type") .Attr("elem_type: type") - .SetShapeFn(shape_inference::UnknownShape); + .SetShapeFn(shape_inference::UnknownShape) + .Doc(R"doc( +Pop the element at the top of the stack. + +handle: The handle to a stack. +elem: The tensor that is popped from the top of the stack. +elem_type: The type of the elem that is popped. +)doc"); REGISTER_OP("StackCloseV2") .Input("handle: resource") - .SetShapeFn(TwoElementVectorInputsAndScalarOutputs); + .SetShapeFn(TwoElementVectorInputsAndScalarOutputs) + .Doc(R"doc( +Delete the stack from its resource container. + +handle: The handle to a stack. +)doc"); // Deprecated ref-typed variants of stack. @@ -549,7 +1303,10 @@ REGISTER_OP("Stack") .Attr("elem_type: type") .Attr("stack_name: string = ''") .SetIsStateful() - .SetShapeFn(TwoElementOutput); + .SetShapeFn(TwoElementOutput) + .Doc(R"doc( +Deprecated, use StackV2. +)doc"); REGISTER_OP("StackPush") .Input("handle: Ref(string)") @@ -560,17 +1317,26 @@ REGISTER_OP("StackPush") .SetShapeFn([](shape_inference::InferenceContext* c) { c->set_output(0, c->input(1)); return Status::OK(); - }); + }) + .Doc(R"doc( +Deprecated, use StackPushV2. +)doc"); REGISTER_OP("StackPop") .Input("handle: Ref(string)") .Output("elem: elem_type") .Attr("elem_type: type") - .SetShapeFn(shape_inference::UnknownShape); + .SetShapeFn(shape_inference::UnknownShape) + .Doc(R"doc( +Deprecated, use StackPopV2. +)doc"); REGISTER_OP("StackClose") .Input("handle: Ref(string)") - .SetShapeFn(TwoElementVectorInputsAndScalarOutputs); + .SetShapeFn(TwoElementVectorInputsAndScalarOutputs) + .Doc(R"doc( +Deprecated, use StackCloseV2. +)doc"); // -------------------------------------------------------------------------- @@ -591,7 +1357,34 @@ REGISTER_OP("TensorArrayV3") c->set_output(0, c->Vector(2)); c->set_output(1, c->Scalar()); return Status::OK(); - }); + }) + .Doc(R"doc( +An array of Tensors of given size. + +Write data via Write and read via Read or Pack. + +handle: The handle to the TensorArray. +flow: A scalar used to control gradient flow. +size: The size of the array. +dtype: The type of the elements on the tensor_array. +element_shape: The expected shape of an element, if known. Used to + validate the shapes of TensorArray elements. If this shape is not + fully specified, gathering zero-size TensorArrays is an error. 
+dynamic_size: A boolean that determines whether writes to the TensorArray
+  are allowed to grow the size. By default, this is not allowed.
+clear_after_read: If true (default), Tensors in the TensorArray are cleared
+  after being read. This disables multiple read semantics but allows early
+  release of memory.
+identical_element_shapes: If true (default is false), then all
+  elements in the TensorArray will be expected to have identical shapes.
+  This allows certain behaviors, like dynamically checking for
+  consistent shapes on write, and being able to fill in properly
+  shaped zero tensors on stack -- even if the element_shape attribute
+  is not fully defined.
+tensor_array_name: Overrides the name used for the temporary tensor_array
+  resource. Default value is the name of the 'TensorArray' op (which
+  is guaranteed unique).
+)doc");

 REGISTER_OP("TensorArrayGradV3")
     .Input("handle: resource")
@@ -608,7 +1401,52 @@ REGISTER_OP("TensorArrayGradV3")
     c->set_output(0, c->Vector(2));
     c->set_output(1, c->Scalar());
     return Status::OK();
-  });
+  })
+  .Doc(R"doc(
+Creates a TensorArray for storing the gradients of values in the given handle.
+
+If the given TensorArray gradient already exists, returns a reference to it.
+
+Locks the size of the original TensorArray by disabling its dynamic size flag.
+
+**A note about the input flow_in:**
+
+The handle flow_in forces the execution of the gradient lookup to occur
+only after certain other operations have occurred. For example, when
+the forward TensorArray is dynamically sized, writes to this TensorArray
+may resize the object. The gradient TensorArray is statically sized based
+on the size of the forward TensorArray when this operation executes.
+Furthermore, the size of the forward TensorArray is frozen by this call.
+As a result, the flow is used to ensure that the call to generate the gradient
+TensorArray only happens after all writes are executed.
+
+In the case of dynamically sized TensorArrays, gradient computation should
+only be performed on read operations that have themselves been chained via
+flow to occur only after all writes have executed. That way the final size
+of the forward TensorArray is known when this operation is called.
+
+**A note about the source attribute:**
+
+TensorArray gradient calls use an accumulator TensorArray object. If
+multiple gradients are calculated and run in the same session, the multiple
+gradient nodes may accidentally flow through the same accumulator TensorArray.
+This double counts and generally breaks the TensorArray gradient flow.
+
+The solution is to identify which gradient call this particular
+TensorArray gradient is being called in. This is performed by identifying
+a unique string (e.g. "gradients", "gradients_1", ...) from the input
+gradient Tensor's name. This string is used as a suffix when creating
+the TensorArray gradient object here (the attribute `source`).
+
+The attribute `source` is added as a suffix to the forward TensorArray's
+name when performing the creation / lookup, so that each separate gradient
+calculation gets its own TensorArray accumulator.
+
+handle: The handle to the forward TensorArray.
+flow_in: A float scalar that enforces proper chaining of operations.
+source: The gradient source string, used to decide which gradient TensorArray
+  to return.
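+
+For example (editor's note: a minimal sketch, assuming the TF 1.x Python
+wrapper `tf.TensorArray`, which is backed by the TensorArrayV3 ops and
+threads the `flow` scalar through each operation for you):
+
+```python
+import tensorflow as tf
+
+ta = tf.TensorArray(dtype=tf.float32, size=3)
+# Each write returns a new TensorArray object carrying the updated flow.
+ta = ta.write(0, 10.0).write(1, 20.0).write(2, 30.0)
+stacked = ta.stack()  # shape (3,)
+
+with tf.Session() as sess:
+    print(sess.run(stacked))  # [10. 20. 30.]
+```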
+)doc"); REGISTER_OP("TensorArrayWriteV3") .Input("handle: resource") @@ -627,7 +1465,16 @@ REGISTER_OP("TensorArrayWriteV3") TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused)); return shape_inference::ScalarShape(c); - }); + }) + .Doc(R"doc( +Push an element onto the tensor_array. + +handle: The handle to a TensorArray. +index: The position to write to inside the TensorArray. +value: The tensor to write to the TensorArray. +flow_in: A float scalar that enforces proper chaining of operations. +flow_out: A float scalar that enforces proper chaining of operations. +)doc"); REGISTER_OP("TensorArrayReadV3") .Input("handle: resource") @@ -644,7 +1491,15 @@ REGISTER_OP("TensorArrayReadV3") TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); return shape_inference::UnknownShape(c); - }); + }) + .Doc(R"doc( +Read an element from the TensorArray into output `value`. + +handle: The handle to a TensorArray. +dtype: The type of the elem that is returned. +flow_in: A float scalar that enforces proper chaining of operations. +value: The tensor that is read from the TensorArray. +)doc"); REGISTER_OP("TensorArrayGatherV3") .Input("handle: resource") @@ -661,7 +1516,22 @@ REGISTER_OP("TensorArrayGatherV3") TF_RETURN_IF_ERROR(c->WithValue(c->Dim(c->input(0), 0), 2, &unused_dim)); TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); return shape_inference::UnknownShape(c); - }); + }) + .Doc(R"doc( +Gather specific elements from the TensorArray into output `value`. + +All elements selected by `indices` must have the same shape. + +handle: The handle to a TensorArray. +indices: The locations in the TensorArray from which to read tensor elements. +dtype: The type of the elem that is returned. +element_shape: The expected shape of an element, if known. Used to + validate the shapes of TensorArray elements. If this shape is not + fully specified, gathering zero-size TensorArrays is an error. +flow_in: A float scalar that enforces proper chaining of operations. +value: All of the elements in the TensorArray, concatenated along a new + axis (the new dimension 0). +)doc"); REGISTER_OP("TensorArrayScatterV3") .Input("handle: resource") @@ -678,7 +1548,18 @@ REGISTER_OP("TensorArrayScatterV3") TF_RETURN_IF_ERROR(c->WithValue(c->Dim(c->input(0), 0), 2, &unused_dim)); TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused)); return shape_inference::ScalarShape(c); - }); + }) + .Doc(R"doc( +Scatter the data from the input value into specific TensorArray elements. + +`indices` must be a vector, its length must match the first dim of `value`. + +handle: The handle to a TensorArray. +indices: The locations at which to write the tensor elements. +value: The concatenated tensor to write to the TensorArray. +flow_in: A float scalar that enforces proper chaining of operations. +flow_out: A float scalar that enforces proper chaining of operations. +)doc"); REGISTER_OP("TensorArrayConcatV3") .Input("handle: resource") @@ -697,7 +1578,35 @@ REGISTER_OP("TensorArrayConcatV3") c->set_output(0, c->UnknownShape()); c->set_output(1, c->Vector(c->UnknownDim())); return Status::OK(); - }); + }) + .Doc(R"doc( +Concat the elements from the TensorArray into value `value`. + +Takes `T` elements of shapes + + ``` + (n0 x d0 x d1 x ...), (n1 x d0 x d1 x ...), ..., (n(T-1) x d0 x d1 x ...) + ``` + +and concatenates them into a Tensor of shape: + + ```(n0 + n1 + ... 
+ n(T-1) x d0 x d1 x ...)``` + +All elements must have the same shape (excepting the first dimension). + +handle: The handle to a TensorArray. +dtype: The type of the elem that is returned. +flow_in: A float scalar that enforces proper chaining of operations. +element_shape_except0: The expected shape of an element, if known, + excluding the first dimension. Used to validate the shapes of + TensorArray elements. If this shape is not fully specified, concatenating + zero-size TensorArrays is an error. +value: All of the elements in the TensorArray, concatenated along the first + axis. +lengths: A vector of the row sizes of the original T elements in the + value output. In the example above, this would be the values: + `(n1, n2, ..., n(T-1))`. +)doc"); REGISTER_OP("TensorArraySplitV3") .Input("handle: resource") @@ -715,7 +1624,35 @@ REGISTER_OP("TensorArraySplitV3") TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 1, &unused)); TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused)); return shape_inference::ScalarShape(c); - }); + }) + .Doc(R"doc( +Split the data from the input value into TensorArray elements. + +Assuming that `lengths` takes on values + + ```(n0, n1, ..., n(T-1))``` + +and that `value` has shape + + ```(n0 + n1 + ... + n(T-1) x d0 x d1 x ...)```, + +this splits values into a TensorArray with T tensors. + +TensorArray index t will be the subtensor of values with starting position + + ```(n0 + n1 + ... + n(t-1), 0, 0, ...)``` + +and having size + + ```nt x d0 x d1 x ...``` + +handle: The handle to a TensorArray. +value: The concatenated tensor to write to the TensorArray. +lengths: The vector of lengths, how to split the rows of value into the + TensorArray. +flow_in: A float scalar that enforces proper chaining of operations. +flow_out: A float scalar that enforces proper chaining of operations. +)doc"); REGISTER_OP("TensorArraySizeV3") .Input("handle: resource") @@ -727,7 +1664,14 @@ REGISTER_OP("TensorArraySizeV3") TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 1, &handle)); TF_RETURN_IF_ERROR(c->WithValue(c->Dim(handle, 0), 2, &unused_dim)); return shape_inference::ScalarShape(c); - }); + }) + .Doc(R"doc( +Get the current size of the TensorArray. + +handle: The handle to a TensorArray (output of TensorArray or TensorArrayGrad). +flow_in: A float scalar that enforces proper chaining of operations. +size: The current size of the TensorArray. +)doc"); REGISTER_OP("TensorArrayCloseV3") .Input("handle: resource") @@ -737,7 +1681,15 @@ REGISTER_OP("TensorArrayCloseV3") TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 1, &handle)); TF_RETURN_IF_ERROR(c->WithValue(c->Dim(handle, 0), 2, &unused_dim)); return Status::OK(); - }); + }) + .Doc(R"doc( +Delete the TensorArray from its resource container. + +This enables the user to close and release the resource in the middle +of a step/run. + +handle: The handle to a TensorArray (output of TensorArray or TensorArrayGrad). +)doc"); // -------------------------------------------------------------------------- @@ -769,7 +1721,8 @@ REGISTER_OP("TensorArrayV2") TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &unused)); c->set_output(0, c->Vector(2)); return Status::OK(); - }); + }) + .Doc("Deprecated. Use TensorArrayV3"); REGISTER_OP("TensorArrayGrad") .Input("handle: string") .Input("flow_in: float") @@ -792,7 +1745,8 @@ REGISTER_OP("TensorArrayGradV2") TF_RETURN_IF_ERROR(c->WithValue(c->Dim(handle, 0), 2, &unused_dim)); c->set_output(0, c->Vector(2)); return Status::OK(); - }); + }) + .Doc("Deprecated. 
Use TensorArrayGradV3"); REGISTER_OP("TensorArrayWrite") .Input("handle: Ref(string)") .Input("index: int32") @@ -820,7 +1774,8 @@ REGISTER_OP("TensorArrayWriteV2") TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused)); return shape_inference::ScalarShape(c); - }); + }) + .Doc("Deprecated. Use TensorArrayGradV3"); REGISTER_OP("TensorArrayRead") .Input("handle: Ref(string)") .Input("index: int32") @@ -845,7 +1800,8 @@ REGISTER_OP("TensorArrayReadV2") TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); return shape_inference::UnknownShape(c); - }); + }) + .Doc("Deprecated. Use TensorArrayReadV3"); REGISTER_OP("TensorArrayPack") .Input("handle: Ref(string)") .Input("flow_in: float") @@ -887,7 +1843,8 @@ REGISTER_OP("TensorArrayGatherV2") TF_RETURN_IF_ERROR(c->WithValue(c->Dim(c->input(0), 0), 2, &unused_dim)); TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); return shape_inference::UnknownShape(c); - }); + }) + .Doc("Deprecated. Use TensorArrayGatherV3"); REGISTER_OP("TensorArrayScatter") .Input("handle: Ref(string)") .Input("indices: int32") @@ -913,7 +1870,8 @@ REGISTER_OP("TensorArrayScatterV2") TF_RETURN_IF_ERROR(c->WithValue(c->Dim(c->input(0), 0), 2, &unused_dim)); TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused)); return shape_inference::ScalarShape(c); - }); + }) + .Doc("Deprecated. Use TensorArrayScatterV3"); REGISTER_OP("TensorArrayConcat") .Input("handle: Ref(string)") .Input("flow_in: float") @@ -940,7 +1898,8 @@ REGISTER_OP("TensorArrayConcatV2") c->set_output(0, c->UnknownShape()); c->set_output(1, c->Vector(c->UnknownDim())); return Status::OK(); - }); + }) + .Doc("Deprecated. Use TensorArrayConcatV3"); REGISTER_OP("TensorArraySplit") .Input("handle: Ref(string)") .Input("value: T") @@ -967,7 +1926,8 @@ REGISTER_OP("TensorArraySplitV2") TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 1, &unused)); TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused)); return shape_inference::ScalarShape(c); - }); + }) + .Doc("Deprecated. Use TensorArraySplitV3"); REGISTER_OP("TensorArraySize") .Input("handle: Ref(string)") .Input("flow_in: float") @@ -985,7 +1945,8 @@ REGISTER_OP("TensorArraySizeV2") TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 1, &handle)); TF_RETURN_IF_ERROR(c->WithValue(c->Dim(handle, 0), 2, &unused_dim)); return shape_inference::ScalarShape(c); - }); + }) + .Doc("Deprecated. Use TensorArraySizeV3"); REGISTER_OP("TensorArrayClose") .Input("handle: Ref(string)") .SetShapeFn([](InferenceContext* c) { return Status::OK(); }) @@ -999,7 +1960,8 @@ REGISTER_OP("TensorArrayCloseV2") TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 1, &handle)); TF_RETURN_IF_ERROR(c->WithValue(c->Dim(handle, 0), 2, &unused_dim)); return Status::OK(); - }); + }) + .Doc("Deprecated. Use TensorArrayCloseV3"); // -------------------------------------------------------------------------- @@ -1011,7 +1973,31 @@ REGISTER_OP("Barrier") .Attr("capacity: int = -1") .Attr("container: string = ''") .Attr("shared_name: string = ''") - .SetShapeFn(TwoElementOutput); + .SetShapeFn(TwoElementOutput) + .Doc(R"doc( +Defines a barrier that persists across different graph executions. + +A barrier represents a key-value map, where each key is a string, and +each value is a tuple of tensors. + +At runtime, the barrier contains 'complete' and 'incomplete' +elements. A complete element has defined tensors for all components of +its value tuple, and may be accessed using BarrierTakeMany. 
An +incomplete element has some undefined components in its value tuple, +and may be updated using BarrierInsertMany. + +handle: The handle to the barrier. +component_types: The type of each component in a value. +shapes: The shape of each component in a value. Each shape must be 1 in the + first dimension. The length of this attr must be the same as the length of + component_types. +capacity: The capacity of the barrier. The default capacity is MAX_INT32, + which is the largest capacity of the underlying queue. +container: If non-empty, this barrier is placed in the given container. + Otherwise, a default container is used. +shared_name: If non-empty, this barrier will be shared under the given name + across multiple sessions. +)doc"); REGISTER_OP("BarrierInsertMany") .Input("handle: Ref(string)") @@ -1030,7 +2016,21 @@ REGISTER_OP("BarrierInsertMany") TF_RETURN_IF_ERROR(c->WithRankAtLeast(values, 1, &values)); TF_RETURN_IF_ERROR(c->Merge(keys, c->Vector(c->Dim(values, 0)), &handle)); return Status::OK(); - }); + }) + .Doc(R"doc( +For each key, assigns the respective value to the specified component. + +If a key is not found in the barrier, this operation will create a new +incomplete element. If a key is found in the barrier, and the element +already has a value at component_index, this operation will fail with +INVALID_ARGUMENT, and leave the barrier in an undefined state. + +handle: The handle to a barrier. +component_index: The component of the barrier elements that is being assigned. +keys: A one-dimensional tensor of keys, with length n. +values: An any-dimensional tensor of values, which are associated with the + respective keys. The 0th dimension must have length n. +)doc"); REGISTER_OP("BarrierTakeMany") .Input("handle: Ref(string)") @@ -1042,22 +2042,78 @@ REGISTER_OP("BarrierTakeMany") .Attr("allow_small_batch: bool = false") .Attr("wait_for_incomplete: bool = false") .Attr("timeout_ms: int = -1") - .SetShapeFn(shape_inference::UnknownShape); + .SetShapeFn(shape_inference::UnknownShape) + .Doc(R"doc( +Takes the given number of completed elements from a barrier. + +This operation concatenates completed-element component tensors along +the 0th dimension to make a single component tensor. + +Elements come out of the barrier when they are complete, and in the order +in which they were placed into the barrier. The indices output provides +information about the batch in which each element was originally inserted +into the barrier. + +handle: The handle to a barrier. +num_elements: A single-element tensor containing the number of elements to + take. +indices: A one-dimensional tensor of indices, with length num_elements. + These indices refer to the batch in which the values were placed into the + barrier (starting with MIN_LONG and increasing with each BarrierInsertMany). +keys: A one-dimensional tensor of keys, with length num_elements. +values: One any-dimensional tensor per component in a barrier element. All + values have length num_elements in the 0th dimension. +component_types: The type of each component in a value. +allow_small_batch: Allows the operation to return fewer than num_elements + items if the barrier is already closed. +timeout_ms: If the queue is empty, this operation will block for up to + timeout_ms milliseconds. + Note: This option is not supported yet. 
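The barrier registrations above delegate their consistency checks to shape functions, and the pattern is easy to lose in the diff fragments. Below is a minimal sketch, mirroring the `BarrierInsertMany` shape function shown above, of how an op asserts that `keys` is a vector whose length matches dimension 0 of `values`. The op name `ExampleBarrierInsert` is hypothetical, and the sketch assumes the same namespace and using-declarations as the surrounding file:

```c++
// Sketch only: mirrors the BarrierInsertMany shape function shown above.
// "ExampleBarrierInsert" is a hypothetical name used for illustration.
REGISTER_OP("ExampleBarrierInsert")
    .Input("handle: Ref(string)")
    .Input("keys: string")
    .Input("values: T")
    .Attr("T: type")
    .SetShapeFn([](InferenceContext* c) {
      ShapeHandle keys;
      ShapeHandle values;
      ShapeHandle merged;
      // `keys` must be a vector; `values` must have rank at least 1.
      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &keys));
      TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(2), 1, &values));
      // The number of keys must equal dimension 0 of `values`.
      TF_RETURN_IF_ERROR(c->Merge(keys, c->Vector(c->Dim(values, 0)), &merged));
      return Status::OK();
    });
```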
+)doc"); REGISTER_OP("BarrierClose") .Input("handle: Ref(string)") .SetShapeFn(TwoElementVectorInputsAndScalarOutputs) - .Attr("cancel_pending_enqueues: bool = false"); + .Attr("cancel_pending_enqueues: bool = false") + .Doc(R"doc( +Closes the given barrier. + +This operation signals that no more new elements will be inserted in the +given barrier. Subsequent InsertMany that try to introduce a new key will fail. +Subsequent InsertMany operations that just add missing components to already +existing elements will continue to succeed. Subsequent TakeMany operations will +continue to succeed if sufficient completed elements remain in the barrier. +Subsequent TakeMany operations that would block will fail immediately. + +handle: The handle to a barrier. +cancel_pending_enqueues: If true, all pending enqueue requests that are + blocked on the barrier's queue will be canceled. InsertMany will fail, even + if no new key is introduced. +)doc"); REGISTER_OP("BarrierReadySize") .Input("handle: Ref(string)") .Output("size: int32") - .SetShapeFn(TwoElementVectorInputsAndScalarOutputs); + .SetShapeFn(TwoElementVectorInputsAndScalarOutputs) + .Doc(R"doc( +Computes the number of complete elements in the given barrier. + +handle: The handle to a barrier. +size: The number of complete elements (i.e. those with all of their value + components set) in the barrier. +)doc"); REGISTER_OP("BarrierIncompleteSize") .Input("handle: Ref(string)") .Output("size: int32") - .SetShapeFn(TwoElementVectorInputsAndScalarOutputs); + .SetShapeFn(TwoElementVectorInputsAndScalarOutputs) + .Doc(R"doc( +Computes the number of incomplete elements in the given barrier. + +handle: The handle to a barrier. +size: The number of incomplete elements (i.e. those with some of their value + components not set) in the barrier. +)doc"); // -------------------------------------------------------------------------- @@ -1066,14 +2122,28 @@ REGISTER_OP("GetSessionHandle") .Output("handle: string") .Attr("T: type") .SetIsStateful() - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Store the input tensor in the state of the current session. + +value: The tensor to be stored. +handle: The handle for the tensor stored in the session state, represented + as a string. +)doc"); REGISTER_OP("GetSessionHandleV2") .Input("value: T") .Output("handle: resource") .Attr("T: type") .SetIsStateful() - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Store the input tensor in the state of the current session. + +value: The tensor to be stored. +handle: The handle for the tensor stored in the session state, represented + as a ResourceHandle object. +)doc"); REGISTER_OP("GetSessionTensor") .Input("handle: string") @@ -1084,7 +2154,14 @@ REGISTER_OP("GetSessionTensor") ShapeHandle unused; TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &unused)); return shape_inference::UnknownShape(c); - }); + }) + .Doc(R"doc( +Get the value of the tensor specified by its handle. + +handle: The handle for a tensor stored in the session state. +value: The tensor for the given handle. +dtype: The type of the output value. +)doc"); REGISTER_OP("DeleteSessionTensor") .Input("handle: string") @@ -1093,7 +2170,12 @@ REGISTER_OP("DeleteSessionTensor") ShapeHandle unused; TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &unused)); return Status::OK(); - }); + }) + .Doc(R"doc( +Delete the tensor specified by its handle in the session. 
+ +handle: The handle for a tensor stored in the session state. +)doc"); REGISTER_OP("Stage") .Input("values: dtypes") @@ -1103,7 +2185,23 @@ REGISTER_OP("Stage") .Attr("container: string = ''") .Attr("shared_name: string = ''") .SetShapeFn(shape_inference::UnknownShape) - .SetIsStateful(); + .SetIsStateful() + .Doc(R"doc( +Stage values similar to a lightweight Enqueue. + +The basic functionality of this Op is similar to a queue with many +fewer capabilities and options. This Op is optimized for performance. + +values: a list of tensors +dtypes: A list of data types that inserted values should adhere to. +capacity: Maximum number of elements in the Staging Area. If > 0, inserts + on the container will block when the capacity is reached. +memory_limit: The maximum number of bytes allowed for Tensors in the Staging Area. + If > 0, inserts will block until sufficient space is available. +container: If non-empty, this queue is placed in the given container. Otherwise, + a default container is used. +shared_name: It is necessary to match this name to the matching Unstage Op. +)doc"); REGISTER_OP("Unstage") .Output("values: dtypes") @@ -1113,7 +2211,13 @@ REGISTER_OP("Unstage") .Attr("container: string = ''") .Attr("shared_name: string = ''") .SetShapeFn(shape_inference::UnknownShape) - .SetIsStateful(); + .SetIsStateful() + .Doc(R"doc( +Op is similar to a lightweight Dequeue. + +The basic functionality is similar to dequeue with many fewer +capabilities and options. This Op is optimized for performance. +)doc"); REGISTER_OP("StagePeek") .Input("index: int32") @@ -1124,7 +2228,13 @@ REGISTER_OP("StagePeek") .Attr("container: string = ''") .Attr("shared_name: string = ''") .SetShapeFn(shape_inference::UnknownShape) - .SetIsStateful(); + .SetIsStateful() + .Doc(R"doc( +Op peeks at the values at the specified index. If the +underlying container does not contain sufficient elements, +this op will block until it does. This Op is optimized for +performance. + )doc"); REGISTER_OP("StageSize") .Output("size: int32") @@ -1134,7 +2244,10 @@ REGISTER_OP("StageSize") .Attr("container: string = ''") .Attr("shared_name: string = ''") .SetShapeFn(shape_inference::ScalarShape) - .SetIsStateful(); + .SetIsStateful() + .Doc(R"doc( +Op returns the number of elements in the underlying container. + )doc"); REGISTER_OP("StageClear") .Attr("capacity: int >= 0 = 0") @@ -1143,7 +2256,10 @@ REGISTER_OP("StageClear") .Attr("container: string = ''") .Attr("shared_name: string = ''") .SetShapeFn(shape_inference::UnknownShape) - .SetIsStateful(); + .SetIsStateful() + .Doc(R"doc( +Op removes all elements in the underlying container. + )doc"); // UnorderedMap REGISTER_OP("MapStage") @@ -1157,7 +2273,19 @@ REGISTER_OP("MapStage") .Attr("container: string = ''") .Attr("shared_name: string = ''") .SetShapeFn(tensorflow::shape_inference::NoOutputs) - .SetIsStateful(); + .SetIsStateful() + .Doc(R"doc( +Stage (key, values) in the underlying container which behaves like a hashtable. + +key: int64 +values: a list of tensors +dtypes: A list of data types that inserted values should adhere to. +capacity: Maximum number of elements in the Staging Area. If > 0, inserts + on the container will block when the capacity is reached. +container: If non-empty, this queue is placed in the given container. Otherwise, + a default container is used. +shared_name: It is necessary to match this name to the matching Unstage Op. 
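All of the staging ops above share one `Attr` string grammar that the diff never spells out: `"<name>: <type, optionally constrained> = <default>"`. A condensed sketch of how the pieces of the Stage registration fit together; the op name `ExampleStage` is hypothetical, and the attr strings are copied from the registrations above:

```c++
// Sketch: the Attr grammar used by the staging ops above.
// "ExampleStage" is a hypothetical name for illustration only.
REGISTER_OP("ExampleStage")
    .Input("values: dtypes")
    .Attr("dtypes: list(type)")          // required: a list of element types
    .Attr("capacity: int >= 0 = 0")      // int constrained to >= 0, default 0
    .Attr("memory_limit: int >= 0 = 0")  // same constraint and default
    .Attr("container: string = ''")      // string attr with an empty default
    .Attr("shared_name: string = ''")    // pairs this op with its Unstage
    .SetShapeFn(shape_inference::UnknownShape)
    .SetIsStateful();
```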
+)doc"); REGISTER_OP("MapPeek") .Input("key: int64") @@ -1169,7 +2297,12 @@ REGISTER_OP("MapPeek") .Attr("container: string = ''") .Attr("shared_name: string = ''") .SetShapeFn(tensorflow::shape_inference::UnknownShape) - .SetIsStateful(); + .SetIsStateful() + .Doc(R"doc( +Op peeks at the values at the specified key. If the +underlying container does not contain this key +this op will block until it does. + )doc"); REGISTER_OP("MapUnstage") .Input("key: int64") @@ -1181,7 +2314,12 @@ REGISTER_OP("MapUnstage") .Attr("container: string = ''") .Attr("shared_name: string = ''") .SetShapeFn(tensorflow::shape_inference::UnknownShape) - .SetIsStateful(); + .SetIsStateful() + .Doc(R"doc( +Op removes and returns the values associated with the key +from the underlying container. If the underlying container +does not contain this key, the op will block until it does. + )doc"); REGISTER_OP("MapUnstageNoKey") .Input("indices: int32") @@ -1193,7 +2331,12 @@ REGISTER_OP("MapUnstageNoKey") .Attr("container: string = ''") .Attr("shared_name: string = ''") .SetShapeFn(tensorflow::shape_inference::UnknownShape) - .SetIsStateful(); + .SetIsStateful() + .Doc(R"doc( +Op removes and returns a random (key, value) +from the underlying container. If the underlying container +does not contain elements, the op will block until it does. + )doc"); REGISTER_OP("MapSize") .Output("size: int32") @@ -1203,7 +2346,10 @@ REGISTER_OP("MapSize") .Attr("container: string = ''") .Attr("shared_name: string = ''") .SetShapeFn(tensorflow::shape_inference::ScalarShape) - .SetIsStateful(); + .SetIsStateful() + .Doc(R"doc( +Op returns the number of elements in the underlying container. + )doc"); REGISTER_OP("MapIncompleteSize") .Output("size: int32") @@ -1213,7 +2359,10 @@ REGISTER_OP("MapIncompleteSize") .Attr("container: string = ''") .Attr("shared_name: string = ''") .SetShapeFn(tensorflow::shape_inference::ScalarShape) - .SetIsStateful(); + .SetIsStateful() + .Doc(R"doc( +Op returns the number of incomplete elements in the underlying container. + )doc"); REGISTER_OP("MapClear") .Attr("capacity: int >= 0 = 0") @@ -1222,7 +2371,10 @@ REGISTER_OP("MapClear") .Attr("container: string = ''") .Attr("shared_name: string = ''") .SetShapeFn(tensorflow::shape_inference::NoOutputs) - .SetIsStateful(); + .SetIsStateful() + .Doc(R"doc( +Op removes all elements in the underlying container. + )doc"); // OrderedMap REGISTER_OP("OrderedMapStage") @@ -1236,7 +2388,20 @@ REGISTER_OP("OrderedMapStage") .Attr("container: string = ''") .Attr("shared_name: string = ''") .SetShapeFn(tensorflow::shape_inference::NoOutputs) - .SetIsStateful(); + .SetIsStateful() + .Doc(R"doc( +Stage (key, values) in the underlying container which behaves like a ordered +associative container. Elements are ordered by key. + +key: int64 +values: a list of tensors +dtypes A list of data types that inserted values should adhere to. +capacity: Maximum number of elements in the Staging Area. If > 0, inserts + on the container will block when the capacity is reached. +container: If non-empty, this queue is placed in the given container. Otherwise, + a default container is used. +shared_name: It is necessary to match this name to the matching Unstage Op. +)doc"); REGISTER_OP("OrderedMapPeek") .Input("key: int64") @@ -1248,7 +2413,13 @@ REGISTER_OP("OrderedMapPeek") .Attr("container: string = ''") .Attr("shared_name: string = ''") .SetShapeFn(tensorflow::shape_inference::UnknownShape) - .SetIsStateful(); + .SetIsStateful() + .Doc(R"doc( +Op peeks at the values at the specified key. 
If the +underlying container does not contain this key +this op will block until it does. This Op is optimized for +performance. + )doc"); REGISTER_OP("OrderedMapUnstage") .Input("key: int64") @@ -1260,7 +2431,12 @@ REGISTER_OP("OrderedMapUnstage") .Attr("container: string = ''") .Attr("shared_name: string = ''") .SetShapeFn(tensorflow::shape_inference::UnknownShape) - .SetIsStateful(); + .SetIsStateful() + .Doc(R"doc( +Op removes and returns the values associated with the key +from the underlying container. If the underlying container +does not contain this key, the op will block until it does. + )doc"); REGISTER_OP("OrderedMapUnstageNoKey") .Input("indices: int32") @@ -1272,7 +2448,12 @@ REGISTER_OP("OrderedMapUnstageNoKey") .Attr("container: string = ''") .Attr("shared_name: string = ''") .SetShapeFn(tensorflow::shape_inference::UnknownShape) - .SetIsStateful(); + .SetIsStateful() + .Doc(R"doc( +Op removes and returns the (key, value) element with the smallest +key from the underlying container. If the underlying container +does not contain elements, the op will block until it does. + )doc"); REGISTER_OP("OrderedMapSize") .Output("size: int32") @@ -1282,7 +2463,10 @@ REGISTER_OP("OrderedMapSize") .Attr("container: string = ''") .Attr("shared_name: string = ''") .SetShapeFn(tensorflow::shape_inference::ScalarShape) - .SetIsStateful(); + .SetIsStateful() + .Doc(R"doc( +Op returns the number of elements in the underlying container. + )doc"); REGISTER_OP("OrderedMapIncompleteSize") .Output("size: int32") @@ -1292,7 +2476,10 @@ REGISTER_OP("OrderedMapIncompleteSize") .Attr("container: string = ''") .Attr("shared_name: string = ''") .SetShapeFn(tensorflow::shape_inference::ScalarShape) - .SetIsStateful(); + .SetIsStateful() + .Doc(R"doc( +Op returns the number of incomplete elements in the underlying container. + )doc"); REGISTER_OP("OrderedMapClear") .Attr("capacity: int >= 0 = 0") @@ -1301,7 +2488,10 @@ REGISTER_OP("OrderedMapClear") .Attr("container: string = ''") .Attr("shared_name: string = ''") .SetShapeFn(tensorflow::shape_inference::NoOutputs) - .SetIsStateful(); + .SetIsStateful() + .Doc(R"doc( +Op removes all elements in the underlying container. + )doc"); REGISTER_OP("RecordInput") .Output("records: string") @@ -1313,6 +2503,20 @@ REGISTER_OP("RecordInput") .Attr("batch_size: int = 32") .Attr("compression_type: string = ''") .SetIsStateful() - .SetShapeFn(shape_inference::UnknownShape); + .SetShapeFn(shape_inference::UnknownShape) + .Doc(R"doc( +Emits randomized records. + +records: A tensor of shape [batch_size]. +file_pattern: Glob pattern for the data files. +file_random_seed: Random seeds used to produce randomized records. +file_shuffle_shift_ratio: Shifts the list of files after the list is randomly + shuffled. +file_buffer_size: The randomization shuffling buffer. +file_parallelism: How many sstables are opened and concurrently iterated over. +batch_size: The batch size. +compression_type: The type of compression for the file. Currently ZLIB and + GZIP are supported. Defaults to none. +)doc"); } // namespace tensorflow diff --git a/tensorflow/core/ops/dataset_ops.cc b/tensorflow/core/ops/dataset_ops.cc index b86816bb54..9f4e0e91a7 100644 --- a/tensorflow/core/ops/dataset_ops.cc +++ b/tensorflow/core/ops/dataset_ops.cc @@ -39,10 +39,13 @@ REGISTER_OP("TensorDataset") .Attr("output_shapes: list(shape) >= 1") .SetIsStateful() // TODO(b/65524810): Source dataset ops must be marked // stateful to inhibit constant folding. 
- .SetShapeFn(shape_inference::ScalarShape); // TODO(mrry): Validate that - // `components` have shapes - // compatible with - // `output_shapes`. + .SetShapeFn(shape_inference::ScalarShape) // TODO(mrry): Validate that + // `components` have shapes + // compatible with + // `output_shapes`. + .Doc(R"doc( +Creates a dataset that emits `components` as a tuple of tensors once. +)doc"); REGISTER_OP("TensorSliceDataset") .Input("components: Toutput_types") @@ -51,10 +54,13 @@ REGISTER_OP("TensorSliceDataset") .Attr("output_shapes: list(shape) >= 1") .SetIsStateful() // TODO(b/65524810): Source dataset ops must be marked // stateful to inhibit constant folding. - .SetShapeFn(shape_inference::ScalarShape); // TODO(mrry): Validate that the - // dim-0 slices of `components` - // have shapes compatible with - // `output_shapes`. + .SetShapeFn(shape_inference::ScalarShape) // TODO(mrry): Validate that the + // dim-0 slices of `components` + // have shapes compatible with + // `output_shapes`. + .Doc(R"doc( +Creates a dataset that emits each dim-0 slice of `components` once. +)doc"); REGISTER_OP("SparseTensorSliceDataset") .Input("indices: int64") @@ -64,7 +70,10 @@ REGISTER_OP("SparseTensorSliceDataset") .Attr("Tvalues: type") .SetIsStateful() // TODO(b/65524810): Source dataset ops must be marked // stateful to inhibit constant folding. - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset that splits a SparseTensor into elements row-wise. +)doc"); REGISTER_OP("ZipDataset") .Input("input_datasets: N * variant") @@ -72,7 +81,10 @@ REGISTER_OP("ZipDataset") .Attr("output_types: list(type) >= 1") .Attr("output_shapes: list(shape) >= 1") .Attr("N: int >= 1") - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset that zips together `input_datasets`. +)doc"); REGISTER_OP("ConcatenateDataset") .Input("input_dataset: variant") @@ -80,7 +92,10 @@ REGISTER_OP("ConcatenateDataset") .Output("handle: variant") .Attr("output_types: list(type) >= 1") .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset that concatenates `input_dataset` with `another_dataset`. +)doc"); REGISTER_OP("RepeatDataset") .Input("input_dataset: variant") @@ -88,8 +103,14 @@ REGISTER_OP("RepeatDataset") .Output("handle: variant") .Attr("output_types: list(type) >= 1") .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape); // TODO(mrry): Validate the - // shape of `count`. + .SetShapeFn(shape_inference::ScalarShape) // TODO(mrry): Validate the shape + // of `count`. + .Doc(R"doc( +Creates a dataset that emits the outputs of `input_dataset` `count` times. + +count: A scalar representing the number of times that `input_dataset` should + be repeated. A value of `-1` indicates that it should be repeated infinitely. +)doc"); REGISTER_OP("TakeDataset") .Input("input_dataset: variant") @@ -97,7 +118,14 @@ REGISTER_OP("TakeDataset") .Output("handle: variant") .Attr("output_types: list(type) >= 1") .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset that contains `count` elements from the `input_dataset`. + +count: A scalar representing the number of elements from the `input_dataset` + that should be taken. 
A value of `-1` indicates that all of `input_dataset` + is taken. +)doc"); REGISTER_OP("SkipDataset") .Input("input_dataset: variant") @@ -105,14 +133,23 @@ REGISTER_OP("SkipDataset") .Output("handle: variant") .Attr("output_types: list(type) >= 1") .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset that skips `count` elements from the `input_dataset`. + +count: A scalar representing the number of elements from the `input_dataset` + that should be skipped. If count is -1, skips everything. +)doc"); REGISTER_OP("IgnoreErrorsDataset") .Input("input_dataset: variant") .Output("handle: variant") .Attr("output_types: list(type) >= 1") .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset that contains the elements of `input_dataset` ignoring errors. +)doc"); REGISTER_OP("BytesProducedStatsDataset") .Input("input_dataset: variant") @@ -120,7 +157,10 @@ REGISTER_OP("BytesProducedStatsDataset") .Output("handle: variant") .Attr("output_types: list(type) >= 1") .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Records the bytes size of each element of `input_dataset` in a StatsAggregator. +)doc"); REGISTER_OP("LatencyStatsDataset") .Input("input_dataset: variant") @@ -128,7 +168,10 @@ REGISTER_OP("LatencyStatsDataset") .Output("handle: variant") .Attr("output_types: list(type) >= 1") .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Records the latency of producing `input_dataset` elements in a StatsAggregator. +)doc"); REGISTER_OP("MapDataset") .Input("input_dataset: variant") @@ -138,7 +181,10 @@ REGISTER_OP("MapDataset") .Attr("Targuments: list(type) >= 0") .Attr("output_types: list(type) >= 1") .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset that applies `f` to the outputs of `input_dataset`. +)doc"); REGISTER_OP("ParallelMapDataset") .Input("input_dataset: variant") @@ -149,7 +195,16 @@ REGISTER_OP("ParallelMapDataset") .Attr("Targuments: list(type) >= 0") .Attr("output_types: list(type) >= 1") .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset that applies `f` to the outputs of `input_dataset`. + +Unlike a "MapDataset", which applies `f` sequentially, this dataset invokes up +to `num_parallel_calls` copies of `f` in parallel. + +num_parallel_calls: The number of concurrent invocations of `f` that process + elements from `input_dataset` in parallel. +)doc"); REGISTER_OP("MapAndBatchDataset") .Input("input_dataset: variant") @@ -161,7 +216,21 @@ REGISTER_OP("MapAndBatchDataset") .Attr("Targuments: list(type) >= 0") .Attr("output_types: list(type) >= 1") .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset that applies `f` to the outputs of `input_dataset` and then +batches `batch_size` of them. + +Unlike a "MapDataset", which applies `f` sequentially, this dataset invokes up +to `batch_size * num_parallel_batches` copies of `f` in parallel. 
+ +batch_size: A scalar representing the number of elements to accumulate in a + batch. It determines the number of concurrent invocations of `f` that process + elements from `input_dataset` in parallel. +num_parallel_batches: A scalar representing the number of batches to create in + parallel. Processing multiple batches in parallel benefits workloads prone to + stragglers. +)doc"); REGISTER_OP("PrefetchDataset") .Input("input_dataset: variant") @@ -169,7 +238,13 @@ REGISTER_OP("PrefetchDataset") .Output("handle: variant") .Attr("output_types: list(type) >= 1") .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset that asynchronously prefetches elements from `input_dataset`. + +buffer_size: The maximum number of elements to buffer in an iterator over + this dataset. +)doc"); REGISTER_OP("ScanDataset") .Input("input_dataset: variant") @@ -181,7 +256,10 @@ REGISTER_OP("ScanDataset") .Attr("Targuments: list(type) >= 0") .Attr("output_types: list(type) >= 1") .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset successively reduces `f` over the elements of `input_dataset`. +)doc"); REGISTER_OP("FlatMapDataset") .Input("input_dataset: variant") @@ -191,7 +269,18 @@ REGISTER_OP("FlatMapDataset") .Attr("Targuments: list(type) >= 0") .Attr("output_types: list(type) >= 1") .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset that applies `f` to the outputs of `input_dataset`. + +Unlike MapDataset, the `f` in FlatMapDataset is expected to return a +Dataset variant, and FlatMapDataset will flatten successive results +into a single Dataset. + +f: A function mapping elements of `input_dataset`, concatenated with + `other_arguments`, to a Dataset variant that contains elements matching + `output_types` and `output_shapes`. +)doc"); REGISTER_OP("InterleaveDataset") .Input("input_dataset: variant") @@ -203,7 +292,20 @@ REGISTER_OP("InterleaveDataset") .Attr("Targuments: list(type) >= 0") .Attr("output_types: list(type) >= 1") .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset that applies `f` to the outputs of `input_dataset`. + +Unlike MapDataset, the `f` in InterleaveDataset is expected to return +a Dataset variant, and InterleaveDataset will flatten successive +results into a single Dataset. Unlike FlatMapDataset, +InterleaveDataset will interleave sequences of up to `block_length` +consecutive elements from `cycle_length` input elements. + +f: A function mapping elements of `input_dataset`, concatenated with + `other_arguments`, to a Dataset variant that contains elements matching + `output_types` and `output_shapes`. +)doc"); REGISTER_OP("ParallelInterleaveDataset") .Input("input_dataset: variant") @@ -218,7 +320,22 @@ REGISTER_OP("ParallelInterleaveDataset") .Attr("Targuments: list(type) >= 0") .Attr("output_types: list(type) >= 1") .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset that applies `f` to the outputs of `input_dataset`. 
+ +The resulting dataset is similar to the `InterleaveDataset`, with the exception +that if retrieving the next value from a dataset would cause the requester to +block, it will skip that input dataset. This dataset is especially useful +when loading data from variable-latency datastores (e.g. HDFS, GCS), as it +allows the training step to proceed so long as some data is available. + +!! WARNING !! This dataset is not deterministic! + +f: A function mapping elements of `input_dataset`, concatenated with + `other_arguments`, to a Dataset variant that contains elements matching + `output_types` and `output_shapes`. +)doc"); REGISTER_OP("GroupByWindowDataset") .Input("input_dataset: variant") @@ -235,7 +352,15 @@ REGISTER_OP("GroupByWindowDataset") .Attr("Twindow_size_func_other_arguments: list(type) >= 0") .Attr("output_types: list(type) >= 1") .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset that computes a windowed group-by on `input_dataset`. + +// TODO(mrry): Support non-int64 keys. + +key_func: A function mapping an element of `input_dataset`, concatenated + with `key_func_other_arguments`, to a scalar value of type DT_INT64. +)doc"); REGISTER_OP("FilterDataset") .Input("input_dataset: variant") @@ -245,7 +370,20 @@ REGISTER_OP("FilterDataset") .Attr("Targuments: list(type) >= 0") .Attr("output_types: list(type) >= 1") .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset containing elements of `input_dataset` matching `predicate`. + +The `predicate` function must return a scalar boolean and accept the +following arguments: + +* One tensor for each component of an element of `input_dataset`. +* One tensor for each value in `other_arguments`. + +predicate: A function returning a scalar boolean. +other_arguments: A list of tensors, typically values that were captured when + building a closure for `predicate`. +)doc"); REGISTER_OP("BatchDataset") .Input("input_dataset: variant") @@ -253,7 +391,13 @@ REGISTER_OP("BatchDataset") .Output("handle: variant") .Attr("output_types: list(type) >= 1") .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset that batches `batch_size` elements from `input_dataset`. + +batch_size: A scalar representing the number of elements to accumulate in a + batch. +)doc"); REGISTER_OP("PaddedBatchDataset") .Input("input_dataset: variant") @@ -264,17 +408,29 @@ REGISTER_OP("PaddedBatchDataset") .Attr("Toutput_types: list(type) >= 1") .Attr("output_shapes: list(shape) >= 1") .Attr("N: int >= 1") - .SetShapeFn(shape_inference::ScalarShape); // TODO(mrry): Validate that - // `padded_shapes` are all - // vectors, the lengths of - // `output_types` and - // `output_shapes` are `N`, - // the `output_shapes` are (as - // far as possible to tell - // statically) compatible with - // `padded_shapes`, and - // that `padding_values` are - // all scalars. + .SetShapeFn(shape_inference::ScalarShape) // TODO(mrry): Validate that + // `padded_shapes` are all + // vectors, the lengths of + // `output_types` and + // `output_shapes` are `N`, + // the `output_shapes` are (as + // far as possible to tell + // statically) compatible with + // `padded_shapes`, and + // that `padding_values` are + // all scalars. 
+ .Doc(R"doc( +Creates a dataset that batches and pads `batch_size` elements from the input. + +batch_size: A scalar representing the number of elements to accumulate in a + batch. +padded_shapes: A list of int64 tensors representing the desired padded shapes + of the corresponding output components. These shapes may be partially + specified, using `-1` to indicate that a particular dimension should be + padded to the maximum size of all batch elements. +padding_values: A list of scalars containing the padding value to use for + each of the outputs. +)doc"); REGISTER_OP("DenseToSparseBatchDataset") .Input("input_dataset: variant") @@ -283,7 +439,17 @@ REGISTER_OP("DenseToSparseBatchDataset") .Output("handle: variant") .Attr("output_types: list(type) >= 1") .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset that batches input elements into a SparseTensor. + +input_dataset: A handle to an input dataset. Must have a single component. +batch_size: A scalar representing the number of elements to accumulate in a + batch. +row_shape: A vector representing the dense shape of each row in the produced + SparseTensor. The shape may be partially specified, using `-1` to indicate + that a particular dimension should use the maximum size of all batch elements. +)doc"); REGISTER_OP("RangeDataset") .Input("start: int64") @@ -294,7 +460,14 @@ REGISTER_OP("RangeDataset") .Attr("output_shapes: list(shape) >= 1") .SetIsStateful() // TODO(b/65524810): Source dataset ops must be marked // stateful to inhibit constant folding. - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset with a range of values. Corresponds to python's xrange. + +start: corresponds to start in python's xrange(). +stop: corresponds to stop in python's xrange(). +step: corresponds to step in python's xrange(). +)doc"); REGISTER_OP("RandomDataset") .Input("seed: int64") @@ -304,7 +477,15 @@ REGISTER_OP("RandomDataset") .Attr("output_shapes: list(shape) >= 1") .SetIsStateful() // TODO(b/65524810): Source dataset ops must be marked // stateful to inhibit constant folding. - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a Dataset that returns pseudorandom numbers. + +seed: A scalar seed for the random number generator. If either seed or + seed2 is set to be non-zero, the random number generator is seeded + by the given seed. Otherwise, a random seed is used. +seed2: A second scalar seed to avoid seed collision. +)doc"); REGISTER_OP("ShuffleDataset") .Input("input_dataset: variant") @@ -315,7 +496,23 @@ REGISTER_OP("ShuffleDataset") .Attr("reshuffle_each_iteration: bool = true") .Attr("output_types: list(type) >= 1") .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset that shuffles elements from `input_dataset` pseudorandomly. + +buffer_size: The number of output elements to buffer in an iterator over + this dataset. Compare with the `min_after_dequeue` attr when creating a + `RandomShuffleQueue`. +reshuffle_each_iteration: If true, each iterator over this dataset will be given + a different pseudorandomly generated seed, based on a sequence seeded by the + `seed` and `seed2` inputs. 
If false, each iterator will be given the same + seed, and repeated iteration over this dataset will yield the exact same + sequence of results. +seed: A scalar seed for the random number generator. If either `seed` or + `seed2` is set to be non-zero, the random number generator is seeded + by the given seed. Otherwise, a random seed is used. +seed2: A second scalar seed to avoid seed collision. +)doc"); REGISTER_OP("ShuffleAndRepeatDataset") .Input("input_dataset: variant") @@ -326,7 +523,21 @@ REGISTER_OP("ShuffleAndRepeatDataset") .Output("handle: variant") .Attr("output_types: list(type) >= 1") .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset that shuffles and repeats elements from `input_dataset` +pseudorandomly. + +buffer_size: The number of output elements to buffer in an iterator over + this dataset. Compare with the `min_after_dequeue` attr when creating a + `RandomShuffleQueue`. +count: A scalar representing the number of times the underlying dataset + should be repeated. The default is `-1`, which results in infinite repetition. +seed: A scalar seed for the random number generator. If either `seed` or + `seed2` is set to be non-zero, the random number generator is seeded + by the given seed. Otherwise, a random seed is used. +seed2: A second scalar seed to avoid seed collision. +)doc"); REGISTER_OP("CacheDataset") .Input("input_dataset: variant") @@ -334,14 +545,28 @@ REGISTER_OP("CacheDataset") .Output("handle: variant") .Attr("output_types: list(type) >= 1") .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset that caches elements from `input_dataset`. + +A CacheDataset will iterate over the input_dataset, and store tensors. If the +cache already exists, the cache will be used. If the cache is inappropriate +(e.g. cannot be opened, contains tensors of the wrong shape / size), an error +will be returned when used. + +filename: A path on the filesystem where we should cache the dataset. Note: this + will be a directory. +)doc"); REGISTER_OP("UniqueDataset") .Input("input_dataset: variant") .Output("handle: variant") .Attr("output_types: list(type) >= 1") .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset that contains the unique elements of `input_dataset`. +)doc"); REGISTER_OP("TextLineDataset") .Input("filenames: string") @@ -350,10 +575,19 @@ REGISTER_OP("TextLineDataset") .Output("handle: variant") .SetIsStateful() // TODO(b/65524810): Source dataset ops must be marked // stateful to inhibit constant folding. - .SetShapeFn(shape_inference::ScalarShape); // TODO(mrry): validate - // that `filenames` is - // a scalar or a - // vector. + .SetShapeFn(shape_inference::ScalarShape) // TODO(mrry): validate + // that `filenames` is + // a scalar or a + // vector. + .Doc(R"doc( +Creates a dataset that emits the lines of one or more text files. + +filenames: A scalar or a vector containing the name(s) of the file(s) to be + read. +compression_type: A scalar containing either (i) the empty string (no + compression), (ii) "ZLIB", or (iii) "GZIP". +buffer_size: A scalar containing the number of bytes to buffer. 
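Every source dataset in this change follows the same registration shape: mark the op stateful (see the TODO(b/65524810) comments above) and emit a scalar `variant` handle. A condensed sketch of that pattern with a hypothetical op name; `TextLineDataset` above is a real instance:

```c++
// Sketch: the common source-dataset registration pattern in this file.
// "ExampleFileDataset" is hypothetical and not part of this change.
REGISTER_OP("ExampleFileDataset")
    .Input("filenames: string")
    .Input("buffer_size: int64")
    .Output("handle: variant")
    .SetIsStateful()  // Source dataset ops are marked stateful to inhibit
                      // constant folding (see TODO(b/65524810) above).
    .SetShapeFn(shape_inference::ScalarShape)
    .Doc(R"doc(
Creates a dataset that emits records from one or more files.

filenames: A scalar or a vector containing the name(s) of the file(s) to read.
buffer_size: A scalar containing the number of bytes to buffer.
)doc");
```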
+)doc"); REGISTER_OP("SqlDataset") .Input("driver_name: string") @@ -364,7 +598,14 @@ REGISTER_OP("SqlDataset") .Attr("output_shapes: list(shape) >= 1") .SetIsStateful() // TODO(b/65524810): Source dataset ops must be marked // stateful to inhibit constant folding. - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset that executes a SQL query and emits rows of the result set. + +driver_name: The database type. Currently, the only supported type is 'sqlite'. +data_source_name: A connection string to connect to the database. +query: A SQL query to execute. +)doc"); REGISTER_OP("FixedLengthRecordDataset") .Input("filenames: string") @@ -375,7 +616,19 @@ REGISTER_OP("FixedLengthRecordDataset") .Output("handle: variant") .SetIsStateful() // TODO(b/65524810): Source dataset ops must be marked // stateful to inhibit constant folding. - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset that emits the records from one or more binary files. + +filenames: A scalar or a vector containing the name(s) of the file(s) to be + read. +header_bytes: A scalar representing the number of bytes to skip at the + beginning of a file. +record_bytes: A scalar representing the number of bytes in each record. +footer_bytes: A scalar representing the number of bytes to skip at the end + of a file. +buffer_size: A scalar representing the number of bytes to buffer. Must be > 0. +)doc"); REGISTER_OP("TFRecordDataset") .Input("filenames: string") @@ -384,7 +637,17 @@ REGISTER_OP("TFRecordDataset") .Output("handle: variant") .SetIsStateful() // TODO(b/65524810): Source dataset ops must be marked // stateful to inhibit constant folding. - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset that emits the records from one or more TFRecord files. + +filenames: A scalar or vector containing the name(s) of the file(s) to be + read. +compression_type: A scalar containing either (i) the empty string (no + compression), (ii) "ZLIB", or (iii) "GZIP". +buffer_size: A scalar representing the number of bytes to buffer. A value of + 0 means no buffering will be performed. +)doc"); REGISTER_OP("Iterator") .Output("handle: resource") @@ -392,12 +655,24 @@ REGISTER_OP("Iterator") .Attr("container: string") .Attr("output_types: list(type) >= 1") .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +A container for an iterator resource. + +handle: A handle to the iterator that can be passed to a "MakeIterator" + or "IteratorGetNext" op. +)doc"); REGISTER_OP("MakeIterator") .Input("dataset: variant") .Input("iterator: resource") - .SetShapeFn(shape_inference::NoOutputs); + .SetShapeFn(shape_inference::NoOutputs) + .Doc(R"doc( +Makes a new iterator from the given `dataset` and stores it in `iterator`. + +This operation may be executed multiple times. Each execution will reset the +iterator in `iterator` to the first element of `dataset`. +)doc"); REGISTER_OP("OneShotIterator") .Output("handle: resource") @@ -407,7 +682,33 @@ REGISTER_OP("OneShotIterator") .Attr("container: string = ''") .Attr("shared_name: string = ''") .SetIsStateful() - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Makes a "one-shot" iterator that can be iterated only once. 
+ +A one-shot iterator bundles the logic for defining the dataset and +the state of the iterator in a single op, which allows simple input +pipelines to be defined without an additional initialization +("MakeIterator") step. + +One-shot iterators have the following limitations: + +* They do not support parameterization: all logic for creating the underlying + dataset must be bundled in the `dataset_factory` function. +* They are not resettable. Once a one-shot iterator reaches the end of its + underlying dataset, subsequent "IteratorGetNext" operations on that + iterator will always produce an `OutOfRange` error. + +For greater flexibility, use "Iterator" and "MakeIterator" to define +an iterator using an arbitrary subgraph, which may capture tensors +(including fed values) as parameters, and which may be reset multiple +times by rerunning "MakeIterator". + +handle: A handle to the iterator that can be passed to an "IteratorGetNext" + op. +dataset_factory: A function of type `() -> DT_VARIANT`, where the returned + DT_VARIANT is a dataset. +)doc"); REGISTER_OP("IteratorGetNext") .Input("iterator: resource") @@ -431,7 +732,10 @@ REGISTER_OP("IteratorGetNext") c->set_output(static_cast<int>(i), output_shape_handle); } return Status::OK(); - }); + }) + .Doc(R"doc( +Gets the next output from the given iterator. +)doc"); REGISTER_OP("DatasetToSingleElement") .Input("dataset: variant") @@ -455,44 +759,89 @@ REGISTER_OP("DatasetToSingleElement") c->set_output(static_cast<int>(i), output_shape_handle); } return Status::OK(); - }); + }) + .Doc(R"doc( +Outputs the single element from the given dataset. + +dataset: A handle to a dataset that contains a single element. +components: The components of the single element of `input`. +)doc"); REGISTER_OP("IteratorToStringHandle") .Input("resource_handle: resource") .Output("string_handle: string") - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Converts the given `resource_handle` representing an iterator to a string. + +resource_handle: A handle to an iterator resource. +string_handle: A string representation of the given handle. +)doc"); REGISTER_OP("IteratorFromStringHandle") .Input("string_handle: string") .Output("resource_handle: resource") .Attr("output_types: list(type) >= 0 = []") .Attr("output_shapes: list(shape) >= 0 = []") - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Converts the given string representing a handle to an iterator to a resource. + +string_handle: A string representation of the given handle. +resource_handle: A handle to an iterator resource. +output_types: If specified, defines the type of each tuple component in an + element produced by the resulting iterator. +output_shapes: If specified, defines the shape of each tuple component in an + element produced by the resulting iterator. +)doc"); REGISTER_OP("SerializeIterator") .Input("resource_handle: resource") .Output("serialized: variant") - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Converts the given `resource_handle` representing an iterator to a variant tensor. + +resource_handle: A handle to an iterator resource. +serialized: A variant tensor storing the state of the iterator contained in the + resource. 
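`IteratorGetNext` and `DatasetToSingleElement` above share a shape function whose body appears only in fragments in this diff: it replays the `output_shapes` attr onto the op's outputs. A reassembled sketch is below; the statements shown in the hunks are used as-is, while the declarations outside the visible hunks are assumptions:

```c++
// Sketch: reassembled from the IteratorGetNext / DatasetToSingleElement
// fragments above. Declarations outside the visible hunks are assumed.
Status ReplayOutputShapes(InferenceContext* c) {
  std::vector<PartialTensorShape> output_shapes;
  TF_RETURN_IF_ERROR(c->GetAttr("output_shapes", &output_shapes));
  if (output_shapes.size() != static_cast<size_t>(c->num_outputs())) {
    return errors::InvalidArgument(
        "`output_shapes` must be the same length as the number of outputs.");
  }
  for (size_t i = 0; i < output_shapes.size(); ++i) {
    ShapeHandle output_shape_handle;
    TF_RETURN_IF_ERROR(c->MakeShapeFromPartialTensorShape(
        output_shapes[i], &output_shape_handle));
    c->set_output(static_cast<int>(i), output_shape_handle);
  }
  return Status::OK();
}
```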
+)doc"); REGISTER_OP("DeserializeIterator") .Input("resource_handle: resource") .Input("serialized: variant") - .SetShapeFn(shape_inference::NoOutputs); + .SetShapeFn(shape_inference::NoOutputs) + .Doc(R"doc( +Converts the given variant tensor to an iterator and stores it in the given resource. + +resource_handle: A handle to an iterator resource. +serialized: A variant tensor storing the state of the iterator contained in the + resource. +)doc"); REGISTER_OP("StatsAggregatorHandle") .Output("handle: resource") .SetShapeFn(shape_inference::ScalarShape) .Attr("container: string = ''") - .Attr("shared_name: string = ''"); + .Attr("shared_name: string = ''") + .Doc(R"doc( +Creates a statistics manager resource. +)doc"); REGISTER_OP("IteratorSetStatsAggregator") .Input("iterator_handle: resource") .Input("stats_aggregator_handle: resource") - .SetShapeFn(shape_inference::NoOutputs); + .SetShapeFn(shape_inference::NoOutputs) + .Doc(R"doc( +Associates the given iterator with the given statistics aggregator. +)doc"); REGISTER_OP("StatsAggregatorSummary") .Input("iterator: resource") .Output("summary: string") - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Produces a summary of any statistics recorded by the given statistics manager. +)doc"); } // namespace tensorflow diff --git a/tensorflow/core/ops/functional_ops.cc b/tensorflow/core/ops/functional_ops.cc index 515b31623b..5fd21ec88f 100644 --- a/tensorflow/core/ops/functional_ops.cc +++ b/tensorflow/core/ops/functional_ops.cc @@ -38,7 +38,33 @@ REGISTER_OP("SymbolicGradient") c->set_output(i, c->input(i)); } return Status::OK(); - }); + }) + .Doc(R"doc( +Computes the gradient function for function f via backpropagation. + +input: a list of input tensors of size N + M; +output: a list of output tensors of size N; +Tin: the type list for the input list. +Tout: the type list for the input list. +f: The function we want to compute the gradient for. + +The function 'f' must be a numerical function which takes N inputs and +produces M outputs. Its gradient function 'g', which is computed by +this SymbolicGradient op is a function taking N + M inputs and +produces N outputs. + +I.e. if we have + (y1, y2, ..., y_M) = f(x1, x2, ..., x_N), +then, g is + (dL/dx1, dL/dx2, ..., dL/dx_N) = g(x1, x2, ..., x_N, + dL/dy1, dL/dy2, ..., dL/dy_M), + +where L is a scalar-value function of (x1, x2, ..., xN) (e.g., the +loss function). dL/dx_i is the partial derivative of L with respect +to x_i. + +(Needs some math expert to say the comment above better.) +)doc"); REGISTER_OP("RemoteCall") .Input("target: string") @@ -47,5 +73,15 @@ REGISTER_OP("RemoteCall") .Attr("Tin: list(type)") .Attr("Tout: list(type)") .Attr("f: func") - .SetShapeFn(shape_inference::UnknownShape); + .SetShapeFn(shape_inference::UnknownShape) + .Doc(R"doc( +Runs function `f` on a remote device indicated by `target`. + +target: A fully specified device name where we want to run the function. +args: A list of arguments for the function. +output: A list of return values. +Tin: The type list for the arguments. +Tout: The type list for the return values. +f: The function to run remotely. 
+)doc"); } // end namespace tensorflow diff --git a/tensorflow/core/ops/image_ops.cc b/tensorflow/core/ops/image_ops.cc index 31cc662d21..13762cc221 100644 --- a/tensorflow/core/ops/image_ops.cc +++ b/tensorflow/core/ops/image_ops.cc @@ -153,7 +153,26 @@ REGISTER_OP("ResizeArea") .Output("resized_images: float") .Attr("T: {int8, uint8, int16, uint16, int32, int64, half, float, double}") .Attr("align_corners: bool = false") - .SetShapeFn(ResizeShapeFn); + .SetShapeFn(ResizeShapeFn) + .Doc(R"doc( +Resize `images` to `size` using area interpolation. + +Input images can be of different types but output images are always float. + +Each output pixel is computed by first transforming the pixel's footprint into +the input tensor and then averaging the pixels that intersect the footprint. An +input pixel's contribution to the average is weighted by the fraction of its +area that intersects the footprint. This is the same as OpenCV's INTER_AREA. + +images: 4-D with shape `[batch, height, width, channels]`. +size:= A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The + new size for the images. +align_corners: If true, rescale input by (new_height - 1) / (height - 1), which + exactly aligns the 4 corners of images and resized images. If false, rescale + by new_height / height. Treat similarly the width dimension. +resized_images: 4-D with shape + `[batch, new_height, new_width, channels]`. +)doc"); // -------------------------------------------------------------------------- REGISTER_OP("ResizeBicubic") @@ -162,7 +181,21 @@ REGISTER_OP("ResizeBicubic") .Output("resized_images: float") .Attr("T: {int8, uint8, int16, uint16, int32, int64, half, float, double}") .Attr("align_corners: bool = false") - .SetShapeFn(ResizeShapeFn); + .SetShapeFn(ResizeShapeFn) + .Doc(R"doc( +Resize `images` to `size` using bicubic interpolation. + +Input images can be of different types but output images are always float. + +images: 4-D with shape `[batch, height, width, channels]`. +size:= A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The + new size for the images. +align_corners: If true, rescale input by (new_height - 1) / (height - 1), which + exactly aligns the 4 corners of images and resized images. If false, rescale + by new_height / height. Treat similarly the width dimension. +resized_images: 4-D with shape + `[batch, new_height, new_width, channels]`. +)doc"); // -------------------------------------------------------------------------- REGISTER_OP("ResizeBicubicGrad") @@ -174,7 +207,20 @@ REGISTER_OP("ResizeBicubicGrad") .SetShapeFn([](InferenceContext* c) { c->set_output(0, c->input(1)); return Status::OK(); - }); + }) + .Doc(R"doc( +Computes the gradient of bicubic interpolation. + +grads: 4-D with shape `[batch, height, width, channels]`. +original_image: 4-D with shape `[batch, orig_height, orig_width, channels]`, + The image tensor that was resized. +align_corners: If true, rescale grads by (orig_height - 1) / (height - 1), which + exactly aligns the 4 corners of grads and original_image. If false, rescale by + orig_height / height. Treat similarly the width dimension. +output: 4-D with shape `[batch, orig_height, orig_width, channels]`. + Gradients with respect to the input image. Input image must have been + float or double. 
+)doc"); // -------------------------------------------------------------------------- REGISTER_OP("ResizeBilinear") @@ -183,7 +229,21 @@ REGISTER_OP("ResizeBilinear") .Output("resized_images: float") .Attr("T: {int8, uint8, int16, uint16, int32, int64, half, float, double}") .Attr("align_corners: bool = false") - .SetShapeFn(ResizeShapeFn); + .SetShapeFn(ResizeShapeFn) + .Doc(R"doc( +Resize `images` to `size` using bilinear interpolation. + +Input images can be of different types but output images are always float. + +images: 4-D with shape `[batch, height, width, channels]`. +size:= A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The + new size for the images. +align_corners: If true, rescale input by (new_height - 1) / (height - 1), which + exactly aligns the 4 corners of images and resized images. If false, rescale + by new_height / height. Treat similarly the width dimension. +resized_images: 4-D with shape + `[batch, new_height, new_width, channels]`. +)doc"); // -------------------------------------------------------------------------- REGISTER_OP("QuantizedResizeBilinear") @@ -205,7 +265,21 @@ REGISTER_OP("QuantizedResizeBilinear") c->set_output(1, c->MakeShape({})); c->set_output(2, c->MakeShape({})); return Status::OK(); - }); + }) + .Doc(R"doc( +Resize quantized `images` to `size` using quantized bilinear interpolation. + +Input images and output images must be quantized types. + +images: 4-D with shape `[batch, height, width, channels]`. +size:= A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The + new size for the images. +align_corners: If true, rescale input by (new_height - 1) / (height - 1), which + exactly aligns the 4 corners of images and resized images. If false, rescale + by new_height / height. Treat similarly the width dimension. +resized_images: 4-D with shape + `[batch, new_height, new_width, channels]`. +)doc"); // -------------------------------------------------------------------------- REGISTER_OP("ResizeBilinearGrad") @@ -217,7 +291,20 @@ REGISTER_OP("ResizeBilinearGrad") .SetShapeFn([](InferenceContext* c) { c->set_output(0, c->input(1)); return Status::OK(); - }); + }) + .Doc(R"doc( +Computes the gradient of bilinear interpolation. + +grads: 4-D with shape `[batch, height, width, channels]`. +original_image: 4-D with shape `[batch, orig_height, orig_width, channels]`, + The image tensor that was resized. +align_corners: If true, rescale grads by (orig_height - 1) / (height - 1), which + exactly aligns the 4 corners of grads and original_image. If false, rescale by + orig_height / height. Treat similarly the width dimension. +output: 4-D with shape `[batch, orig_height, orig_width, channels]`. + Gradients with respect to the input image. Input image must have been + float or double. +)doc"); // -------------------------------------------------------------------------- REGISTER_OP("ResizeNearestNeighbor") @@ -226,7 +313,19 @@ REGISTER_OP("ResizeNearestNeighbor") .Output("resized_images: T") .Attr("T: {int8, uint8, int16, uint16, int32, int64, half, float, double}") .Attr("align_corners: bool = false") - .SetShapeFn(ResizeShapeFn); + .SetShapeFn(ResizeShapeFn) + .Doc(R"doc( +Resize `images` to `size` using nearest neighbor interpolation. + +images: 4-D with shape `[batch, height, width, channels]`. +size:= A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The + new size for the images. +align_corners: If true, rescale input by (new_height - 1) / (height - 1), which + exactly aligns the 4 corners of images and resized images. 
 // --------------------------------------------------------------------------
 REGISTER_OP("ResizeNearestNeighborGrad")
@@ -255,7 +354,19 @@ REGISTER_OP("ResizeNearestNeighborGrad")
       }
       c->set_output(0, input);
       return Status::OK();
-    });
+    })
+    .Doc(R"doc(
+Computes the gradient of nearest neighbor interpolation.
+
+grads: 4-D with shape `[batch, height, width, channels]`.
+size: A 1-D int32 Tensor of 2 elements: `orig_height, orig_width`. The
+  original input size.
+align_corners: If true, rescale grads by (orig_height - 1) / (height - 1), which
+  exactly aligns the 4 corners of grads and original_image. If false, rescale by
+  orig_height / height. Treat similarly the width dimension.
+output: 4-D with shape `[batch, orig_height, orig_width, channels]`. Gradients
+  with respect to the input image.
+)doc");

 // --------------------------------------------------------------------------
 REGISTER_OP("RandomCrop")
@@ -288,7 +399,25 @@ REGISTER_OP("RandomCrop")
       }
       c->set_output(0, c->MakeShape({h, w, channels}));
       return Status::OK();
-    });
+    })
+    .Doc(R"doc(
+Randomly crop `image`.
+
+`size` is a 1-D int64 tensor with 2 elements representing the crop height and
+width. The values must be non-negative.
+
+This Op picks a random location in `image` and crops a `height` by `width`
+rectangle from that location. The random location is picked so the cropped
+area will fit inside the original image.
+
+image: 3-D of shape `[height, width, channels]`.
+size: 1-D of length 2 containing: `crop_height`, `crop_width`.
+seed: If either seed or seed2 are set to be non-zero, the random number
+  generator is seeded by the given seed. Otherwise, it is seeded by a
+  random seed.
+seed2: A second seed to avoid seed collision.
+output: 3-D of shape `[crop_height, crop_width, channels]`.
+)doc");

 // TODO(shlens): Support variable rank in RandomCrop.
 // --------------------------------------------------------------------------
@@ -301,7 +430,17 @@ REGISTER_OP("DecodeJpeg")
     .Attr("acceptable_fraction: float = 1.0")
     .Attr("dct_method: string = ''")
     .Output("image: uint8")
-    .SetShapeFn(DecodeImageShapeFn);
+    .SetShapeFn(DecodeImageShapeFn)
+    .Doc(strings::StrCat(R"doc(
+Decode a JPEG-encoded image to a uint8 tensor.
+)doc",
+                         kDecodeJpegCommonDocStr, R"doc(
+This op also supports decoding PNGs and non-animated GIFs since the interface is
+the same, though it is cleaner to use `tf.image.decode_image`.
+
+contents: 0-D. The JPEG-encoded image.
+)doc",
+                         kDecodeJpegCommonParamsDocStr));

 // --------------------------------------------------------------------------
 REGISTER_OP("DecodeAndCropJpeg")
@@ -343,7 +482,18 @@ REGISTER_OP("DecodeAndCropJpeg")
       }
       c->set_output(0, c->MakeShape({h, w, channels_dim}));
       return Status::OK();
-    });
+    })
+    .Doc(strings::StrCat(R"doc(
+Decode and crop a JPEG-encoded image to a uint8 tensor.
+)doc",
+                         kDecodeJpegCommonDocStr, R"doc(
+It is equivalent to a combination of decode and crop, but much faster by only
+decoding the part of the JPEG image within the crop window.
+
+contents: 0-D. The JPEG-encoded image.
+crop_window: 1-D. The crop window: [crop_y, crop_x, crop_height, crop_width].
+)doc", + kDecodeJpegCommonParamsDocStr)); // -------------------------------------------------------------------------- REGISTER_OP("EncodeJpeg") @@ -358,7 +508,40 @@ REGISTER_OP("EncodeJpeg") .Attr("y_density: int = 300") .Attr("xmp_metadata: string = ''") .Output("contents: string") - .SetShapeFn(EncodeImageShapeFn); + .SetShapeFn(EncodeImageShapeFn) + .Doc(R"doc( +JPEG-encode an image. + +`image` is a 3-D uint8 Tensor of shape `[height, width, channels]`. + +The attr `format` can be used to override the color format of the encoded +output. Values can be: + +* `''`: Use a default format based on the number of channels in the image. +* `grayscale`: Output a grayscale JPEG image. The `channels` dimension + of `image` must be 1. +* `rgb`: Output an RGB JPEG image. The `channels` dimension + of `image` must be 3. + +If `format` is not specified or is the empty string, a default format is picked +in function of the number of channels in `image`: + +* 1: Output a grayscale image. +* 3: Output an RGB image. + +image: 3-D with shape `[height, width, channels]`. +format: Per pixel image format. +quality: Quality of the compression from 0 to 100 (higher is better and slower). +progressive: If True, create a JPEG that loads progressively (coarse to fine). +optimize_size: If True, spend CPU/RAM to reduce size with no quality change. +chroma_downsampling: See http://en.wikipedia.org/wiki/Chroma_subsampling. +density_unit: Unit used to specify `x_density` and `y_density`: + pixels per inch (`'in'`) or centimeter (`'cm'`). +x_density: Horizontal pixels per density unit. +y_density: Vertical pixels per density unit. +xmp_metadata: If not empty, embed this XMP metadata in the image header. +contents: 0-D. JPEG-encoded image. +)doc"); // -------------------------------------------------------------------------- REGISTER_OP("ExtractJpegShape") @@ -370,7 +553,17 @@ REGISTER_OP("ExtractJpegShape") TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &unused)); c->set_output(0, c->Vector(3)); return Status::OK(); - }); + }) + .Doc(R"doc( +Extract the shape information of a JPEG-encoded image. + +This op only parses the image header, so it is much faster than DecodeJpeg. + +contents: 0-D. The JPEG-encoded image. +image_shape: 1-D. The image shape with format [height, width, channels]. +output_type: (Optional) The output type of the operation (int32 or int64). + Defaults to int32. +)doc"); // -------------------------------------------------------------------------- REGISTER_OP("AdjustContrast") @@ -383,7 +576,10 @@ REGISTER_OP("AdjustContrast") .Deprecated(2, "Use AdjustContrastv2 instead") .SetShapeFn([](InferenceContext* c) { return shape_inference::UnchangedShapeWithRankAtLeast(c, 3); - }); + }) + .Doc(R"Doc( +Deprecated. Disallowed in GraphDef version >= 2. +)Doc"); // -------------------------------------------------------------------------- REGISTER_OP("AdjustContrastv2") @@ -392,7 +588,24 @@ REGISTER_OP("AdjustContrastv2") .Output("output: float") .SetShapeFn([](InferenceContext* c) { return shape_inference::UnchangedShapeWithRankAtLeast(c, 3); - }); + }) + .Doc(R"Doc( +Adjust the contrast of one or more images. + +`images` is a tensor of at least 3 dimensions. The last 3 dimensions are +interpreted as `[height, width, channels]`. The other dimensions only +represent a collection of images, such as `[batch, height, width, channels].` + +Contrast is adjusted independently for each channel of each image. 
+
+For each channel, the Op first computes the mean of the image pixels in the
+channel and then adjusts each component of each pixel to
+`(x - mean) * contrast_factor + mean`.
+
+images: Images to adjust. At least 3-D.
+contrast_factor: A float multiplier for adjusting contrast.
+output: The contrast-adjusted image or images.
+)Doc");

 // --------------------------------------------------------------------------
 REGISTER_OP("AdjustHue")
@@ -401,7 +614,21 @@ REGISTER_OP("AdjustHue")
     .Output("output: float")
     .SetShapeFn([](InferenceContext* c) {
       return shape_inference::UnchangedShapeWithRankAtLeast(c, 3);
-    });
+    })
+    .Doc(R"Doc(
+Adjust the hue of one or more images.
+
+`images` is a tensor of at least 3 dimensions. The last dimension is
+interpreted as channels, and must be three.
+
+The input image is considered in the RGB colorspace. Conceptually, the RGB
+colors are first mapped into HSV. A delta is then applied to all the hue
+values, and then remapped back to RGB colorspace.
+
+images: Images to adjust. At least 3-D.
+delta: A float delta to add to the hue.
+output: The hue-adjusted image or images.
+)Doc");

 // --------------------------------------------------------------------------
 REGISTER_OP("AdjustSaturation")
@@ -410,7 +637,21 @@ REGISTER_OP("AdjustSaturation")
     .Output("output: float")
     .SetShapeFn([](InferenceContext* c) {
       return shape_inference::UnchangedShapeWithRankAtLeast(c, 3);
-    });
+    })
+    .Doc(R"Doc(
+Adjust the saturation of one or more images.
+
+`images` is a tensor of at least 3 dimensions. The last dimension is
+interpreted as channels, and must be three.
+
+The input image is considered in the RGB colorspace. Conceptually, the RGB
+colors are first mapped into HSV. A scale is then applied to all the
+saturation values, and then remapped back to RGB colorspace.
+
+images: Images to adjust. At least 3-D.
+scale: A float scale to multiply the saturation by.
+output: The saturation-adjusted image or images.
+)Doc");

 // --------------------------------------------------------------------------
 REGISTER_OP("DecodePng")
@@ -418,7 +659,30 @@ REGISTER_OP("DecodePng")
     .Attr("channels: int = 0")
     .Attr("dtype: {uint8, uint16} = DT_UINT8")
     .Output("image: dtype")
-    .SetShapeFn(DecodeImageShapeFn);
+    .SetShapeFn(DecodeImageShapeFn)
+    .Doc(R"doc(
+Decode a PNG-encoded image to a uint8 or uint16 tensor.
+
+The attr `channels` indicates the desired number of color channels for the
+decoded image.
+
+Accepted values are:
+
+* 0: Use the number of channels in the PNG-encoded image.
+* 1: output a grayscale image.
+* 3: output an RGB image.
+* 4: output an RGBA image.
+
+If needed, the PNG-encoded image is transformed to match the requested number
+of color channels.
+
+This op also supports decoding JPEGs and non-animated GIFs since the interface
+is the same, though it is cleaner to use `tf.image.decode_image`.
+
+contents: 0-D. The PNG-encoded image.
+channels: Number of color channels for the decoded image.
+image: 3-D with shape `[height, width, channels]`.
+)doc");
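A minimal sketch of the hue and saturation adjustments described above, assuming the TF 1.x Python wrappers `tf.image.adjust_hue` and `tf.image.adjust_saturation` (an illustration, not part of this change):

```python
import tensorflow as tf

# A single red pixel, shaped [height, width, channels] = [1, 1, 3].
image = tf.constant([[[1.0, 0.0, 0.0]]])

# Rotating the hue by 1/3 maps pure red to pure green, per the HSV
# convention in the AdjustHue doc above.
green = tf.image.adjust_hue(image, delta=1.0 / 3.0)

# Scaling the saturation to 0 moves the pixel toward gray while
# preserving the HSV value component.
desaturated = tf.image.adjust_saturation(image, saturation_factor=0.0)

with tf.Session() as sess:
    print(sess.run(green))        # ~[[[0., 1., 0.]]]
    print(sess.run(desaturated))  # ~[[[1., 1., 1.]]]
```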
 // --------------------------------------------------------------------------
 REGISTER_OP("EncodePng")
@@ -426,14 +690,48 @@ REGISTER_OP("EncodePng")
     .Attr("T: {uint8, uint16} = DT_UINT8")
     .Input("image: T")
     .Output("contents: string")
-    .SetShapeFn(EncodeImageShapeFn);
+    .SetShapeFn(EncodeImageShapeFn)
+    .Doc(R"doc(
+PNG-encode an image.
+
+`image` is a 3-D uint8 or uint16 Tensor of shape `[height, width, channels]`
+where `channels` is:
+
+* 1: for grayscale.
+* 2: for grayscale + alpha.
+* 3: for RGB.
+* 4: for RGBA.
+
+The ZLIB compression level, `compression`, can be -1 for the PNG-encoder
+default or a value from 0 to 9. 9 is the highest compression level, generating
+the smallest output, but is slower.
+
+image: 3-D with shape `[height, width, channels]`.
+compression: Compression level.
+contents: 0-D. PNG-encoded image.
+)doc");

 // --------------------------------------------------------------------------
 REGISTER_OP("DecodeBmp")
     .Input("contents: string")
     .Output("image: uint8")
     .Attr("channels: int = 0")
-    .SetShapeFn(DecodeImageShapeFn);
+    .SetShapeFn(DecodeImageShapeFn)
+    .Doc(R"doc(
+Decode the first frame of a BMP-encoded image to a uint8 tensor.
+
+The attr `channels` indicates the desired number of color channels for the
+decoded image.
+
+Accepted values are:
+
+* 0: Use the number of channels in the BMP-encoded image.
+* 3: output an RGB image.
+* 4: output an RGBA image.
+
+contents: 0-D. The BMP-encoded image.
+image: 3-D with shape `[height, width, channels]`. RGB order.
+)doc");

 // --------------------------------------------------------------------------
 REGISTER_OP("DecodeGif")
@@ -446,21 +744,61 @@ REGISTER_OP("DecodeGif")
                                      InferenceContext::kUnknownDim,
                                      InferenceContext::kUnknownDim, 3}));
       return Status::OK();
-    });
+    })
+    .Doc(R"doc(
+Decode the first frame of a GIF-encoded image to a uint8 tensor.
+
+GIFs with frame or transparency compression are not supported;
+convert animated GIFs from compressed to uncompressed by:
+
+    convert $src.gif -coalesce $dst.gif
+
+This op also supports decoding JPEGs and PNGs, though it is cleaner to use
+`tf.image.decode_image`.
+
+contents: 0-D. The GIF-encoded image.
+image: 4-D with shape `[num_frames, height, width, 3]`. RGB order.
+)doc");

 // --------------------------------------------------------------------------
 REGISTER_OP("RGBToHSV")
     .Input("images: T")
     .Output("output: T")
     .Attr("T: {half, bfloat16, float, double} = DT_FLOAT")
-    .SetShapeFn(ColorspaceShapeFn);
+    .SetShapeFn(ColorspaceShapeFn)
+    .Doc(R"doc(
+Converts one or more images from RGB to HSV.
+
+Outputs a tensor of the same shape as the `images` tensor, containing the HSV
+value of the pixels. The output is only well defined if the values in `images`
+are in `[0,1]`.
+
+`output[..., 0]` contains hue, `output[..., 1]` contains saturation, and
+`output[..., 2]` contains value. All HSV values are in `[0,1]`. A hue of 0
+corresponds to pure red, hue 1/3 is pure green, and 2/3 is pure blue.
+
+images: 1-D or higher rank. RGB data to convert. Last dimension must be size 3.
+output: `images` converted to HSV.
+)doc");

 // --------------------------------------------------------------------------
 REGISTER_OP("HSVToRGB")
     .Input("images: T")
     .Output("output: T")
     .Attr("T: {half, bfloat16, float, double} = DT_FLOAT")
-    .SetShapeFn(ColorspaceShapeFn);
+    .SetShapeFn(ColorspaceShapeFn)
+    .Doc(R"doc(
+Convert one or more images from HSV to RGB.
+
+Outputs a tensor of the same shape as the `images` tensor, containing the RGB
+value of the pixels. The output is only well defined if the values in `images`
+are in `[0,1]`.
+
+See `rgb_to_hsv` for a description of the HSV encoding.
+
+images: 1-D or higher rank. HSV data to convert. Last dimension must be size 3.
+output: `images` converted to RGB.
+)doc"); // -------------------------------------------------------------------------- REGISTER_OP("DrawBoundingBoxes") @@ -470,7 +808,28 @@ REGISTER_OP("DrawBoundingBoxes") .Attr("T: {float, half} = DT_FLOAT") .SetShapeFn([](InferenceContext* c) { return shape_inference::UnchangedShapeWithRankAtLeast(c, 3); - }); + }) + .Doc(R"doc( +Draw bounding boxes on a batch of images. + +Outputs a copy of `images` but draws on top of the pixels zero or more bounding +boxes specified by the locations in `boxes`. The coordinates of the each +bounding box in `boxes` are encoded as `[y_min, x_min, y_max, x_max]`. The +bounding box coordinates are floats in `[0.0, 1.0]` relative to the width and +height of the underlying image. + +For example, if an image is 100 x 200 pixels (height x width) and the bounding +box is `[0.1, 0.2, 0.5, 0.9]`, the upper-left and bottom-right coordinates of +the bounding box will be `(40, 10)` to `(100, 50)` (in (x,y) coordinates). + +Parts of the bounding box may fall outside the image. + +images: 4-D with shape `[batch, height, width, depth]`. A batch of images. +boxes: 3-D with shape `[batch, num_bounding_boxes, 4]` containing bounding + boxes. +output: 4-D with the same shape as `images`. The batch of input images with + bounding boxes drawn on the images. +)doc"); // -------------------------------------------------------------------------- REGISTER_OP("SampleDistortedBoundingBox") @@ -493,7 +852,77 @@ REGISTER_OP("SampleDistortedBoundingBox") c->set_output(1, c->Vector(3)); c->set_output(2, c->MakeShape({1, 1, 4})); return Status::OK(); - }); + }) + .Doc(R"doc( +Generate a single randomly distorted bounding box for an image. + +Bounding box annotations are often supplied in addition to ground-truth labels +in image recognition or object localization tasks. A common technique for +training such a system is to randomly distort an image while preserving +its content, i.e. *data augmentation*. This Op outputs a randomly distorted +localization of an object, i.e. bounding box, given an `image_size`, +`bounding_boxes` and a series of constraints. + +The output of this Op is a single bounding box that may be used to crop the +original image. The output is returned as 3 tensors: `begin`, `size` and +`bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the +image. The latter may be supplied to `tf.image.draw_bounding_boxes` to visualize +what the bounding box looks like. + +Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`. The +bounding box coordinates are floats in `[0.0, 1.0]` relative to the width and +height of the underlying image. + +For example, + +```python + # Generate a single distorted bounding box. + begin, size, bbox_for_draw = tf.image.sample_distorted_bounding_box( + tf.shape(image), + bounding_boxes=bounding_boxes) + + # Draw the bounding box in an image summary. + image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), + bbox_for_draw) + tf.summary.image('images_with_box', image_with_box) + + # Employ the bounding box to distort the image. + distorted_image = tf.slice(image, begin, size) +``` + +Note that if no bounding box information is available, setting +`use_image_if_no_bounding_boxes = true` will assume there is a single implicit +bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is +false and no bounding boxes are supplied, an error is raised. + +image_size: 1-D, containing `[height, width, channels]`. 
 REGISTER_OP("SampleDistortedBoundingBoxV2")
     .Input("image_size: T")
@@ -515,7 +944,77 @@ REGISTER_OP("SampleDistortedBoundingBoxV2")
       c->set_output(1, c->Vector(3));
       c->set_output(2, c->MakeShape({1, 1, 4}));
       return Status::OK();
-    });
+    })
+    .Doc(R"doc(
+Generate a single randomly distorted bounding box for an image.
+
+Bounding box annotations are often supplied in addition to ground-truth labels
+in image recognition or object localization tasks. A common technique for
+training such a system is to randomly distort an image while preserving
+its content, i.e. *data augmentation*. This Op outputs a randomly distorted
+localization of an object, i.e. bounding box, given an `image_size`,
+`bounding_boxes` and a series of constraints.
+
+The output of this Op is a single bounding box that may be used to crop the
+original image. The output is returned as 3 tensors: `begin`, `size` and
+`bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the
+image. The latter may be supplied to `tf.image.draw_bounding_boxes` to visualize
+what the bounding box looks like.
+
+Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`. The
+bounding box coordinates are floats in `[0.0, 1.0]` relative to the width and
+height of the underlying image.
+
+For example,
+
+```python
+    # Generate a single distorted bounding box.
+    begin, size, bbox_for_draw = tf.image.sample_distorted_bounding_box(
+        tf.shape(image),
+        bounding_boxes=bounding_boxes)
+
+    # Draw the bounding box in an image summary.
+    image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0),
+                                                  bbox_for_draw)
+    tf.summary.image('images_with_box', image_with_box)
+
+    # Employ the bounding box to distort the image.
+    distorted_image = tf.slice(image, begin, size)
+```
+
+Note that if no bounding box information is available, setting
+`use_image_if_no_bounding_boxes = true` will assume there is a single implicit
+bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is
+false and no bounding boxes are supplied, an error is raised.
+
+image_size: 1-D, containing `[height, width, channels]`.
+bounding_boxes: 3-D with shape `[batch, N, 4]` describing the N bounding boxes
+  associated with the image.
+min_object_covered: The cropped area of the image must contain at least this
+  fraction of any bounding box supplied. The value of this parameter should be
+  non-negative. In the case of 0, the cropped area does not need to overlap
+  any of the bounding boxes supplied.
+begin: 1-D, containing `[offset_height, offset_width, 0]`. Provide as input to
+  `tf.slice`.
+size: 1-D, containing `[target_height, target_width, -1]`. Provide as input to
+  `tf.slice`.
+bboxes: 3-D with shape `[1, 1, 4]` containing the distorted bounding box.
+  Provide as input to `tf.image.draw_bounding_boxes`.
+seed: If either `seed` or `seed2` are set to non-zero, the random number
+  generator is seeded by the given `seed`. Otherwise, it is seeded by a random
+  seed.
+seed2: A second seed to avoid seed collision.
+aspect_ratio_range: The cropped area of the image must have an aspect ratio =
+  width / height within this range.
+area_range: The cropped area of the image must contain a fraction of the
+  supplied image within this range.
+max_attempts: Number of attempts at generating a cropped region of the image
+  with the specified constraints. After `max_attempts` failures, return the
+  entire image.
+use_image_if_no_bounding_boxes: Controls behavior if no bounding boxes are
+  supplied. If true, assume an implicit bounding box covering the whole input.
+  If false, raise an error.
+)doc");

 // --------------------------------------------------------------------------

@@ -547,7 +1046,48 @@ REGISTER_OP("ExtractGlimpse")
       return SetOutputToSizedImage(c, batch_dim, 1 /* size_input_idx */,
                                    c->Dim(input, 3));
-    });
+    })
+    .Doc(R"doc(
+Extracts a glimpse from the input tensor.
+
+Returns a set of windows called glimpses extracted at location
+`offsets` from the input tensor. If the windows only partially
+overlap the input, the non-overlapping areas will be filled with
+random noise.
+
+The result is a 4-D tensor of shape `[batch_size, glimpse_height,
+glimpse_width, channels]`. The channels and batch dimensions are the
+same as that of the input tensor. The height and width of the output
+windows are specified in the `size` parameter.
+
+The arguments `normalized` and `centered` control how the windows are built:
+
+* If the coordinates are normalized but not centered, 0.0 and 1.0
+  correspond to the minimum and maximum of each height and width
+  dimension.
+* If the coordinates are both normalized and centered, they range from
+  -1.0 to 1.0. The coordinates (-1.0, -1.0) correspond to the upper
+  left corner, the lower right corner is located at (1.0, 1.0) and the
+  center is at (0, 0).
+* If the coordinates are not normalized they are interpreted as
+  numbers of pixels.
+
+input: A 4-D float tensor of shape `[batch_size, height, width, channels]`.
+size: A 1-D tensor of 2 elements containing the size of the glimpses
+  to extract. The glimpse height must be specified first, followed
+  by the glimpse width.
+offsets: A 2-D integer tensor of shape `[batch_size, 2]` containing
+  the y, x locations of the center of each window.
+glimpse: A tensor representing the glimpses `[batch_size,
+  glimpse_height, glimpse_width, channels]`.
+centered: indicates if the offset coordinates are centered relative to
+  the image, in which case the (0, 0) offset is relative to the center
+  of the input images. If false, the (0, 0) offset corresponds to the
+  upper left corner of the input images.
+normalized: indicates if the offset coordinates are normalized.
+uniform_noise: indicates if the noise should be generated using a
+  uniform distribution or a Gaussian distribution.
+)doc");

 // --------------------------------------------------------------------------

@@ -580,7 +1120,44 @@ REGISTER_OP("CropAndResize")
       return SetOutputToSizedImage(c, num_boxes_dim, 3 /* size_input_idx */,
                                    c->Dim(input, 3));
-    });
+    })
+    .Doc(R"doc(
+Extracts crops from the input image tensor and bilinearly resizes them (possibly
+with aspect ratio change) to a common output size specified by `crop_size`. This
+is more general than the `crop_to_bounding_box` op which extracts a fixed size
+slice from the input image and does not allow resizing or aspect ratio change.
+
+Returns a tensor with `crops` from the input `image` at positions defined at the
+bounding box locations in `boxes`. The cropped boxes are all resized (with
+bilinear interpolation) to a fixed `size = [crop_height, crop_width]`. The
+result is a 4-D tensor `[num_boxes, crop_height, crop_width, depth]`. The
+resizing is corner aligned. In particular, if `boxes = [[0, 0, 1, 1]]`, the
+method will give identical results to using `tf.image.resize_bilinear()`
+with `align_corners=True`.
+
+image: A 4-D tensor of shape `[batch, image_height, image_width, depth]`.
+  Both `image_height` and `image_width` need to be positive.
+boxes: A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor
+  specifies the coordinates of a box in the `box_ind[i]` image and is specified
+  in normalized coordinates `[y1, x1, y2, x2]`. A normalized coordinate value of
+  `y` is mapped to the image coordinate at `y * (image_height - 1)`, so as the
+  `[0, 1]` interval of normalized image height is mapped to
+  `[0, image_height - 1]` in image height coordinates. We do allow `y1` > `y2`, in
+  which case the sampled crop is an up-down flipped version of the original
+  image. The width dimension is treated similarly. Normalized coordinates
+  outside the `[0, 1]` range are allowed, in which case we use
+  `extrapolation_value` to extrapolate the input image values.
+box_ind: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`.
+  The value of `box_ind[i]` specifies the image that the `i`-th box refers to.
+crop_size: A 1-D tensor of 2 elements, `size = [crop_height, crop_width]`. All
+  cropped image patches are resized to this size. The aspect ratio of the image
+  content is not preserved. Both `crop_height` and `crop_width` need to be
+  positive.
+crops: A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`.
+method: A string specifying the interpolation method. Only 'bilinear' is
+  supported for now.
+extrapolation_value: Value used for extrapolation, when applicable.
+)doc");
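A usage sketch for the CropAndResize op above via its TF 1.x Python wrapper `tf.image.crop_and_resize` (assumed API of this era; an illustration, not part of this change):

```python
import tensorflow as tf

image = tf.random_uniform([2, 64, 64, 3])  # a batch of 2 images

# Two normalized boxes [y1, x1, y2, x2]; box_ind says which image in the
# batch each box crops. A box of [[0, 0, 1, 1]] would reproduce
# resize_bilinear with align_corners=True, as noted in the doc above.
boxes = tf.constant([[0.0, 0.0, 0.5, 0.5],
                     [0.25, 0.25, 0.75, 0.75]])
box_ind = tf.constant([0, 1])

# Every crop is resized to a common 16x16 output.
crops = tf.image.crop_and_resize(image, boxes, box_ind, crop_size=[16, 16])

with tf.Session() as sess:
    print(sess.run(crops).shape)  # (2, 16, 16, 3)
```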
 REGISTER_OP("CropAndResizeGradImage")
     .Input("grads: float")
@@ -596,7 +1173,30 @@ REGISTER_OP("CropAndResizeGradImage")
       TF_RETURN_IF_ERROR(c->WithRank(out, 4, &out));
       c->set_output(0, out);
       return Status::OK();
-    });
+    })
+    .Doc(R"doc(
+Computes the gradient of the crop_and_resize op wrt the input image tensor.
+
+grads: A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`.
+boxes: A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor
+  specifies the coordinates of a box in the `box_ind[i]` image and is specified
+  in normalized coordinates `[y1, x1, y2, x2]`. A normalized coordinate value of
+  `y` is mapped to the image coordinate at `y * (image_height - 1)`, so as the
+  `[0, 1]` interval of normalized image height is mapped to
+  `[0, image_height - 1]` in image height coordinates. We do allow `y1` > `y2`, in
+  which case the sampled crop is an up-down flipped version of the original
+  image. The width dimension is treated similarly. Normalized coordinates
+  outside the `[0, 1]` range are allowed, in which case we use
+  `extrapolation_value` to extrapolate the input image values.
+box_ind: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`.
+  The value of `box_ind[i]` specifies the image that the `i`-th box refers to.
+image_size: A 1-D tensor with value `[batch, image_height, image_width, depth]`
+  containing the original image size. Both `image_height` and `image_width` need
+  to be positive.
+output: A 4-D tensor of shape `[batch, image_height, image_width, depth]`.
+method: A string specifying the interpolation method. Only 'bilinear' is
+  supported for now.
+)doc");

 REGISTER_OP("CropAndResizeGradBoxes")
     .Input("grads: float")
@@ -609,7 +1209,29 @@ REGISTER_OP("CropAndResizeGradBoxes")
     .SetShapeFn([](InferenceContext* c) {
       c->set_output(0, c->input(2));
       return Status::OK();
-    });
+    })
+    .Doc(R"doc(
+Computes the gradient of the crop_and_resize op wrt the input boxes tensor.
+
+grads: A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`.
+image: A 4-D tensor of shape `[batch, image_height, image_width, depth]`.
+  Both `image_height` and `image_width` need to be positive.
+boxes: A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor
+  specifies the coordinates of a box in the `box_ind[i]` image and is specified
+  in normalized coordinates `[y1, x1, y2, x2]`. A normalized coordinate value of
+  `y` is mapped to the image coordinate at `y * (image_height - 1)`, so as the
+  `[0, 1]` interval of normalized image height is mapped to
+  `[0, image_height - 1]` in image height coordinates. We do allow `y1` > `y2`, in
+  which case the sampled crop is an up-down flipped version of the original
+  image. The width dimension is treated similarly. Normalized coordinates
+  outside the `[0, 1]` range are allowed, in which case we use
+  `extrapolation_value` to extrapolate the input image values.
+box_ind: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`.
+  The value of `box_ind[i]` specifies the image that the `i`-th box refers to.
+output: A 2-D tensor of shape `[num_boxes, 4]`.
+method: A string specifying the interpolation method. Only 'bilinear' is
+  supported for now.
+)doc");

 // --------------------------------------------------------------------------

@@ -622,7 +1244,35 @@ REGISTER_OP("NonMaxSuppression")
     .SetShapeFn([](InferenceContext* c) {
       c->set_output(0, c->Vector(c->UnknownDim()));
       return Status::OK();
-    });
+    })
+    .Doc(R"doc(
+Greedily selects a subset of bounding boxes in descending order of score,
+pruning away boxes that have high intersection-over-union (IOU) overlap
+with previously selected boxes. Bounding boxes are supplied as
+[y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any
+diagonal pair of box corners and the coordinates can be provided as normalized
+(i.e., lying in the interval [0, 1]) or absolute. Note that this algorithm
+is agnostic to where the origin is in the coordinate system.
+Note that this algorithm is invariant to orthogonal transformations and
+translations of the coordinate system; thus translating or reflecting the
+coordinate system results in the same boxes being selected by the algorithm.
+
+The output of this operation is a set of integers indexing into the input
+collection of bounding boxes representing the selected boxes. The bounding
+box coordinates corresponding to the selected indices can then be obtained
+using the `tf.gather` operation. For example:
+
+    selected_indices = tf.image.non_max_suppression(
+        boxes, scores, max_output_size, iou_threshold)
+    selected_boxes = tf.gather(boxes, selected_indices)
+
+boxes: A 2-D float tensor of shape `[num_boxes, 4]`.
+scores: A 1-D float tensor of shape `[num_boxes]` representing a single
+  score corresponding to each box (each row of boxes).
+max_output_size: A scalar integer tensor representing the maximum number of
+  boxes to be selected by non max suppression.
+iou_threshold: A float representing the threshold for deciding whether boxes
+  overlap too much with respect to IOU.
+selected_indices: A 1-D integer tensor of shape `[M]` representing the selected
+  indices from the boxes tensor, where `M <= max_output_size`.
+)doc");

 REGISTER_OP("NonMaxSuppressionV2")
     .Input("boxes: float")
@@ -633,6 +1283,37 @@ REGISTER_OP("NonMaxSuppressionV2")
     .SetShapeFn([](InferenceContext* c) {
       c->set_output(0, c->Vector(c->UnknownDim()));
       return Status::OK();
-    });
+    })
+    .Doc(R"doc(
+Greedily selects a subset of bounding boxes in descending order of score,
+pruning away boxes that have high intersection-over-union (IOU) overlap
+with previously selected boxes. Bounding boxes are supplied as
+[y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any
+diagonal pair of box corners and the coordinates can be provided as normalized
+(i.e., lying in the interval [0, 1]) or absolute. Note that this algorithm
+is agnostic to where the origin is in the coordinate system. Note that this
+algorithm is invariant to orthogonal transformations and translations
+of the coordinate system; thus translating or reflecting the coordinate
+system results in the same boxes being selected by the algorithm.
+
+The output of this operation is a set of integers indexing into the input
+collection of bounding boxes representing the selected boxes. The bounding
+box coordinates corresponding to the selected indices can then be obtained
+using the `tf.gather` operation. For example:
+
+    selected_indices = tf.image.non_max_suppression_v2(
+        boxes, scores, max_output_size, iou_threshold)
+    selected_boxes = tf.gather(boxes, selected_indices)
+
+boxes: A 2-D float tensor of shape `[num_boxes, 4]`.
+scores: A 1-D float tensor of shape `[num_boxes]` representing a single
+  score corresponding to each box (each row of boxes).
+max_output_size: A scalar integer tensor representing the maximum number of
+  boxes to be selected by non max suppression.
+iou_threshold: A 0-D float tensor representing the threshold for deciding whether
+  boxes overlap too much with respect to IOU.
+selected_indices: A 1-D integer tensor of shape `[M]` representing the selected
+  indices from the boxes tensor, where `M <= max_output_size`.
+)doc");

 }  // namespace tensorflow
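A NumPy sketch of the greedy selection the two NonMaxSuppression docs above describe — an illustration of the algorithm, not the registered kernel:

```python
import numpy as np

def nms(boxes, scores, max_output_size, iou_threshold):
    """Greedy non-max suppression over canonical [y1, x1, y2, x2] boxes."""
    def iou(a, b):
        y1, x1 = np.maximum(a[:2], b[:2])          # intersection top-left
        y2, x2 = np.minimum(a[2:], b[2:])          # intersection bottom-right
        inter = max(0.0, y2 - y1) * max(0.0, x2 - x1)
        area = lambda box: (box[2] - box[0]) * (box[3] - box[1])
        return inter / (area(a) + area(b) - inter)

    selected = []
    for i in np.argsort(-np.asarray(scores)):      # descending score
        if all(iou(boxes[i], boxes[j]) <= iou_threshold for j in selected):
            selected.append(i)
        if len(selected) == max_output_size:
            break
    return selected

boxes = np.array([[0, 0, 1, 1], [0, 0.1, 1, 1.1], [0, 2, 1, 3]], dtype=float)
print(nms(boxes, scores=[0.9, 0.8, 0.7], max_output_size=3, iou_threshold=0.5))
# [0, 2]: box 1 overlaps box 0 with IOU ~0.82 and is suppressed.
```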
+)doc"); } // namespace tensorflow diff --git a/tensorflow/core/ops/io_ops.cc b/tensorflow/core/ops/io_ops.cc index 21f0d02ff2..082d18c1d5 100644 --- a/tensorflow/core/ops/io_ops.cc +++ b/tensorflow/core/ops/io_ops.cc @@ -81,7 +81,21 @@ REGISTER_OP("SaveV2") // TODO(mrry): Attempt to parse the shapes_and_slices values and use // them to constrain the shape of the remaining inputs. return Status::OK(); - }); + }) + .Doc(R"doc( +Saves tensors in V2 checkpoint format. + +By default, saves the named tensors in full. If the caller wishes to save +specific slices of full tensors, "shape_and_slices" should be non-empty strings +and correspondingly well-formed. + +prefix: Must have a single element. The prefix of the V2 checkpoint to which we + write the tensors. +tensor_names: shape {N}. The names of the tensors to be saved. +shape_and_slices: shape {N}. The slice specs of the tensors to be saved. + Empty strings indicate that they are non-partitioned tensors. +tensors: `N` tensors to save. +)doc"); REGISTER_OP("RestoreV2") .Input("prefix: string") @@ -127,7 +141,33 @@ REGISTER_OP("RestoreV2") } else { return UnknownShape(c); } - }); + }) + .Doc(R"doc( +Restores tensors from a V2 checkpoint. + +For backward compatibility with the V1 format, this Op currently allows +restoring from a V1 checkpoint as well: + - This Op first attempts to find the V2 index file pointed to by "prefix", and + if found proceed to read it as a V2 checkpoint; + - Otherwise the V1 read path is invoked. +Relying on this behavior is not recommended, as the ability to fall back to read +V1 might be deprecated and eventually removed. + +By default, restores the named tensors in full. If the caller wishes to restore +specific slices of stored tensors, "shape_and_slices" should be non-empty +strings and correspondingly well-formed. + +Callers must ensure all the named tensors are indeed stored in the checkpoint. + +prefix: Must have a single element. The prefix of a V2 checkpoint. +tensor_names: shape {N}. The names of the tensors to be restored. +shape_and_slices: shape {N}. The slice specs of the tensors to be restored. + Empty strings indicate that they are non-partitioned tensors. +dtypes: shape {N}. The list of expected dtype for the tensors. Must match + those stored in the checkpoint. +tensors: shape {N}. The restored tensors, whose shapes are read from the + checkpoint directly. +)doc"); REGISTER_OP("MergeV2Checkpoints") .Input("checkpoint_prefixes: string") @@ -139,7 +179,23 @@ REGISTER_OP("MergeV2Checkpoints") TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 1, &unused)); TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); return Status::OK(); - }); + }) + .Doc(R"doc( +V2 format specific: merges the metadata files of sharded checkpoints. The +result is one logical checkpoint, with one physical metadata file and renamed +data files. + +Intended for "grouping" multiple checkpoints in a sharded checkpoint setup. + +If delete_old_dirs is true, attempts to delete recursively the dirname of each +path in the input checkpoint_prefixes. This is useful when those paths are non +user-facing temporary locations. + +checkpoint_prefixes: prefixes of V2 checkpoints to merge. +destination_prefix: scalar. The desired final prefix. Allowed to be the same + as one of the checkpoint_prefixes. +delete_old_dirs: see above. 
+)doc"); REGISTER_OP("Save") .Input("filename: string") @@ -161,7 +217,20 @@ REGISTER_OP("Save") c->WithValue(c->Dim(s, 0), c->num_inputs() - 2, &unused_dim)); return Status::OK(); - }); + }) + .Doc(R"doc( +Saves the input tensors to disk. + +The size of `tensor_names` must match the number of tensors in `data`. `data[i]` +is written to `filename` with name `tensor_names[i]`. + +See also `SaveSlices`. + +filename: Must have a single element. The name of the file to which we write + the tensor. +tensor_names: Shape `[N]`. The names of the tensors to be saved. +data: `N` tensors to save. +)doc"); REGISTER_OP("SaveSlices") .Input("filename: string") @@ -187,7 +256,39 @@ REGISTER_OP("SaveSlices") // TODO(mrry): Attempt to parse the shapes_and_slices values and use // them to constrain the shape of the remaining inputs. return Status::OK(); - }); + }) + .Doc(R"doc( +Saves input tensors slices to disk. + +This is like `Save` except that tensors can be listed in the saved file as being +a slice of a larger tensor. `shapes_and_slices` specifies the shape of the +larger tensor and the slice that this tensor covers. `shapes_and_slices` must +have as many elements as `tensor_names`. + +Elements of the `shapes_and_slices` input must either be: + +* The empty string, in which case the corresponding tensor is + saved normally. +* A string of the form `dim0 dim1 ... dimN-1 slice-spec` where the + `dimI` are the dimensions of the larger tensor and `slice-spec` + specifies what part is covered by the tensor to save. + +`slice-spec` itself is a `:`-separated list: `slice0:slice1:...:sliceN-1` +where each `sliceI` is either: + +* The string `-` meaning that the slice covers all indices of this dimension +* `start,length` where `start` and `length` are integers. In that + case the slice covers `length` indices starting at `start`. + +See also `Save`. + +filename: Must have a single element. The name of the file to which we write the + tensor. +tensor_names: Shape `[N]`. The names of the tensors to be saved. +shapes_and_slices: Shape `[N]`. The shapes and slice specifications to use when + saving the tensors. +data: `N` tensors to save. +)doc"); REGISTER_OP("Restore") .Input("file_pattern: string") @@ -202,7 +303,36 @@ REGISTER_OP("Restore") TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); c->set_output(0, c->UnknownShape()); return Status::OK(); - }); + }) + .Doc(R"doc( +Restores a tensor from checkpoint files. + +Reads a tensor stored in one or several files. If there are several files (for +instance because a tensor was saved as slices), `file_pattern` may contain +wildcard symbols (`*` and `?`) in the filename portion only, not in the +directory portion. + +If a `file_pattern` matches several files, `preferred_shard` can be used to hint +in which file the requested tensor is likely to be found. This op will first +open the file at index `preferred_shard` in the list of matching files and try +to restore tensors from that file. Only if some tensors or tensor slices are +not found in that first file, then the Op opens all the files. Setting +`preferred_shard` to match the value passed as the `shard` input +of a matching `Save` Op may speed up Restore. This attribute only affects +performance, not correctness. The default value -1 means files are processed in +order. + +See also `RestoreSlice`. + +file_pattern: Must have a single element. The pattern of the files from + which we read the tensor. +tensor_name: Must have a single element. The name of the tensor to be + restored. 
 REGISTER_OP("RestoreSlice")
     .Input("file_pattern: string")
@@ -241,20 +371,48 @@ REGISTER_OP("RestoreSlice")
         c->set_output(0, c->UnknownShape());
       }
       return Status::OK();
-    });
+    })
+    .Doc(R"doc(
+Restores a tensor from checkpoint files.
+
+This is like `Restore` except that the restored tensor can be listed as filling
+only a slice of a larger tensor. `shape_and_slice` specifies the shape of the
+larger tensor and the slice that the restored tensor covers.
+
+The `shape_and_slice` input has the same format as the
+elements of the `shapes_and_slices` input of the `SaveSlices` op.
+
+file_pattern: Must have a single element. The pattern of the files from
+  which we read the tensor.
+tensor_name: Must have a single element. The name of the tensor to be
+  restored.
+shape_and_slice: Scalar. The shapes and slice specifications to use when
+  restoring a tensor.
+tensor: The restored tensor.
+dt: The type of the tensor to be restored.
+preferred_shard: Index of file to open first if multiple files match
+  `file_pattern`. See the documentation for `Restore`.
+)doc");

 REGISTER_OP("ShardedFilename")
     .Input("basename: string")
     .Input("shard: int32")
     .Input("num_shards: int32")
     .Output("filename: string")
-    .SetShapeFn(ScalarInputsAndOutputs);
+    .SetShapeFn(ScalarInputsAndOutputs)
+    .Doc(R"doc(
+Generate a sharded filename. The filename is printf formatted as
+  %s-%05d-of-%05d, basename, shard, num_shards.
+)doc");

 REGISTER_OP("ShardedFilespec")
     .Input("basename: string")
     .Input("num_shards: int32")
     .Output("filename: string")
-    .SetShapeFn(ScalarInputsAndOutputs);
+    .SetShapeFn(ScalarInputsAndOutputs)
+    .Doc(R"doc(
+Generate a glob pattern matching all sharded file names.
+)doc");

 // Reader source ops ----------------------------------------------------------

@@ -263,14 +421,38 @@ REGISTER_OP("WholeFileReader")
     .Output("reader_handle: Ref(string)")
     .Attr("container: string = ''")
     .Attr("shared_name: string = ''")
     .SetIsStateful()
-    .SetShapeFn(TwoElementOutput);
+    .SetShapeFn(TwoElementOutput)
+    .Doc(R"doc(
+A Reader that outputs the entire contents of a file as a value.
+
+To use, enqueue filenames in a Queue. The output of ReaderRead will
+be a filename (key) and the contents of that file (value).
+
+reader_handle: The handle to reference the Reader.
+container: If non-empty, this reader is placed in the given container.
+  Otherwise, a default container is used.
+shared_name: If non-empty, this reader is named in the given bucket
+  with this shared_name. Otherwise, the node name is used instead.
+)doc");

 REGISTER_OP("WholeFileReaderV2")
     .Output("reader_handle: resource")
     .Attr("container: string = ''")
     .Attr("shared_name: string = ''")
     .SetIsStateful()
-    .SetShapeFn(shape_inference::ScalarShape);
+    .SetShapeFn(shape_inference::ScalarShape)
+    .Doc(R"doc(
+A Reader that outputs the entire contents of a file as a value.
+
+To use, enqueue filenames in a Queue. The output of ReaderRead will
+be a filename (key) and the contents of that file (value).
+
+reader_handle: The handle to reference the Reader.
+container: If non-empty, this reader is placed in the given container.
+  Otherwise, a default container is used.
+shared_name: If non-empty, this reader is named in the given bucket
+  with this shared_name. Otherwise, the node name is used instead.
+)doc");

 // TODO(cwhipkey): mark this deprecated in favor of V2.
REGISTER_OP("TextLineReader") @@ -279,7 +461,17 @@ REGISTER_OP("TextLineReader") .Attr("container: string = ''") .Attr("shared_name: string = ''") .SetIsStateful() - .SetShapeFn(TwoElementOutput); + .SetShapeFn(TwoElementOutput) + .Doc(R"doc( +A Reader that outputs the lines of a file delimited by '\n'. + +reader_handle: The handle to reference the Reader. +skip_header_lines: Number of lines to skip from the beginning of every file. +container: If non-empty, this reader is placed in the given container. + Otherwise, a default container is used. +shared_name: If non-empty, this reader is named in the given bucket + with this shared_name. Otherwise, the node name is used instead. +)doc"); REGISTER_OP("TextLineReaderV2") .Output("reader_handle: resource") @@ -287,7 +479,17 @@ REGISTER_OP("TextLineReaderV2") .Attr("container: string = ''") .Attr("shared_name: string = ''") .SetIsStateful() - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +A Reader that outputs the lines of a file delimited by '\n'. + +reader_handle: The handle to reference the Reader. +skip_header_lines: Number of lines to skip from the beginning of every file. +container: If non-empty, this reader is placed in the given container. + Otherwise, a default container is used. +shared_name: If non-empty, this reader is named in the given bucket + with this shared_name. Otherwise, the node name is used instead. +)doc"); // TODO(cwhipkey): mark this deprecated in favor of V2. REGISTER_OP("FixedLengthRecordReader") @@ -299,7 +501,21 @@ REGISTER_OP("FixedLengthRecordReader") .Attr("container: string = ''") .Attr("shared_name: string = ''") .SetIsStateful() - .SetShapeFn(TwoElementOutput); + .SetShapeFn(TwoElementOutput) + .Doc(R"doc( +A Reader that outputs fixed-length records from a file. + +reader_handle: The handle to reference the Reader. +header_bytes: Number of bytes in the header, defaults to 0. +record_bytes: Number of bytes in the record. +footer_bytes: Number of bytes in the footer, defaults to 0. +hop_bytes: Number of bytes to hop before each read. Default of 0 means using + record_bytes. +container: If non-empty, this reader is placed in the given container. + Otherwise, a default container is used. +shared_name: If non-empty, this reader is named in the given bucket + with this shared_name. Otherwise, the node name is used instead. +)doc"); REGISTER_OP("FixedLengthRecordReaderV2") .Output("reader_handle: resource") @@ -311,7 +527,23 @@ REGISTER_OP("FixedLengthRecordReaderV2") .Attr("shared_name: string = ''") .Attr("encoding: string = ''") .SetIsStateful() - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +A Reader that outputs fixed-length records from a file. + +reader_handle: The handle to reference the Reader. +header_bytes: Number of bytes in the header, defaults to 0. +record_bytes: Number of bytes in the record. +footer_bytes: Number of bytes in the footer, defaults to 0. +hop_bytes: Number of bytes to hop before each read. Default of 0 means using + record_bytes. +container: If non-empty, this reader is placed in the given container. + Otherwise, a default container is used. +shared_name: If non-empty, this reader is named in the given bucket + with this shared_name. Otherwise, the node name is used instead. +encoding: The type of encoding for the file. Currently ZLIB and GZIP + are supported. Defaults to none. +)doc"); // TODO(cwhipkey): mark this deprecated in favor of V2. 
REGISTER_OP("TFRecordReader") @@ -320,7 +552,16 @@ REGISTER_OP("TFRecordReader") .Attr("shared_name: string = ''") .Attr("compression_type: string = ''") .SetIsStateful() - .SetShapeFn(TwoElementOutput); + .SetShapeFn(TwoElementOutput) + .Doc(R"doc( +A Reader that outputs the records from a TensorFlow Records file. + +reader_handle: The handle to reference the Reader. +container: If non-empty, this reader is placed in the given container. + Otherwise, a default container is used. +shared_name: If non-empty, this reader is named in the given bucket + with this shared_name. Otherwise, the node name is used instead. +)doc"); REGISTER_OP("TFRecordReaderV2") .Output("reader_handle: resource") @@ -328,14 +569,31 @@ REGISTER_OP("TFRecordReaderV2") .Attr("shared_name: string = ''") .Attr("compression_type: string = ''") .SetIsStateful() - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +A Reader that outputs the records from a TensorFlow Records file. + +reader_handle: The handle to reference the Reader. +container: If non-empty, this reader is placed in the given container. + Otherwise, a default container is used. +shared_name: If non-empty, this reader is named in the given bucket + with this shared_name. Otherwise, the node name is used instead. +)doc"); REGISTER_OP("LMDBReader") .Output("reader_handle: Ref(string)") .Attr("container: string = ''") .Attr("shared_name: string = ''") .SetIsStateful() - .SetShapeFn(TwoElementOutput); + .SetShapeFn(TwoElementOutput) + .Doc(R"doc( +A Reader that outputs the records from a LMDB file. +reader_handle: The handle to reference the Reader. +container: If non-empty, this reader is placed in the given container. + Otherwise, a default container is used. +shared_name: If non-empty, this reader is named in the given bucket + with this shared_name. Otherwise, the node name is used instead. +)doc"); // TODO(cwhipkey): mark this deprecated in favor of V2. REGISTER_OP("IdentityReader") @@ -343,14 +601,38 @@ REGISTER_OP("IdentityReader") .Attr("container: string = ''") .Attr("shared_name: string = ''") .SetIsStateful() - .SetShapeFn(TwoElementOutput); + .SetShapeFn(TwoElementOutput) + .Doc(R"doc( +A Reader that outputs the queued work as both the key and value. + +To use, enqueue strings in a Queue. ReaderRead will take the front +work string and output (work, work). + +reader_handle: The handle to reference the Reader. +container: If non-empty, this reader is placed in the given container. + Otherwise, a default container is used. +shared_name: If non-empty, this reader is named in the given bucket + with this shared_name. Otherwise, the node name is used instead. +)doc"); REGISTER_OP("IdentityReaderV2") .Output("reader_handle: resource") .Attr("container: string = ''") .Attr("shared_name: string = ''") .SetIsStateful() - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +A Reader that outputs the queued work as both the key and value. + +To use, enqueue strings in a Queue. ReaderRead will take the front +work string and output (work, work). + +reader_handle: The handle to reference the Reader. +container: If non-empty, this reader is placed in the given container. + Otherwise, a default container is used. +shared_name: If non-empty, this reader is named in the given bucket + with this shared_name. Otherwise, the node name is used instead. 
+)doc"); // Ops that operate on Readers ------------------------------------------------ @@ -359,14 +641,38 @@ REGISTER_OP("ReaderRead") .Input("queue_handle: Ref(string)") .Output("key: string") .Output("value: string") - .SetShapeFn(TwoElementVectorAndScalarOutputs); + .SetShapeFn(TwoElementVectorAndScalarOutputs) + .Doc(R"doc( +Returns the next record (key, value pair) produced by a Reader. + +Will dequeue from the input queue if necessary (e.g. when the +Reader needs to start reading from a new file since it has finished +with the previous file). + +reader_handle: Handle to a Reader. +queue_handle: Handle to a Queue, with string work items. +key: A scalar. +value: A scalar. +)doc"); REGISTER_OP("ReaderReadV2") .Input("reader_handle: resource") .Input("queue_handle: resource") .Output("key: string") .Output("value: string") - .SetShapeFn(ScalarInputsAndOutputs); + .SetShapeFn(ScalarInputsAndOutputs) + .Doc(R"doc( +Returns the next record (key, value pair) produced by a Reader. + +Will dequeue from the input queue if necessary (e.g. when the +Reader needs to start reading from a new file since it has finished +with the previous file). + +reader_handle: Handle to a Reader. +queue_handle: Handle to a Queue, with string work items. +key: A scalar. +value: A scalar. +)doc"); REGISTER_OP("ReaderReadUpTo") .Input("reader_handle: Ref(string)") @@ -383,7 +689,21 @@ REGISTER_OP("ReaderReadUpTo") c->set_output(0, out); c->set_output(1, out); return Status::OK(); - }); + }) + .Doc(R"doc( +Returns up to `num_records` (key, value) pairs produced by a Reader. + +Will dequeue from the input queue if necessary (e.g. when the +Reader needs to start reading from a new file since it has finished +with the previous file). +It may return less than `num_records` even before the last batch. + +reader_handle: Handle to a `Reader`. +queue_handle: Handle to a `Queue`, with string work items. +num_records: number of records to read from `Reader`. +keys: A 1-D tensor. +values: A 1-D tensor. +)doc"); REGISTER_OP("ReaderReadUpToV2") .Input("reader_handle: resource") @@ -400,37 +720,93 @@ REGISTER_OP("ReaderReadUpToV2") c->set_output(0, out); c->set_output(1, out); return Status::OK(); - }); + }) + .Doc(R"doc( +Returns up to `num_records` (key, value) pairs produced by a Reader. + +Will dequeue from the input queue if necessary (e.g. when the +Reader needs to start reading from a new file since it has finished +with the previous file). +It may return less than `num_records` even before the last batch. + +reader_handle: Handle to a `Reader`. +queue_handle: Handle to a `Queue`, with string work items. +num_records: number of records to read from `Reader`. +keys: A 1-D tensor. +values: A 1-D tensor. +)doc"); REGISTER_OP("ReaderNumRecordsProduced") .Input("reader_handle: Ref(string)") .Output("records_produced: int64") - .SetShapeFn(TwoElementVectorAndScalarOutputs); + .SetShapeFn(TwoElementVectorAndScalarOutputs) + .Doc(R"doc( +Returns the number of records this Reader has produced. + +This is the same as the number of ReaderRead executions that have +succeeded. + +reader_handle: Handle to a Reader. +)doc"); REGISTER_OP("ReaderNumRecordsProducedV2") .Input("reader_handle: resource") .Output("records_produced: int64") - .SetShapeFn(ScalarInputsAndOutputs); + .SetShapeFn(ScalarInputsAndOutputs) + .Doc(R"doc( +Returns the number of records this Reader has produced. + +This is the same as the number of ReaderRead executions that have +succeeded. + +reader_handle: Handle to a Reader. 
+)doc"); REGISTER_OP("ReaderNumWorkUnitsCompleted") .Input("reader_handle: Ref(string)") .Output("units_completed: int64") - .SetShapeFn(TwoElementVectorAndScalarOutputs); + .SetShapeFn(TwoElementVectorAndScalarOutputs) + .Doc(R"doc( +Returns the number of work units this Reader has finished processing. + +reader_handle: Handle to a Reader. +)doc"); REGISTER_OP("ReaderNumWorkUnitsCompletedV2") .Input("reader_handle: resource") .Output("units_completed: int64") - .SetShapeFn(ScalarInputsAndOutputs); + .SetShapeFn(ScalarInputsAndOutputs) + .Doc(R"doc( +Returns the number of work units this Reader has finished processing. + +reader_handle: Handle to a Reader. +)doc"); REGISTER_OP("ReaderSerializeState") .Input("reader_handle: Ref(string)") .Output("state: string") - .SetShapeFn(TwoElementVectorAndScalarOutputs); + .SetShapeFn(TwoElementVectorAndScalarOutputs) + .Doc(R"doc( +Produce a string tensor that encodes the state of a Reader. + +Not all Readers support being serialized, so this can produce an +Unimplemented error. + +reader_handle: Handle to a Reader. +)doc"); REGISTER_OP("ReaderSerializeStateV2") .Input("reader_handle: resource") .Output("state: string") - .SetShapeFn(ScalarInputsAndOutputs); + .SetShapeFn(ScalarInputsAndOutputs) + .Doc(R"doc( +Produce a string tensor that encodes the state of a Reader. + +Not all Readers support being serialized, so this can produce an +Unimplemented error. + +reader_handle: Handle to a Reader. +)doc"); REGISTER_OP("ReaderRestoreState") .Input("reader_handle: Ref(string)") @@ -444,7 +820,17 @@ REGISTER_OP("ReaderRestoreState") TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); return Status::OK(); - }); + }) + .Doc(R"doc( +Restore a reader to a previously saved state. + +Not all Readers support being restored, so this can produce an +Unimplemented error. + +reader_handle: Handle to a Reader. +state: Result of a ReaderSerializeState of a Reader with type + matching reader_handle. +)doc"); REGISTER_OP("ReaderRestoreStateV2") .Input("reader_handle: resource") @@ -454,22 +840,45 @@ REGISTER_OP("ReaderRestoreStateV2") TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &unused)); TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); return Status::OK(); - }); + }) + .Doc(R"doc( +Restore a reader to a previously saved state. + +Not all Readers support being restored, so this can produce an +Unimplemented error. + +reader_handle: Handle to a Reader. +state: Result of a ReaderSerializeState of a Reader with type + matching reader_handle. +)doc"); REGISTER_OP("ReaderReset") .Input("reader_handle: Ref(string)") - .SetShapeFn(TwoElementVectorAndScalarOutputs); + .SetShapeFn(TwoElementVectorAndScalarOutputs) + .Doc(R"doc( +Restore a Reader to its initial clean state. + +reader_handle: Handle to a Reader. +)doc"); REGISTER_OP("ReaderResetV2") .Input("reader_handle: resource") - .SetShapeFn(ScalarInputsAndOutputs); + .SetShapeFn(ScalarInputsAndOutputs) + .Doc(R"doc( +Restore a Reader to its initial clean state. + +reader_handle: Handle to a Reader. +)doc"); // Other input Ops ---------------------------------------------------------- REGISTER_OP("ReadFile") .Input("filename: string") .Output("contents: string") - .SetShapeFn(ScalarInputsAndOutputs); + .SetShapeFn(ScalarInputsAndOutputs) + .Doc(R"doc( +Reads and outputs the entire contents of the input filename. 
+)doc"); REGISTER_OP("WriteFile") .Input("filename: string") @@ -479,7 +888,14 @@ REGISTER_OP("WriteFile") TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &unused)); TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); return Status::OK(); - }); + }) + .Doc(R"doc( +Writes contents to the file at input filename. Creates file and recursively +creates directory if not existing. + +filename: scalar. The name of the file to which we write the contents. +contents: scalar. The content to be written to the output file. +)doc"); REGISTER_OP("MatchingFiles") .Input("pattern: string") @@ -489,6 +905,15 @@ REGISTER_OP("MatchingFiles") TF_RETURN_IF_ERROR(c->WithRankAtMost(c->input(0), 1, &unused)); c->set_output(0, c->Vector(InferenceContext::kUnknownDim)); return Status::OK(); - }); + }) + .Doc(R"doc( +Returns the set of files matching one or more glob patterns. + +Note that this routine only supports wildcard characters in the +basename portion of the pattern, not in the directory portion. + +pattern: Shell wildcard pattern(s). Scalar or vector of type string. +filenames: A vector of matching filenames. +)doc"); } // namespace tensorflow diff --git a/tensorflow/core/ops/linalg_ops.cc b/tensorflow/core/ops/linalg_ops.cc index b8496d972d..53e2360d23 100644 --- a/tensorflow/core/ops/linalg_ops.cc +++ b/tensorflow/core/ops/linalg_ops.cc @@ -202,7 +202,17 @@ REGISTER_OP("MatrixDeterminant") TF_RETURN_IF_ERROR(c->Subshape(input, 0, -2, &out)); c->set_output(0, out); return Status::OK(); - }); + }) + .Doc(R"doc( +Computes the determinant of one or more square matrices. + +The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions +form square matrices. The output is a tensor containing the determinants +for all input submatrices `[..., :, :]`. + +input: Shape is `[..., M, M]`. +output: Shape is `[...]`. +)doc"); REGISTER_OP("LogMatrixDeterminant") .Input("input: T") @@ -225,33 +235,126 @@ REGISTER_OP("LogMatrixDeterminant") TF_RETURN_IF_ERROR(c->Subshape(input, 0, -2, &out)); c->set_output(1, out); return Status::OK(); - }); + }) + .Doc(R"doc( +Computes the sign and the log of the absolute value of the determinant of +one or more square matrices. + +The input is a tensor of shape `[N, M, M]` whose inner-most 2 dimensions +form square matrices. The outputs are two tensors containing the signs and +absolute values of the log determinants for all N input submatrices +`[..., :, :]` such that the determinant = sign*exp(log_abs_determinant). +The log_abs_determinant is computed as det(P)*sum(log(diag(LU))) where LU +is the LU decomposition of the input and P is the corresponding +permutation matrix. + +input: Shape is `[N, M, M]`. +sign: The signs of the log determinants of the inputs. Shape is `[N]`. +log_abs_determinant: The logs of the absolute values of the determinants +of the N input matrices. Shape is `[N]`. +)doc"); REGISTER_OP("MatrixInverse") .Input("input: T") .Output("output: T") .Attr("adjoint: bool = False") .Attr("T: {double, float, complex64, complex128}") - .SetShapeFn(BatchUnchangedSquareShapeFn); + .SetShapeFn(BatchUnchangedSquareShapeFn) + .Doc(R"doc( +Computes the inverse of one or more square invertible matrices or their +adjoints (conjugate transposes). + +The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions +form square matrices. The output is a tensor of the same shape as the input +containing the inverse for all input submatrices `[..., :, :]`. + +The op uses LU decomposition with partial pivoting to compute the inverses. 
+ +If a matrix is not invertible there is no guarantee what the op does. It +may detect the condition and raise an exception or it may simply return a +garbage result. + +input: Shape is `[..., M, M]`. +output: Shape is `[..., M, M]`. + +@compatibility(numpy) +Equivalent to np.linalg.inv +@end_compatibility +)doc"); REGISTER_OP("MatrixExponential") .Input("input: T") .Output("output: T") .Attr("T: {double, float, complex64, complex128}") - .SetShapeFn(BatchUnchangedSquareShapeFn); + .SetShapeFn(BatchUnchangedSquareShapeFn) + .Doc(R"doc( +Computes the matrix exponential of one or more square matrices: + +exp(A) = \sum_{n=0}^\infty A^n/n! + +The exponential is computed using a combination of the scaling and squaring +method and the Pade approximation. Details can be found in: +Nicholas J. Higham, "The scaling and squaring method for the matrix exponential +revisited," SIAM J. Matrix Anal. Applic., 26:1179-1193, 2005. + +The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions +form square matrices. The output is a tensor of the same shape as the input +containing the exponential for all input submatrices `[..., :, :]`. + +input: Shape is `[..., M, M]`. +output: Shape is `[..., M, M]`. + +@compatibility(scipy) +Equivalent to scipy.linalg.expm +@end_compatibility +)doc"); REGISTER_OP("Cholesky") .Input("input: T") .Output("output: T") .Attr("T: {double, float, complex64, complex128}") - .SetShapeFn(BatchUnchangedSquareShapeFn); + .SetShapeFn(BatchUnchangedSquareShapeFn) + .Doc(R"doc( +Computes the Cholesky decomposition of one or more square matrices. + +The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions +form square matrices. + +The input has to be symmetric and positive definite. Only the lower-triangular +part of the input will be used for this operation. The upper-triangular part +will not be read. + +The output is a tensor of the same shape as the input +containing the Cholesky decompositions for all input submatrices `[..., :, :]`. + +**Note**: The gradient computation on GPU is faster for large matrices but +not for large batch dimensions when the submatrices are small. In this +case it might be faster to use the CPU. + +input: Shape is `[..., M, M]`. +output: Shape is `[..., M, M]`. +)doc"); REGISTER_OP("CholeskyGrad") .Input("l: T") .Input("grad: T") .Output("output: T") .Attr("T: {float, double}") - .SetShapeFn(BatchUnchangedSquareShapeFn); + .SetShapeFn(BatchUnchangedSquareShapeFn) + .Doc(R"doc( +Computes the reverse mode backpropagated gradient of the Cholesky algorithm. + +For an explanation see "Differentiation of the Cholesky algorithm" by +Iain Murray http://arxiv.org/abs/1602.07527. + +l: Output of batch Cholesky algorithm l = cholesky(A). Shape is `[..., M, M]`. + Algorithm depends only on lower triangular part of the innermost matrices of + this tensor. +grad: df/dl where f is some scalar function. Shape is `[..., M, M]`. + Algorithm depends only on lower triangular part of the innermost matrices of + this tensor. +output: Symmetrized version of df/dA. Shape is `[..., M, M]`. +)doc"); REGISTER_OP("SelfAdjointEig") .Input("input: T") @@ -271,7 +374,20 @@ REGISTER_OP("SelfAdjointEig") TF_RETURN_IF_ERROR(c->Concatenate(s, c->Matrix(d_plus_1, d), &s)); c->set_output(0, s); return Status::OK(); - }); + }) + .Doc(R"doc( +Computes the Eigen Decomposition of a batch of square self-adjoint matrices.
+ +The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions +form square matrices, with the same constraints as the single matrix +SelfAdjointEig. + +The result is a [..., M+1, M] matrix with [..., 0, :] containing the +eigenvalues, and subsequent [..., 1:, :] containing the eigenvectors. + +input: Shape is `[..., M, M]`. +output: Shape is `[..., M+1, M]`. +)doc"); REGISTER_OP("SelfAdjointEigV2") .Input("input: T") @@ -279,7 +395,27 @@ REGISTER_OP("SelfAdjointEigV2") .Output("v: T") .Attr("compute_v: bool = True") .Attr("T: {double, float, complex64, complex128}") - .SetShapeFn(SelfAdjointEigV2ShapeFn); + .SetShapeFn(SelfAdjointEigV2ShapeFn) + .Doc(R"doc( +Computes the eigen decomposition of one or more square self-adjoint matrices. + +Computes the eigenvalues and (optionally) eigenvectors of each inner matrix in +`input` such that `input[..., :, :] = v[..., :, :] * diag(e[..., :])`. + +```python +# a is a tensor. +# e is a tensor of eigenvalues. +# v is a tensor of eigenvectors. +e, v = self_adjoint_eig(a) +e = self_adjoint_eig(a, compute_v=False) +``` + +input: `Tensor` input of shape `[N, N]`. +compute_v: If `True` then eigenvectors will be computed and returned in `v`. + Otherwise, only the eigenvalues will be computed. +e: Eigenvalues. Shape is `[N]`. +v: Eigenvectors. Shape is `[N, N]`. +)doc"); REGISTER_OP("MatrixSolve") .Input("matrix: T") @@ -289,7 +425,23 @@ REGISTER_OP("MatrixSolve") .Attr("T: {double, float, complex64, complex128}") .SetShapeFn([](InferenceContext* c) { return MatrixSolveShapeFn(c, true /* square */); - }); + }) + .Doc(R"doc( +Solves systems of linear equations. + +`matrix` is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions +form square matrices. `rhs` is a tensor of shape `[..., M, K]`. The `output` is +a tensor of shape `[..., M, K]`. If `adjoint` is `False` then each output matrix +satisfies `matrix[..., :, :] * output[..., :, :] = rhs[..., :, :]`. +If `adjoint` is `True` then each output matrix satisfies +`adjoint(matrix[..., :, :]) * output[..., :, :] = rhs[..., :, :]`. + +matrix: Shape is `[..., M, M]`. +rhs: Shape is `[..., M, K]`. +output: Shape is `[..., M, K]`. +adjoint: Boolean indicating whether to solve with `matrix` or its (block-wise) + adjoint. +)doc"); REGISTER_OP("MatrixTriangularSolve") .Input("matrix: T") @@ -300,7 +452,37 @@ REGISTER_OP("MatrixTriangularSolve") .Attr("T: {double, float, complex64, complex128}") .SetShapeFn([](InferenceContext* c) { return MatrixSolveShapeFn(c, true /* square */); - }); + }) + .Doc(R"doc( +Solves systems of linear equations with upper or lower triangular matrices by +backsubstitution. + +`matrix` is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions form +square matrices. If `lower` is `True` then the strictly upper triangular part +of each inner-most matrix is assumed to be zero and not accessed. +If `lower` is `False` then the strictly lower triangular part of each inner-most +matrix is assumed to be zero and not accessed. +`rhs` is a tensor of shape `[..., M, K]`. + +The output is a tensor of shape `[..., M, K]`. If `adjoint` is +`False` then the innermost matrices in `output` satisfy the matrix equations +`matrix[..., :, :] * output[..., :, :] = rhs[..., :, :]`. +If `adjoint` is `True` then the innermost matrices in +`output` satisfy the matrix equations +`adjoint(matrix[..., i, k]) * output[..., k, j] = rhs[..., i, j]`. + +matrix: Shape is `[..., M, M]`. +rhs: Shape is `[..., M, K]`. +output: Shape is `[..., M, K]`.
+lower: Boolean indicating whether the innermost matrices in `matrix` are + lower or upper triangular. +adjoint: Boolean indicating whether to solve with `matrix` or its (block-wise) + adjoint. + +@compatibility(scipy) +Equivalent to scipy.linalg.solve_triangular +@end_compatibility +)doc"); REGISTER_OP("MatrixSolveLs") .Input("matrix: T") @@ -313,7 +495,54 @@ REGISTER_OP("MatrixSolveLs") ShapeHandle l2_regularizer; TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &l2_regularizer)); return MatrixSolveShapeFn(c, false /* square */); - }); + }) + .Doc(R"doc( +Solves one or more linear least-squares problems. + +`matrix` is a tensor of shape `[..., M, N]` whose inner-most 2 dimensions +form real or complex matrices of size `[M, N]`. `rhs` is a tensor of the same +type as `matrix` and shape `[..., M, K]`. +The output is a tensor of shape `[..., N, K]` where each output matrix solves +the equation +`matrix[..., :, :]` * `output[..., :, :]` = `rhs[..., :, :]` +in the least squares sense. + +We use the following notation for (complex) matrix and right-hand sides +in the batch: + +`matrix`=\\(A \in \mathbb{C}^{m \times n}\\), +`rhs`=\\(B \in \mathbb{C}^{m \times k}\\), +`output`=\\(X \in \mathbb{C}^{n \times k}\\), +`l2_regularizer`=\\(\lambda \in \mathbb{R}\\). + +If `fast` is `True`, then the solution is computed by solving the normal +equations using Cholesky decomposition. Specifically, if \\(m \ge n\\) then +\\(X = (A^H A + \lambda I)^{-1} A^H B\\), which solves the least-squares +problem \\(X = \mathrm{argmin}_{Z \in \mathbb{C}^{n \times k} } ||A Z - B||_F^2 + +\lambda ||Z||_F^2\\). If \\(m \lt n\\) then `output` is computed as +\\(X = A^H (A A^H + \lambda I)^{-1} B\\), which (for \\(\lambda = 0\\)) is the +minimum-norm solution to the under-determined linear system, i.e. +\\(X = \mathrm{argmin}_{Z \in \mathbb{C}^{n \times k} } ||Z||_F^2 \\), +subject to \\(A Z = B\\). Notice that the fast path is only numerically stable +when \\(A\\) is numerically full rank and has a condition number +\\(\mathrm{cond}(A) \lt \frac{1}{\sqrt{\epsilon_{mach} } }\\) or \\(\lambda\\) is +sufficiently large. + +If `fast` is `False` an algorithm based on the numerically robust complete +orthogonal decomposition is used. This computes the minimum-norm +least-squares solution, even when \\(A\\) is rank deficient. This path is +typically 6-7 times slower than the fast path. If `fast` is `False` then +`l2_regularizer` is ignored. + +matrix: Shape is `[..., M, N]`. +rhs: Shape is `[..., M, K]`. +output: Shape is `[..., N, K]`. +l2_regularizer: Scalar tensor. + +@compatibility(numpy) +Equivalent to np.linalg.lstsq +@end_compatibility +)doc"); REGISTER_OP("Qr") .Input("input: T") @@ -321,7 +550,31 @@ REGISTER_OP("Qr") .Output("r: T") .Attr("full_matrices: bool = False") .Attr("T: {double, float, complex64, complex128}") - .SetShapeFn(QrShapeFn); + .SetShapeFn(QrShapeFn) + .Doc(R"doc( +Computes the QR decompositions of one or more matrices. + +Computes the QR decomposition of each inner matrix in `tensor` such that +`tensor[..., :, :] = q[..., :, :] * r[..., :, :]` + +```python +# a is a tensor. +# q is a tensor of orthonormal matrices. +# r is a tensor of upper triangular matrices. +q, r = qr(a) +q_full, r_full = qr(a, full_matrices=True) +``` + +input: A tensor of shape `[..., M, N]` whose inner-most 2 dimensions + form matrices of size `[M, N]`. Let `P` be the minimum of `M` and `N`. +q: Orthonormal basis for range of `a`.
If `full_matrices` is `False` then + shape is `[..., M, P]`; if `full_matrices` is `True` then shape is + `[..., M, M]`. +r: Triangular factor. If `full_matrices` is `False` then shape is + `[..., P, N]`. If `full_matrices` is `True` then shape is `[..., M, N]`. +full_matrices: If true, compute full-sized `q` and `r`. If false + (the default), compute only the leading `P` columns of `q`. +)doc"); REGISTER_OP("Svd") .Input("input: T") @@ -331,7 +584,38 @@ REGISTER_OP("Svd") .Attr("compute_uv: bool = True") .Attr("full_matrices: bool = False") .Attr("T: {double, float, complex64, complex128}") - .SetShapeFn(SvdShapeFn); + .SetShapeFn(SvdShapeFn) + .Doc(R"doc( +Computes the singular value decompositions of one or more matrices. + +Computes the SVD of each inner matrix in `input` such that +`input[..., :, :] = u[..., :, :] * diag(s[..., :, :]) * transpose(v[..., :, :])` + +```python +# a is a tensor containing a batch of matrices. +# s is a tensor of singular values for each matrix. +# u is the tensor of left singular vectors for each matrix. +# v is the tensor of right singular vectors for each matrix. +s, u, v = svd(a) +s, _, _ = svd(a, compute_uv=False) +``` + +input: A tensor of shape `[..., M, N]` whose inner-most 2 dimensions + form matrices of size `[M, N]`. Let `P` be the minimum of `M` and `N`. +s: Singular values. Shape is `[..., P]`. +u: Left singular vectors. If `full_matrices` is `False` then shape is + `[..., M, P]`; if `full_matrices` is `True` then shape is + `[..., M, M]`. Undefined if `compute_uv` is `False`. +v: Right singular vectors. If `full_matrices` is `False` then shape is + `[..., N, P]`. If `full_matrices` is `True` then shape is `[..., N, N]`. + Undefined if `compute_uv` is false. +compute_uv: If true, left and right singular vectors will be + computed and returned in `u` and `v`, respectively. + If false, `u` and `v` are not set and should never be referenced. +full_matrices: If true, compute full-sized `u` and `v`. If false + (the default), compute only the leading `P` singular vectors. + Ignored if `compute_uv` is `False`. +)doc"); // Deprecated op registrations: diff --git a/tensorflow/core/ops/logging_ops.cc b/tensorflow/core/ops/logging_ops.cc index d263dc25b2..e6995821df 100644 --- a/tensorflow/core/ops/logging_ops.cc +++ b/tensorflow/core/ops/logging_ops.cc @@ -25,7 +25,17 @@ REGISTER_OP("Assert") .SetIsStateful() .Attr("T: list(type)") .Attr("summarize: int = 3") - .SetShapeFn(shape_inference::NoOutputs); + .SetShapeFn(shape_inference::NoOutputs) + .Doc(R"doc( +Asserts that the given condition is true. + +If `condition` evaluates to false, print the list of tensors in `data`. +`summarize` determines how many entries of the tensors to print. + +condition: The condition to evaluate. +data: The tensors to print out when condition is false. +summarize: Print this many entries of each tensor. +)doc"); REGISTER_OP("Print") .Input("input: T") @@ -37,7 +47,19 @@ REGISTER_OP("Print") .Attr("message: string = ''") .Attr("first_n: int = -1") .Attr("summarize: int = 3") - .SetShapeFn(shape_inference::UnchangedShape); + .SetShapeFn(shape_inference::UnchangedShape) + .Doc(R"doc( +Prints a list of tensors. + +Passes `input` through to `output` and prints `data` when evaluating. + +input: The tensor passed to `output`. +data: A list of tensors to print out when op is evaluated. +output: The unmodified `input` tensor. +message: A string, prefix of the error message. +first_n: Only log `first_n` number of times. -1 disables logging.
+summarize: Only print this many entries of each tensor. +)doc"); // ---------------------------------------------------------------------------- // Operators that deal with SummaryProtos (encoded as DT_STRING tensors) as @@ -51,7 +73,15 @@ REGISTER_OP("TensorSummaryV2") .Input("serialized_summary_metadata: string") .Output("summary: string") .Attr("T: type") - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Outputs a `Summary` protocol buffer with a tensor and per-plugin data. + +tag: A string attached to this summary. Used for organization in TensorBoard. +tensor: A tensor to serialize. +serialized_summary_metadata: A serialized SummaryMetadata proto. Contains plugin + data. +)doc"); REGISTER_OP("TensorSummary") .Input("tensor: T") @@ -60,21 +90,56 @@ REGISTER_OP("TensorSummary") .Attr("description: string = ''") .Attr("labels: list(string) = []") .Attr("display_name: string = ''") - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Outputs a `Summary` protocol buffer with a tensor. + +This op is being phased out in favor of TensorSummaryV2, which lets callers pass +a tag as well as a serialized SummaryMetadata proto string that contains +plugin-specific data. We will keep this op to maintain backwards compatibility. + +tensor: A tensor to serialize. +description: A JSON-encoded SummaryDescription proto. +labels: An unused list of strings. +display_name: An unused string. +)doc"); REGISTER_OP("ScalarSummary") .Input("tags: string") .Input("values: T") .Output("summary: string") .Attr("T: realnumbertype") - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Outputs a `Summary` protocol buffer with scalar values. + +The input `tags` and `values` must have the same shape. The generated summary +has a summary value for each tag-value pair in `tags` and `values`. + +tags: Tags for the summary. +values: Same shape as `tags`. Values for the summary. +summary: Scalar. Serialized `Summary` protocol buffer. +)doc"); REGISTER_OP("HistogramSummary") .Input("tag: string") .Input("values: T") .Output("summary: string") .Attr("T: realnumbertype = DT_FLOAT") - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Outputs a `Summary` protocol buffer with a histogram. + +The generated +[`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto) +has one summary value containing a histogram for `values`. + +This op reports an `InvalidArgument` error if any value is not finite. + +tag: Scalar. Tag to use for the `Summary.Value`. +values: Any shape. Values to use to build the histogram. +summary: Scalar. Serialized `Summary` protocol buffer. +)doc"); REGISTER_OP("ImageSummary") .Input("tag: string") @@ -86,7 +151,51 @@ REGISTER_OP("ImageSummary") "bad_color: tensor = { dtype: DT_UINT8 " "tensor_shape: { dim { size: 4 } } " "int_val: 255 int_val: 0 int_val: 0 int_val: 255 }") - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Outputs a `Summary` protocol buffer with images. + +The summary has up to `max_images` summary values containing images. The +images are built from `tensor` which must be 4-D with shape `[batch_size, +height, width, channels]` and where `channels` can be: + +* 1: `tensor` is interpreted as Grayscale. +* 3: `tensor` is interpreted as RGB. +* 4: `tensor` is interpreted as RGBA.
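As a hedged usage sketch of this op from Python (the wrapper `tf.summary.image` and its `max_outputs` argument are assumptions; only the ImageSummary op itself is registered in this diff):

```python
import numpy as np
import tensorflow as tf

# One 4-D grayscale batch; at most two batch elements are rendered as images.
images = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
summary_op = tf.summary.image("digits", images, max_outputs=2)
with tf.Session() as sess:
    proto = sess.run(summary_op,
                     feed_dict={images: np.random.rand(4, 28, 28, 1)})
    # proto is the serialized `Summary` protocol buffer described here.
```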
+ +The images have the same number of channels as the input tensor. For float +input, the values are normalized one image at a time to fit in the range +`[0, 255]`. `uint8` values are unchanged. The op uses two different +normalization algorithms: + +* If the input values are all positive, they are rescaled so the largest one + is 255. + +* If any input value is negative, the values are shifted so input value 0.0 + is at 127. They are then rescaled so that either the smallest value is 0, + or the largest one is 255. + +The `tag` argument is a scalar `Tensor` of type `string`. It is used to +build the `tag` of the summary values: + +* If `max_images` is 1, the summary value tag is '*tag*/image'. +* If `max_images` is greater than 1, the summary value tags are + generated sequentially as '*tag*/image/0', '*tag*/image/1', etc. + +The `bad_color` argument is the color to use in the generated images for +non-finite input values. It is a `uint8` 1-D tensor of length `channels`. +Each element must be in the range `[0, 255]` (it represents the value of a +pixel in the output image). Non-finite values in the input tensor are +replaced by this tensor in the output image. The default value is the color +red. + +tag: Scalar. Used to build the `tag` attribute of the summary values. +tensor: 4-D of shape `[batch_size, height, width, channels]` where + `channels` is 1, 3, or 4. +max_images: Max number of batch elements to generate images for. +bad_color: Color to use for pixels with non-finite values. +summary: Scalar. Serialized `Summary` protocol buffer. +)doc"); REGISTER_OP("AudioSummaryV2") .Input("tag: string") @@ -94,7 +203,28 @@ REGISTER_OP("AudioSummaryV2") .Input("sample_rate: float") .Output("summary: string") .Attr("max_outputs: int >= 1 = 3") - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Outputs a `Summary` protocol buffer with audio. + +The summary has up to `max_outputs` summary values containing audio. The +audio is built from `tensor` which must be 3-D with shape `[batch_size, +frames, channels]` or 2-D with shape `[batch_size, frames]`. The values are +assumed to be in the range of `[-1.0, 1.0]` with a sample rate of `sample_rate`. + +The `tag` argument is a scalar `Tensor` of type `string`. It is used to +build the `tag` of the summary values: + +* If `max_outputs` is 1, the summary value tag is '*tag*/audio'. +* If `max_outputs` is greater than 1, the summary value tags are + generated sequentially as '*tag*/audio/0', '*tag*/audio/1', etc. + +tag: Scalar. Used to build the `tag` attribute of the summary values. +tensor: 2-D of shape `[batch_size, frames]`. +sample_rate: The sample rate of the signal in hertz. +max_outputs: Max number of batch elements to generate audio for. +summary: Scalar. Serialized `Summary` protocol buffer. +)doc"); REGISTER_OP("AudioSummary") .Input("tag: string") @@ -103,12 +233,48 @@ REGISTER_OP("AudioSummary") .Attr("sample_rate: float") .Attr("max_outputs: int >= 1 = 3") .SetShapeFn(shape_inference::ScalarShape) - .Deprecated(15, "Use AudioSummaryV2."); + .Deprecated(15, "Use AudioSummaryV2.") + .Doc(R"doc( +Outputs a `Summary` protocol buffer with audio. + +The summary has up to `max_outputs` summary values containing audio. The +audio is built from `tensor` which must be 3-D with shape `[batch_size, +frames, channels]` or 2-D with shape `[batch_size, frames]`. The values are +assumed to be in the range of `[-1.0, 1.0]` with a sample rate of `sample_rate`.
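A short sketch of the non-deprecated V2 path (a sketch only; the Python wrapper `tf.summary.audio` is assumed and is not part of this diff):

```python
import numpy as np
import tensorflow as tf

# Two one-second tones at 16 kHz, values already in [-1.0, 1.0].
t = np.linspace(0.0, 1.0, 16000, dtype=np.float32)
waves = np.stack([np.sin(2 * np.pi * 440 * t), np.sin(2 * np.pi * 880 * t)])
audio = tf.placeholder(tf.float32, shape=[None, 16000])
summary_op = tf.summary.audio("tones", audio, sample_rate=16000, max_outputs=2)
with tf.Session() as sess:
    proto = sess.run(summary_op, feed_dict={audio: waves})  # serialized Summary
```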
+ +The `tag` argument is a scalar `Tensor` of type `string`. It is used to +build the `tag` of the summary values: + +* If `max_outputs` is 1, the summary value tag is '*tag*/audio'. +* If `max_outputs` is greater than 1, the summary value tags are + generated sequentially as '*tag*/audio/0', '*tag*/audio/1', etc. + +tag: Scalar. Used to build the `tag` attribute of the summary values. +tensor: 2-D of shape `[batch_size, frames]`. +sample_rate: The sample rate of the signal in hertz. +max_outputs: Max number of batch elements to generate audio for. +summary: Scalar. Serialized `Summary` protocol buffer. +)doc"); REGISTER_OP("MergeSummary") .Input("inputs: N * string") .Output("summary: string") .Attr("N : int >= 1") - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Merges summaries. + +This op creates a +[`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto) +protocol buffer that contains the union of all the values in the input +summaries. + +When the Op is run, it reports an `InvalidArgument` error if multiple values +in the summaries to merge use the same tag. + +inputs: Can be of any shape. Each must contain serialized `Summary` protocol + buffers. +summary: Scalar. Serialized `Summary` protocol buffer. +)doc"); } // end namespace tensorflow diff --git a/tensorflow/core/ops/lookup_ops.cc b/tensorflow/core/ops/lookup_ops.cc index a67267418d..dac02dad8b 100644 --- a/tensorflow/core/ops/lookup_ops.cc +++ b/tensorflow/core/ops/lookup_ops.cc @@ -83,7 +83,21 @@ REGISTER_OP("LookupTableFind") TF_RETURN_IF_ERROR(c->WithRankAtMost(c->input(2), 1, &unused)); c->set_output(0, c->UnknownShape()); return Status::OK(); - }); + }) + .Doc(R"doc( +Looks up keys in a table, outputs the corresponding values. + +The tensor `keys` must be of the same type as the keys of the table. +The output `values` is of the type of the table values. + +The scalar `default_value` is the value output for keys not present in the +table. It must also be of the same type as the table values. + +table_handle: Handle to the table. +keys: Any shape. Keys to look up. +values: Same shape as `keys`. Values found in the table, or `default_value` + for missing keys. +)doc"); REGISTER_OP("LookupTableFindV2") .Input("table_handle: resource") @@ -101,7 +115,21 @@ REGISTER_OP("LookupTableFindV2") TF_RETURN_IF_ERROR(c->WithRankAtMost(c->input(2), 1, &unused)); c->set_output(0, c->UnknownShape()); return Status::OK(); - }); + }) + .Doc(R"doc( +Looks up keys in a table, outputs the corresponding values. + +The tensor `keys` must be of the same type as the keys of the table. +The output `values` is of the type of the table values. + +The scalar `default_value` is the value output for keys not present in the +table. It must also be of the same type as the table values. + +table_handle: Handle to the table. +keys: Any shape. Keys to look up. +values: Same shape as `keys`. Values found in the table, or `default_value` + for missing keys. +)doc"); REGISTER_OP("LookupTableInsert") .Input("table_handle: Ref(string)") @@ -117,7 +145,17 @@ REGISTER_OP("LookupTableInsert") // TODO(ebrevdo): Validate keys and values shape. return Status::OK(); - }); + }) + .Doc(R"doc( +Updates the table to associate keys with values. + +The tensor `keys` must be of the same type as the keys of the table. +The tensor `values` must be of the type of the table values. + +table_handle: Handle to the table. +keys: Any shape. Keys to look up. +values: Values to associate with keys.
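A sketch of the insert-then-find flow from Python, assuming the contrib wrapper `tf.contrib.lookup.MutableHashTable` (which drives these insert/find ops; the wrapper itself is not in this diff):

```python
import tensorflow as tf

# default_value is returned by lookup for keys that were never inserted.
table = tf.contrib.lookup.MutableHashTable(
    key_dtype=tf.string, value_dtype=tf.int64, default_value=-1)
insert = table.insert(tf.constant(["a", "b"]),
                      tf.constant([1, 2], dtype=tf.int64))
values = table.lookup(tf.constant(["a", "zzz"]))
with tf.Session() as sess:
    sess.run(insert)
    print(sess.run(values))  # [ 1 -1]
```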
+)doc"); REGISTER_OP("LookupTableInsertV2") .Input("table_handle: resource") @@ -131,17 +169,39 @@ REGISTER_OP("LookupTableInsertV2") // TODO: Validate keys and values shape. return Status::OK(); - }); + }) + .Doc(R"doc( +Updates the table to associates keys with values. + +The tensor `keys` must be of the same type as the keys of the table. +The tensor `values` must be of the type of the table values. + +table_handle: Handle to the table. +keys: Any shape. Keys to look up. +values: Values to associate with keys. +)doc"); REGISTER_OP("LookupTableSize") .Input("table_handle: Ref(string)") .Output("size: int64") - .SetShapeFn(TwoElementVectorInputsAndScalarOutputs); + .SetShapeFn(TwoElementVectorInputsAndScalarOutputs) + .Doc(R"doc( +Computes the number of elements in the given table. + +table_handle: Handle to the table. +size: Scalar that contains number of elements in the table. +)doc"); REGISTER_OP("LookupTableSizeV2") .Input("table_handle: resource") .Output("size: int64") - .SetShapeFn(ScalarAndTwoElementVectorInputsAndScalarOutputs); + .SetShapeFn(ScalarAndTwoElementVectorInputsAndScalarOutputs) + .Doc(R"doc( +Computes the number of elements in the given table. + +table_handle: Handle to the table. +size: Scalar that contains number of elements in the table. +)doc"); REGISTER_OP("LookupTableExport") .Input("table_handle: Ref(string)") @@ -161,7 +221,14 @@ REGISTER_OP("LookupTableExport") c->set_output(0, keys); c->set_output(1, values); return Status::OK(); - }); + }) + .Doc(R"doc( +Outputs all keys and values in the table. + +table_handle: Handle to the table. +keys: Vector of all keys present in the table. +values: Tensor of all values in the table. Indexed in parallel with `keys`. +)doc"); REGISTER_OP("LookupTableExportV2") .Input("table_handle: resource") @@ -179,7 +246,14 @@ REGISTER_OP("LookupTableExportV2") c->set_output(0, keys); c->set_output(1, values); return Status::OK(); - }); + }) + .Doc(R"doc( +Outputs all keys and values in the table. + +table_handle: Handle to the table. +keys: Vector of all keys present in the table. +values: Tensor of all values in the table. Indexed in parallel with `keys`. +)doc"); REGISTER_OP("LookupTableImport") .Input("table_handle: Ref(string)") @@ -195,7 +269,17 @@ REGISTER_OP("LookupTableImport") // TODO(ebrevdo): Validate keys and values shape. return Status::OK(); - }); + }) + .Doc(R"doc( +Replaces the contents of the table with the specified keys and values. + +The tensor `keys` must be of the same type as the keys of the table. +The tensor `values` must be of the type of the table values. + +table_handle: Handle to the table. +keys: Any shape. Keys to look up. +values: Values to associate with keys. +)doc"); REGISTER_OP("LookupTableImportV2") .Input("table_handle: resource") @@ -209,7 +293,17 @@ REGISTER_OP("LookupTableImportV2") // TODO: Validate keys and values shape. return Status::OK(); - }); + }) + .Doc(R"doc( +Replaces the contents of the table with the specified keys and values. + +The tensor `keys` must be of the same type as the keys of the table. +The tensor `values` must be of the type of the table values. + +table_handle: Handle to the table. +keys: Any shape. Keys to look up. +values: Values to associate with keys. +)doc"); REGISTER_OP("HashTable") .Output("table_handle: Ref(string)") @@ -219,7 +313,24 @@ REGISTER_OP("HashTable") .Attr("key_dtype: type") .Attr("value_dtype: type") .SetIsStateful() - .SetShapeFn(TwoElementOutput); + .SetShapeFn(TwoElementOutput) + .Doc(R"doc( +Creates a non-initialized hash table. 
+ +This op creates a hash table, specifying the type of its keys and values. +Before using the table you will have to initialize it. After initialization the +table will be immutable. + +table_handle: Handle to a table. +container: If non-empty, this table is placed in the given container. + Otherwise, a default container is used. +shared_name: If non-empty, this table is shared under the given name across + multiple sessions. +use_node_name_sharing: If true and shared_name is empty, the table is shared + using the node name. +key_dtype: Type of the table keys. +value_dtype: Type of the table values. +)doc"); REGISTER_OP("HashTableV2") .Output("table_handle: resource") @@ -229,7 +340,24 @@ REGISTER_OP("HashTableV2") .Attr("key_dtype: type") .Attr("value_dtype: type") .SetIsStateful() - .SetShapeFn(ScalarOutput); + .SetShapeFn(ScalarOutput) + .Doc(R"doc( +Creates a non-initialized hash table. + +This op creates a hash table, specifying the type of its keys and values. +Before using the table you will have to initialize it. After initialization the +table will be immutable. + +table_handle: Handle to a table. +container: If non-empty, this table is placed in the given container. + Otherwise, a default container is used. +shared_name: If non-empty, this table is shared under the given name across + multiple sessions. +use_node_name_sharing: If true and shared_name is empty, the table is shared + using the node name. +key_dtype: Type of the table keys. +value_dtype: Type of the table values. +)doc"); REGISTER_OP("MutableHashTable") .Output("table_handle: Ref(string)") @@ -239,7 +367,24 @@ REGISTER_OP("MutableHashTable") .Attr("key_dtype: type") .Attr("value_dtype: type") .SetIsStateful() - .SetShapeFn(TwoElementOutput); + .SetShapeFn(TwoElementOutput) + .Doc(R"doc( +Creates an empty hash table. + +This op creates a mutable hash table, specifying the type of its keys and +values. Each value must be a scalar. Data can be inserted into the table using +the insert operations. It does not support the initialization operation. + +table_handle: Handle to a table. +container: If non-empty, this table is placed in the given container. + Otherwise, a default container is used. +shared_name: If non-empty, this table is shared under the given name across + multiple sessions. +use_node_name_sharing: If true and shared_name is empty, the table is shared + using the node name. +key_dtype: Type of the table keys. +value_dtype: Type of the table values. +)doc"); REGISTER_OP("MutableHashTableV2") .Output("table_handle: resource") @@ -249,7 +394,24 @@ REGISTER_OP("MutableHashTableV2") .Attr("key_dtype: type") .Attr("value_dtype: type") .SetIsStateful() - .SetShapeFn(ScalarOutput); + .SetShapeFn(ScalarOutput) + .Doc(R"doc( +Creates an empty hash table. + +This op creates a mutable hash table, specifying the type of its keys and +values. Each value must be a scalar. Data can be inserted into the table using +the insert operations. It does not support the initialization operation. + +table_handle: Handle to a table. +container: If non-empty, this table is placed in the given container. + Otherwise, a default container is used. +shared_name: If non-empty, this table is shared under the given name across + multiple sessions. +use_node_name_sharing: If true and shared_name is empty, the table is shared + using the node name. +key_dtype: Type of the table keys. +value_dtype: Type of the table values. 
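For contrast with the mutable tables, a sketch of the initialize-once flow for the `HashTable` ops above (the contrib wrappers `tf.contrib.lookup.HashTable` and `KeyValueTensorInitializer` are assumptions; only the ops are in this diff):

```python
import tensorflow as tf

# The table must be initialized before use and is immutable afterwards.
table = tf.contrib.lookup.HashTable(
    tf.contrib.lookup.KeyValueTensorInitializer(
        keys=tf.constant(["emerson", "lake", "palmer"]),
        values=tf.constant([0, 1, 2], dtype=tf.int64)),
    default_value=-1)
out = table.lookup(tf.constant(["lake", "banana"]))
with tf.Session() as sess:
    sess.run(tf.tables_initializer())  # runs the InitializeTable* op
    print(sess.run(out))               # [ 1 -1]
```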
+)doc"); REGISTER_OP("MutableHashTableOfTensors") .Output("table_handle: Ref(string)") @@ -260,7 +422,22 @@ REGISTER_OP("MutableHashTableOfTensors") .Attr("value_dtype: type") .Attr("value_shape: shape = {}") .SetIsStateful() - .SetShapeFn(TwoElementOutput); + .SetShapeFn(TwoElementOutput) + .Doc(R"doc( +Creates an empty hash table. + +This op creates a mutable hash table, specifying the type of its keys and +values. Each value must be a vector. Data can be inserted into the table using +the insert operations. It does not support the initialization operation. + +table_handle: Handle to a table. +container: If non-empty, this table is placed in the given container. + Otherwise, a default container is used. +shared_name: If non-empty, this table is shared under the given name across + multiple sessions. +key_dtype: Type of the table keys. +value_dtype: Type of the table values. +)doc"); REGISTER_OP("MutableHashTableOfTensorsV2") .Output("table_handle: resource") @@ -271,7 +448,22 @@ REGISTER_OP("MutableHashTableOfTensorsV2") .Attr("value_dtype: type") .Attr("value_shape: shape = {}") .SetIsStateful() - .SetShapeFn(ScalarOutput); + .SetShapeFn(ScalarOutput) + .Doc(R"doc( +Creates an empty hash table. + +This op creates a mutable hash table, specifying the type of its keys and +values. Each value must be a vector. Data can be inserted into the table using +the insert operations. It does not support the initialization operation. + +table_handle: Handle to a table. +container: If non-empty, this table is placed in the given container. + Otherwise, a default container is used. +shared_name: If non-empty, this table is shared under the given name across + multiple sessions. +key_dtype: Type of the table keys. +value_dtype: Type of the table values. +)doc"); REGISTER_OP("MutableDenseHashTable") .Input("empty_key: key_dtype") @@ -285,7 +477,32 @@ REGISTER_OP("MutableDenseHashTable") .Attr("initial_num_buckets: int = 131072") // 2^17 .Attr("max_load_factor: float = 0.8") .SetIsStateful() - .SetShapeFn(TwoElementOutput); + .SetShapeFn(TwoElementOutput) + .Doc(R"doc( +Creates an empty hash table that uses tensors as the backing store. + +It uses "open addressing" with quadratic reprobing to resolve +collisions. + +This op creates a mutable hash table, specifying the type of its keys and +values. Each value must be a scalar. Data can be inserted into the table using +the insert operations. It does not support the initialization operation. + +empty_key: The key used to represent empty key buckets internally. Must not + be used in insert or lookup operations. +table_handle: Handle to a table. +container: If non-empty, this table is placed in the given container. + Otherwise, a default container is used. +shared_name: If non-empty, this table is shared under the given name across + multiple sessions. +key_dtype: Type of the table keys. +value_dtype: Type of the table values. +value_shape: The shape of each value. +initial_num_buckets: The initial number of hash table buckets. Must be a power + to 2. +max_load_factor: The maximum ratio between number of entries and number of + buckets before growing the table. Must be between 0 and 1. 
+)doc"); REGISTER_OP("MutableDenseHashTableV2") .Input("empty_key: key_dtype") @@ -299,7 +516,32 @@ REGISTER_OP("MutableDenseHashTableV2") .Attr("initial_num_buckets: int = 131072") // 2^17 .Attr("max_load_factor: float = 0.8") .SetIsStateful() - .SetShapeFn(ScalarOutput); + .SetShapeFn(ScalarOutput) + .Doc(R"doc( +Creates an empty hash table that uses tensors as the backing store. + +It uses "open addressing" with quadratic reprobing to resolve +collisions. + +This op creates a mutable hash table, specifying the type of its keys and +values. Each value must be a scalar. Data can be inserted into the table using +the insert operations. It does not support the initialization operation. + +empty_key: The key used to represent empty key buckets internally. Must not + be used in insert or lookup operations. +table_handle: Handle to a table. +container: If non-empty, this table is placed in the given container. + Otherwise, a default container is used. +shared_name: If non-empty, this table is shared under the given name across + multiple sessions. +key_dtype: Type of the table keys. +value_dtype: Type of the table values. +value_shape: The shape of each value. +initial_num_buckets: The initial number of hash table buckets. Must be a power + to 2. +max_load_factor: The maximum ratio between number of entries and number of + buckets before growing the table. Must be between 0 and 1. +)doc"); REGISTER_OP("InitializeTable") .Input("table_handle: Ref(string)") @@ -317,7 +559,14 @@ REGISTER_OP("InitializeTable") TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &keys)); TF_RETURN_IF_ERROR(c->Merge(keys, c->input(2), &keys)); return Status::OK(); - }); + }) + .Doc(R"doc( +Table initializer that takes two tensors for keys and values respectively. + +table_handle: Handle to a table which will be initialized. +keys: Keys of type Tkey. +values: Values of type Tval. +)doc"); REGISTER_OP("InitializeTableV2") .Input("table_handle: resource") @@ -333,7 +582,14 @@ REGISTER_OP("InitializeTableV2") TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &keys)); TF_RETURN_IF_ERROR(c->Merge(keys, c->input(2), &keys)); return Status::OK(); - }); + }) + .Doc(R"doc( +Table initializer that takes two tensors for keys and values respectively. + +table_handle: Handle to a table which will be initialized. +keys: Keys of type Tkey. +values: Values of type Tval. +)doc"); REGISTER_OP("InitializeTableFromTextFile") .Input("table_handle: Ref(string)") @@ -350,7 +606,29 @@ REGISTER_OP("InitializeTableFromTextFile") TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &handle)); return Status::OK(); - }); + }) + .Doc(R"doc( +Initializes a table from a text file. + +It inserts one key-value pair into the table for each line of the file. +The key and value is extracted from the whole line content, elements from the +split line based on `delimiter` or the line number (starting from zero). +Where to extract the key and value from a line is specified by `key_index` and +`value_index`. + +- A value of -1 means use the line number(starting from zero), expects `int64`. +- A value of -2 means use the whole line content, expects `string`. +- A value >= 0 means use the index (starting at zero) of the split line based + on `delimiter`. + +table_handle: Handle to a table which will be initialized. +filename: Filename of a vocabulary text file. +key_index: Column index in a line to get the table `key` values from. +value_index: Column index that represents information of a line to get the table + `value` values from. 
+vocab_size: Number of elements of the file, use -1 if unknown. +delimiter: Delimiter to separate fields in a line. +)doc"); REGISTER_OP("InitializeTableFromTextFileV2") .Input("table_handle: resource") @@ -365,6 +643,28 @@ REGISTER_OP("InitializeTableFromTextFileV2") TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &handle)); return Status::OK(); - }); + }) + .Doc(R"doc( +Initializes a table from a text file. + +It inserts one key-value pair into the table for each line of the file. +The key and value are extracted from the whole line content, from elements of +the line split on `delimiter`, or from the line number (starting from zero). +Where to extract the key and value from a line is specified by `key_index` and +`value_index`. + +- A value of -1 means use the line number (starting from zero), expects `int64`. +- A value of -2 means use the whole line content, expects `string`. +- A value >= 0 means use the index (starting at zero) of the split line based + on `delimiter`. + +table_handle: Handle to a table which will be initialized. +filename: Filename of a vocabulary text file. +key_index: Column index in a line to get the table `key` values from. +value_index: Column index in a line to get the table `value` values from. +vocab_size: Number of elements of the file, use -1 if unknown. +delimiter: Delimiter to separate fields in a line. +)doc"); } // namespace tensorflow diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc index dd484c3ee7..8ea170ba14 100644 --- a/tensorflow/core/ops/math_ops.cc +++ b/tensorflow/core/ops/math_ops.cc @@ -40,7 +40,12 @@ REGISTER_OP("AddN") } c->set_output(0, cur); return Status::OK(); - }); + }) + .Doc(R"doc( +Add all input tensors element-wise. + +inputs: Must all be the same size and shape. +)doc"); // -------------------------------------------------------------------------- @@ -57,7 +62,22 @@ REGISTER_OP("AccumulateNV2") .Attr("shape: shape") .SetIsCommutative() .SetIsAggregate() - .SetShapeFn(shape_inference::ExplicitShape); + .SetShapeFn(shape_inference::ExplicitShape) + .Doc(R"doc( +Returns the element-wise sum of a list of tensors. + +`tf.accumulate_n_v2` performs the same operation as `tf.add_n`, but does not +wait for all of its inputs to be ready before beginning to sum. This can +save memory if inputs are ready at different times, since minimum temporary +storage is proportional to the output size rather than the inputs' size. + +Unlike the original `accumulate_n`, `accumulate_n_v2` is differentiable. + +Returns a `Tensor` of same shape and type as the elements of `inputs`. + +inputs: A list of `Tensor` objects, each with same shape and type. +shape: Shape of elements of `inputs`. +)doc"); // -------------------------------------------------------------------------- @@ -100,7 +120,35 @@ REGISTER_OP("BatchMatMul") batch_dims, c->Matrix(output_rows, output_cols), &out)); c->set_output(0, out); return Status::OK(); - }); + }) + .Doc(R"doc( +Multiplies slices of two tensors in batches. + +Multiplies all slices of `Tensor` `x` and `y` (each slice can be +viewed as an element of a batch), and arranges the individual results +in a single output tensor of the same batch size. Each of the +individual slices can optionally be adjointed (to adjoint a matrix +means to transpose and conjugate it) before multiplication by setting +the `adj_x` or `adj_y` flag to `True`, which are by default `False`. + +The input tensors `x` and `y` are 2-D or higher with shape `[..., r_x, c_x]` +and `[..., r_y, c_y]`.
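A shape-level sketch of the batched case (assuming `tf.matmul`, which dispatches to BatchMatMul for inputs of rank greater than 2):

```python
import numpy as np
import tensorflow as tf

# A batch of 4 slices; adjoint_b transposes (and conjugates) each y slice.
x = tf.constant(np.random.rand(4, 2, 3), dtype=tf.float32)  # [..., r_x, c_x]
y = tf.constant(np.random.rand(4, 5, 3), dtype=tf.float32)  # [..., r_y, c_y]
z = tf.matmul(x, y, adjoint_b=True)  # c_x must match c_y when adj_y is True
with tf.Session() as sess:
    print(sess.run(z).shape)  # (4, 2, 5): r_o = r_x, c_o = r_y
```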
+ +The output tensor is 2-D or higher with shape `[..., r_o, c_o]`, where: + + r_o = c_x if adj_x else r_x + c_o = r_y if adj_y else c_y + +It is computed as: + + output[..., :, :] = matrix(x[..., :, :]) * matrix(y[..., :, :]) + +x: 2-D or higher with shape `[..., r_x, c_x]`. +y: 2-D or higher with shape `[..., r_y, c_y]`. +output: 2-D or higher with shape `[..., r_o, c_o]`. +adj_x: If `True`, adjoint the slices of `x`. Defaults to `False`. +adj_y: If `True`, adjoint the slices of `y`. Defaults to `False`. +)doc"); // -------------------------------------------------------------------------- // Casting Ops @@ -114,7 +162,10 @@ REGISTER_OP("Cast") .Output("y: DstT") .Attr("SrcT: type") .Attr("DstT: type") - .SetShapeFn(shape_inference::UnchangedShape); + .SetShapeFn(shape_inference::UnchangedShape) + .Doc(R"doc( +Cast x of type SrcT to y of DstT. +)doc"); REGISTER_OP("_HostCast") .Input("x: SrcT") @@ -134,14 +185,29 @@ REGISTER_OP("Abs") .Input("x: T") .Output("y: T") .Attr("T: {half, bfloat16, float, double, int32, int64}") - .SetShapeFn(shape_inference::UnchangedShape); + .SetShapeFn(shape_inference::UnchangedShape) + .Doc(R"doc( +Computes the absolute value of a tensor. + +Given a tensor `x`, this operation returns a tensor containing the absolute +value of each element in `x`. For example, if x is an input element and y is +an output element, this operation computes \\(y = |x|\\). +)doc"); REGISTER_OP("ComplexAbs") .Input("x: T") .Output("y: Tout") .Attr("T: {complex64, complex128} = DT_COMPLEX64") .Attr("Tout: {float, double} = DT_FLOAT") - .SetShapeFn(shape_inference::UnchangedShape); + .SetShapeFn(shape_inference::UnchangedShape) + .Doc(R"doc( +Computes the complex absolute value of a tensor. + +Given a tensor `x` of complex numbers, this operation returns a tensor of type +`float` or `double` that is the absolute value of each element in `x`. All +elements in `x` must be complex numbers of the form \\(a + bj\\). The absolute +value is computed as \\( \sqrt{a^2 + b^2}\\). +)doc"); // Declares cwise unary operations signature: 't -> 't #define UNARY() \ @@ -171,73 +237,244 @@ REGISTER_OP("ComplexAbs") .Attr("T: {half, bfloat16, float, double, complex64, complex128}") \ .SetShapeFn(shape_inference::UnchangedShape) -REGISTER_OP("Neg").UNARY(); +REGISTER_OP("Neg") + .UNARY() + .Doc(R"doc( +Computes numerical negative value element-wise. +I.e., \\(y = -x\\). +)doc"); -REGISTER_OP("Inv").UNARY(); +REGISTER_OP("Inv") + .UNARY() + .Doc(R"doc( +Computes the reciprocal of x element-wise. +I.e., \\(y = 1 / x\\). +)doc") + .Deprecated(17, "Use Reciprocal"); -REGISTER_OP("InvGrad").UNARY_GRADIENT_COMPLEX(); +REGISTER_OP("InvGrad") + .UNARY_GRADIENT_COMPLEX() + .Doc(R"doc( +Computes the gradient for the inverse of `x` wrt its input. -REGISTER_OP("Reciprocal").UNARY(); +Specifically, `grad = -dy * y*y`, where `y = 1/x`, and `dy` +is the corresponding input gradient. +)doc") + .Deprecated(17, "Use ReciprocalGrad"); -REGISTER_OP("ReciprocalGrad").UNARY_GRADIENT_COMPLEX(); +REGISTER_OP("Reciprocal") + .UNARY() + .Doc(R"doc( +Computes the reciprocal of x element-wise. +I.e., \\(y = 1 / x\\). +)doc"); -REGISTER_OP("Square").UNARY(); +REGISTER_OP("ReciprocalGrad") + .UNARY_GRADIENT_COMPLEX() + .Doc(R"doc( +Computes the gradient for the inverse of `x` wrt its input. -REGISTER_OP("Sqrt").UNARY_COMPLEX(); +Specifically, `grad = -dy * y*y`, where `y = 1/x`, and `dy` +is the corresponding input gradient.
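A numeric check of the identity stated above with `dy = 1` (assuming `tf.reciprocal` and `tf.gradients`): d/dx (1/x) should equal -1/x^2.

```python
import tensorflow as tf

x = tf.constant([1.0, 2.0, 4.0])
y = tf.reciprocal(x)
(dx,) = tf.gradients(y, x)  # grad = -dy * y*y with dy = 1
with tf.Session() as sess:
    print(sess.run(dx))  # [-1.0, -0.25, -0.0625] == -1/x^2
```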
+)doc"); -REGISTER_OP("SqrtGrad").UNARY_GRADIENT_COMPLEX(); +REGISTER_OP("Square") + .UNARY() + .Doc(R"doc( +Computes square of x element-wise. +I.e., \\(y = x * x = x^2\\). +)doc"); -REGISTER_OP("Rsqrt").UNARY_COMPLEX(); +REGISTER_OP("Sqrt") + .UNARY_COMPLEX() + .Doc(R"doc( +Computes square root of x element-wise. +I.e., \\(y = \sqrt{x} = x^{1/2}\\). +)doc"); -REGISTER_OP("Round").UNARY(); +REGISTER_OP("SqrtGrad") + .UNARY_GRADIENT_COMPLEX() + .Doc(R"doc( +Computes the gradient for the sqrt of `x` wrt its input. -REGISTER_OP("RsqrtGrad").UNARY_GRADIENT_COMPLEX(); +Specifically, `grad = dy * 0.5 / y`, where `y = sqrt(x)`, and `dy` +is the corresponding input gradient. +)doc"); -REGISTER_OP("Exp").UNARY_COMPLEX(); +REGISTER_OP("Rsqrt") + .UNARY_COMPLEX() + .Doc(R"doc( +Computes reciprocal of square root of x element-wise. +I.e., \\(y = 1 / \sqrt{x}\\). +)doc"); -REGISTER_OP("Expm1").UNARY_COMPLEX(); +REGISTER_OP("Round") + .UNARY() + .Doc(R"doc( +Rounds the values of a tensor to the nearest integer, element-wise. -REGISTER_OP("Log").UNARY_COMPLEX(); +Rounds half to even. Also known as bankers rounding. If you want to round +according to the current system rounding mode use std::cint. +)doc"); + +REGISTER_OP("RsqrtGrad") + .UNARY_GRADIENT_COMPLEX() + .Doc(R"doc( +Computes the gradient for the rsqrt of `x` wrt its input. -REGISTER_OP("Log1p").UNARY_COMPLEX(); +Specifically, `grad = dy * -0.5 * y^3`, where `y = rsqrt(x)`, and `dy` +is the corresponding input gradient. +)doc"); + +REGISTER_OP("Exp") + .UNARY_COMPLEX() + .Doc(R"doc( +Computes exponential of x element-wise. \\(y = e^x\\). +)doc"); + +REGISTER_OP("Expm1") + .UNARY_COMPLEX() + .Doc(R"doc( +Computes exponential of x - 1 element-wise. +I.e., \\(y = (\exp x) - 1\\). +)doc"); + +REGISTER_OP("Log") + .UNARY_COMPLEX() + .Doc(R"doc( +Computes natural logarithm of x element-wise. +I.e., \\(y = \log_e x\\). +)doc"); + +REGISTER_OP("Log1p") + .UNARY_COMPLEX() + .Doc(R"doc( +Computes natural logarithm of (1 + x) element-wise. +I.e., \\(y = \log_e (1 + x)\\). +)doc"); + +REGISTER_OP("Sinh") + .UNARY_COMPLEX() + .Doc(R"doc( +Computes hyperbolic sine of x element-wise. +)doc"); + +REGISTER_OP("Cosh") + .UNARY_COMPLEX() + .Doc(R"doc( +Computes hyperbolic cosine of x element-wise. +)doc"); + +REGISTER_OP("Tanh") + .UNARY_COMPLEX() + .Doc(R"doc( +Computes hyperbolic tangent of `x` element-wise. +)doc"); -REGISTER_OP("Sinh").UNARY_COMPLEX(); +REGISTER_OP("Asinh") + .UNARY_COMPLEX() + .Doc(R"doc( +Computes inverse hyperbolic sine of x element-wise. +)doc"); -REGISTER_OP("Cosh").UNARY_COMPLEX(); +REGISTER_OP("Acosh") + .UNARY_COMPLEX() + .Doc(R"doc( +Computes inverse hyperbolic cosine of x element-wise. +)doc"); -REGISTER_OP("Tanh").UNARY_COMPLEX(); +REGISTER_OP("Atanh") + .UNARY_COMPLEX() + .Doc(R"doc( +Computes inverse hyperbolic tangent of x element-wise. +)doc"); -REGISTER_OP("Asinh").UNARY_COMPLEX(); +REGISTER_OP("TanhGrad") + .UNARY_GRADIENT_COMPLEX() + .Doc(R"doc( +Computes the gradient for the tanh of `x` wrt its input. -REGISTER_OP("Acosh").UNARY_COMPLEX(); +Specifically, `grad = dy * (1 - y*y)`, where `y = tanh(x)`, and `dy` +is the corresponding input gradient. +)doc"); -REGISTER_OP("Atanh").UNARY_COMPLEX(); +REGISTER_OP("Lgamma") + .UNARY_REAL() + .Doc(R"doc( +Computes the log of the absolute value of `Gamma(x)` element-wise. +)doc"); -REGISTER_OP("TanhGrad").UNARY_GRADIENT_COMPLEX(); +REGISTER_OP("Digamma") + .UNARY_REAL() + .Doc(R"doc( +Computes Psi, the derivative of Lgamma (the log of the absolute value of +`Gamma(x)`), element-wise. 
+)doc"); -REGISTER_OP("Lgamma").UNARY_REAL(); +REGISTER_OP("Erf") + .UNARY_REAL() + .Doc(R"doc( +Computes the Gauss error function of `x` element-wise. +)doc"); -REGISTER_OP("Digamma").UNARY_REAL(); +REGISTER_OP("Erfc") + .UNARY_REAL() + .Doc(R"doc( +Computes the complementary error function of `x` element-wise. +)doc"); -REGISTER_OP("Erf").UNARY_REAL(); +REGISTER_OP("Sigmoid") + .UNARY_COMPLEX() + .Doc(R"doc( +Computes sigmoid of `x` element-wise. -REGISTER_OP("Erfc").UNARY_REAL(); +Specifically, `y = 1 / (1 + exp(-x))`. +)doc"); -REGISTER_OP("Sigmoid").UNARY_COMPLEX(); +REGISTER_OP("SigmoidGrad") + .UNARY_GRADIENT_COMPLEX() + .Doc(R"doc( +Computes the gradient of the sigmoid of `x` wrt its input. -REGISTER_OP("SigmoidGrad").UNARY_GRADIENT_COMPLEX(); +Specifically, `grad = dy * y * (1 - y)`, where `y = sigmoid(x)`, and +`dy` is the corresponding input gradient. +)doc"); -REGISTER_OP("Sin").UNARY_COMPLEX(); +REGISTER_OP("Sin") + .UNARY_COMPLEX() + .Doc(R"doc( +Computes sin of x element-wise. +)doc"); -REGISTER_OP("Cos").UNARY_COMPLEX(); +REGISTER_OP("Cos") + .UNARY_COMPLEX() + .Doc(R"doc( +Computes cos of x element-wise. +)doc"); -REGISTER_OP("Tan").UNARY(); +REGISTER_OP("Tan") + .UNARY() + .Doc(R"doc( +Computes tan of x element-wise. +)doc"); -REGISTER_OP("Asin").UNARY(); +REGISTER_OP("Asin") + .UNARY() + .Doc(R"doc( +Computes asin of x element-wise. +)doc"); -REGISTER_OP("Acos").UNARY(); +REGISTER_OP("Acos") + .UNARY() + .Doc(R"doc( +Computes acos of x element-wise. +)doc"); -REGISTER_OP("Atan").UNARY(); +REGISTER_OP("Atan") + .UNARY() + .Doc(R"doc( +Computes atan of x element-wise. +)doc"); #undef UNARY #undef UNARY_REAL @@ -247,19 +484,40 @@ REGISTER_OP("IsNan") .Input("x: T") .Output("y: bool") .Attr("T: {half, bfloat16, float, double}") - .SetShapeFn(shape_inference::UnchangedShape); + .SetShapeFn(shape_inference::UnchangedShape) + .Doc(R"doc( +Returns which elements of x are NaN. + +@compatibility(numpy) +Equivalent to np.isnan +@end_compatibility +)doc"); REGISTER_OP("IsInf") .Input("x: T") .Output("y: bool") .Attr("T: {half, bfloat16, float, double}") - .SetShapeFn(shape_inference::UnchangedShape); + .SetShapeFn(shape_inference::UnchangedShape) + .Doc(R"doc( +Returns which elements of x are Inf. + +@compatibility(numpy) +Equivalent to np.isinf +@end_compatibility +)doc"); REGISTER_OP("IsFinite") .Input("x: T") .Output("y: bool") .Attr("T: {half, bfloat16, float, double}") - .SetShapeFn(shape_inference::UnchangedShape); + .SetShapeFn(shape_inference::UnchangedShape) + .Doc(R"doc( +Returns which elements of x are finite. + +@compatibility(numpy) +Equivalent to np.isfinite +@end_compatibility +)doc"); REGISTER_OP("Sign") .Input("x: T") @@ -267,25 +525,51 @@ REGISTER_OP("Sign") .Attr( "T: {half, bfloat16, float, double, int32, int64, complex64, " "complex128}") - .SetShapeFn(shape_inference::UnchangedShape); + .SetShapeFn(shape_inference::UnchangedShape) + .Doc(R"doc( +Returns an element-wise indication of the sign of a number. + +`y = sign(x) = -1` if `x < 0`; 0 if `x == 0`; 1 if `x > 0`. + +For complex numbers, `y = sign(x) = x / |x|` if `x != 0`, otherwise `y = 0`. +)doc"); REGISTER_OP("Floor") .Input("x: T") .Output("y: T") .Attr("T: {half, bfloat16, float, double}") - .SetShapeFn(shape_inference::UnchangedShape); + .SetShapeFn(shape_inference::UnchangedShape) + .Doc(R"doc( +Returns element-wise largest integer not greater than x. 
+)doc"); REGISTER_OP("Ceil") .Input("x: T") .Output("y: T") .Attr("T: {half, bfloat16, float, double}") - .SetShapeFn(shape_inference::UnchangedShape); + .SetShapeFn(shape_inference::UnchangedShape) + .Doc(R"doc( +Returns element-wise smallest integer in not less than x. +)doc"); REGISTER_OP("Rint") .Input("x: T") .Output("y: T") .Attr("T: {bfloat16, float, double}") - .SetShapeFn(shape_inference::UnchangedShape); + .SetShapeFn(shape_inference::UnchangedShape) + .Doc(R"doc( +Returns element-wise integer closest to x. + +If the result is midway between two representable values, +the even representable is chosen. +For example: + +``` +rint(-1.5) ==> -2.0 +rint(0.5000001) ==> 1.0 +rint([-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]) ==> [-2., -2., -0., 0., 2., 2., 2.] +``` +)doc"); // Declares cwise binary operations signature: 't, 't -> 't. @@ -306,7 +590,13 @@ REGISTER_OP("Add") .Attr( "T: {half, bfloat16, float, double, uint8, int8, int16, int32, int64, " "complex64, complex128, string}") - .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn); + .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn) + .Doc(R"doc( +Returns x + y element-wise. + +*NOTE*: `Add` supports broadcasting. `AddN` does not. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +)doc"); // TODO(rmlarsen): Add a Python wrapper that swiches non-string instances to // use AddV2 (b/68646025). @@ -319,7 +609,13 @@ REGISTER_OP("AddV2") "complex64, complex128}") .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn) .SetIsAggregate() - .SetIsCommutative(); + .SetIsCommutative() + .Doc(R"doc( +Returns x + y element-wise. + +*NOTE*: `Add` supports broadcasting. `AddN` does not. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +)doc"); REGISTER_OP("_MklAdd") .Input("x: T") @@ -339,8 +635,15 @@ Returns x + y element-wise. [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) )doc"); -REGISTER_OP("Sub").BINARY_MORE().SetShapeFn( - shape_inference::BroadcastBinaryOpShapeFn); +REGISTER_OP("Sub") + .BINARY_MORE() + .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn) + .Doc(R"doc( +Returns x - y element-wise. + +*NOTE*: `Sub` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +)doc"); REGISTER_OP("_MklSub") .BINARY_FEWER() @@ -355,8 +658,16 @@ Returns x - y element-wise. [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) )doc"); -REGISTER_OP("Mul").BINARY_MORE().SetIsCommutative().SetShapeFn( - shape_inference::BroadcastBinaryOpShapeFn); +REGISTER_OP("Mul") + .BINARY_MORE() + .SetIsCommutative() + .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn) + .Doc(R"doc( +Returns x * y element-wise. + +*NOTE*: `Mul` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +)doc"); REGISTER_OP("_MklMul") .BINARY_MORE() @@ -372,24 +683,63 @@ Returns x * y element-wise. [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) )doc"); -REGISTER_OP("Div").BINARY_MORE().SetShapeFn( - shape_inference::BroadcastBinaryOpShapeFn); +REGISTER_OP("Div") + .BINARY_MORE() + .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn) + .Doc(R"doc( +Returns x / y element-wise. + +*NOTE*: `Div` supports broadcasting. 
More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +)doc"); REGISTER_OP("FloorDiv") .BINARY_MORE() - .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn); + .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn) + .Doc(R"doc( +Returns x // y element-wise. + +*NOTE*: `FloorDiv` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +)doc"); REGISTER_OP("TruncateDiv") .BINARY_MORE() - .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn); + .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn) + .Doc(R"doc( +Returns x / y element-wise for integer types. -REGISTER_OP("RealDiv").BINARY_MORE().SetShapeFn( - shape_inference::BroadcastBinaryOpShapeFn); +Truncation designates that negative numbers will round fractional quantities +toward zero. I.e. -7 / 5 = -1. This matches C semantics but it is different +than Python semantics. See `FloorDiv` for a division function that matches +Python Semantics. + +*NOTE*: `TruncateDiv` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +)doc"); + +REGISTER_OP("RealDiv") + .BINARY_MORE() + .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn) + .Doc(R"doc( +Returns x / y element-wise for real types. + +If `x` and `y` are reals, this will return the floating-point division. + +*NOTE*: `Div` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +)doc"); REGISTER_OP("SquaredDifference") .BINARY_FEWER() .SetIsCommutative() - .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn); + .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn) + .Doc(R"doc( +Returns (x - y)(x - y) element-wise. + +*NOTE*: `SquaredDifference` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +)doc"); REGISTER_OP("_MklSquaredDifference") .BINARY_FEWER() @@ -414,7 +764,13 @@ REGISTER_OP("Maximum") .Output("z: T") .Attr("T: {half, bfloat16, float, double, int32, int64}") .SetIsCommutative() - .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn); + .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn) + .Doc(R"doc( +Returns the max of x and y (i.e. x > y ? x : y) element-wise. + +*NOTE*: `Maximum` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +)doc"); REGISTER_OP("_MklMaximum") .Input("x: T") @@ -439,28 +795,58 @@ REGISTER_OP("Minimum") .Output("z: T") .Attr("T: {half, bfloat16, float, double, int32, int64}") .SetIsCommutative() - .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn); + .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn) + .Doc(R"doc( +Returns the min of x and y (i.e. x < y ? x : y) element-wise. + +*NOTE*: `Minimum` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +)doc"); REGISTER_OP("Mod") .Input("x: T") .Input("y: T") .Output("z: T") .Attr("T: {int32, int64, bfloat16, float, double}") - .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn); + .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn) + .Doc(R"doc( +Returns element-wise remainder of division. This emulates C semantics in that +the result here is consistent with a truncating divide. E.g. +`tf.truncatediv(x, y) * y + truncate_mod(x, y) = x`. + +*NOTE*: `Mod` supports broadcasting. 
More about broadcasting
+[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+)doc");

 REGISTER_OP("FloorMod")
     .Input("x: T")
     .Input("y: T")
     .Output("z: T")
     .Attr("T: {int32, int64, bfloat16, float, double}")
-    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn);
+    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
+    .Doc(R"doc(
+Returns element-wise remainder of division. When `x < 0` xor `y < 0` is
+true, this follows Python semantics in that the result here is consistent
+with a flooring divide. E.g. `floor(x / y) * y + mod(x, y) = x`.
+
+*NOTE*: `FloorMod` supports broadcasting. More about broadcasting
+[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+)doc");

 REGISTER_OP("TruncateMod")
     .Input("x: T")
     .Input("y: T")
     .Output("z: T")
     .Attr("T: {int32, int64, bfloat16, float, double}")
-    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn);
+    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
+    .Doc(R"doc(
+Returns element-wise remainder of division. This emulates C semantics in that
+the result here is consistent with a truncating divide. E.g. `truncate(x / y) *
+y + truncate_mod(x, y) = x`.
+
+*NOTE*: `TruncateMod` supports broadcasting. More about broadcasting
+[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+)doc");

 REGISTER_OP("Pow")
     .Input("x: T")
@@ -469,42 +855,114 @@ REGISTER_OP("Pow")
     .Attr(
         "T: {half, bfloat16, float, double, int32, int64, complex64, "
         "complex128}")
-    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn);
+    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
+    .Doc(R"doc(
+Computes the power of one value to another.
+
+Given a tensor `x` and a tensor `y`, this operation computes \\(x^y\\) for
+corresponding elements in `x` and `y`. For example:
+
+```
+# tensor 'x' is [[2, 2], [3, 3]]
+# tensor 'y' is [[8, 16], [2, 3]]
+tf.pow(x, y) ==> [[256, 65536], [9, 27]]
+```
+)doc");

 REGISTER_OP("Igammac")
     .Input("a: T")
     .Input("x: T")
     .Output("z: T")
     .Attr("T: {float, double}")
-    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn);
+    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
+    .Doc(R"doc(
+Compute the upper regularized incomplete Gamma function `Q(a, x)`.
+
+The upper regularized incomplete Gamma function is defined as:
+
+\\(Q(a, x) = Gamma(a, x) / Gamma(a) = 1 - P(a, x)\\)
+
+where
+
+\\(Gamma(a, x) = \int_{x}^{\infty} t^{a-1} exp(-t) dt\\)
+
+is the upper incomplete Gamma function.
+
+Note, above `P(a, x)` (`Igamma`) is the lower regularized incomplete
+Gamma function.
+)doc");

 REGISTER_OP("Igamma")
     .Input("a: T")
     .Input("x: T")
     .Output("z: T")
     .Attr("T: {float, double}")
-    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn);
+    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
+    .Doc(R"doc(
+Compute the lower regularized incomplete Gamma function `P(a, x)`.
+
+The lower regularized incomplete Gamma function is defined as:
+
+
+\\(P(a, x) = gamma(a, x) / Gamma(a) = 1 - Q(a, x)\\)
+
+where
+
+\\(gamma(a, x) = \int_{0}^{x} t^{a-1} exp(-t) dt\\)
+
+is the lower incomplete Gamma function.
+
+Note, above `Q(a, x)` (`Igammac`) is the upper regularized incomplete
+Gamma function.
+)doc");

 REGISTER_OP("Zeta")
     .Input("x: T")
     .Input("q: T")
     .Output("z: T")
     .Attr("T: {float, double}")
-    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn);
+    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
+    .Doc(R"doc(
+Compute the Hurwitz zeta function \\(\zeta(x, q)\\).
+
+The Hurwitz zeta function is defined as:
+
+
+\\(\zeta(x, q) = \sum_{n=0}^{\infty} (q + n)^{-x}\\)
+
+)doc");

 REGISTER_OP("Polygamma")
     .Input("a: T")
     .Input("x: T")
     .Output("z: T")
     .Attr("T: {float, double}")
-    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn);
+    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
+    .Doc(R"doc(
+Compute the polygamma function \\(\psi^{(n)}(x)\\).
+
+The polygamma function is defined as:
+
+
+\\(\psi^{(n)}(x) = \frac{d^n}{dx^n} \psi(x)\\)
+
+where \\(\psi(x)\\) is the digamma function.
+)doc");

 REGISTER_OP("Atan2")
     .Input("y: T")
     .Input("x: T")
     .Output("z: T")
     .Attr("T: {bfloat16, float, double}")
-    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn);
+    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
+    .Doc(R"doc(
+Computes arctangent of `y/x` element-wise, respecting signs of the arguments.
+
+This is the angle \\(\theta \in [-\pi, \pi]\\) such that
+\\(x = r \cos(\theta)\\)
+and
+\\(y = r \sin(\theta)\\),
+where \\(r = \sqrt{x^2 + y^2}\\).
+)doc");

 REGISTER_OP("Betainc")
     .Input("a: T")
@@ -543,7 +1001,24 @@ REGISTER_OP("Betainc")
       c->set_output(0, output);
       return Status::OK();
-    });
+    })
+    .Doc(R"doc(
+Compute the regularized incomplete beta integral \\(I_x(a, b)\\).
+
+The regularized incomplete beta integral is defined as:
+
+
+\\(I_x(a, b) = \frac{B(x; a, b)}{B(a, b)}\\)
+
+where
+
+
+\\(B(x; a, b) = \int_0^x t^{a-1} (1 - t)^{b-1} dt\\)
+
+
+is the incomplete beta function and \\(B(a, b)\\) is the *complete*
+beta function.
+)doc");

 // --------------------------------------------------------------------------

@@ -556,13 +1031,41 @@ REGISTER_OP("Betainc")
     .Attr("T: realnumbertype")                             \
     .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)

-REGISTER_OP("Less").COMPARISON();
+REGISTER_OP("Less")
+    .COMPARISON()
+    .Doc(R"doc(
+Returns the truth value of (x < y) element-wise.

-REGISTER_OP("LessEqual").COMPARISON();
+*NOTE*: `Less` supports broadcasting. More about broadcasting
+[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+)doc");

-REGISTER_OP("Greater").COMPARISON();
+REGISTER_OP("LessEqual")
+    .COMPARISON()
+    .Doc(R"doc(
+Returns the truth value of (x <= y) element-wise.

-REGISTER_OP("GreaterEqual").COMPARISON();
+*NOTE*: `LessEqual` supports broadcasting. More about broadcasting
+[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+)doc");
+
+REGISTER_OP("Greater")
+    .COMPARISON()
+    .Doc(R"doc(
+Returns the truth value of (x > y) element-wise.
+
+*NOTE*: `Greater` supports broadcasting. More about broadcasting
+[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+)doc");
+
+REGISTER_OP("GreaterEqual")
+    .COMPARISON()
+    .Doc(R"doc(
+Returns the truth value of (x >= y) element-wise.
+
+*NOTE*: `GreaterEqual` supports broadcasting. More about broadcasting
+[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+)doc");

 #undef COMPARISON

@@ -579,9 +1082,23 @@ REGISTER_OP("GreaterEqual").COMPARISON();
         "complex128}")                                     \
     .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)

-REGISTER_OP("Equal").EQUALITY_COMPARISON();
+REGISTER_OP("Equal")
+    .EQUALITY_COMPARISON()
+    .Doc(R"doc(
+Returns the truth value of (x == y) element-wise.

-REGISTER_OP("NotEqual").EQUALITY_COMPARISON();
+*NOTE*: `Equal` supports broadcasting. More about broadcasting
+[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+)doc");
+
+REGISTER_OP("NotEqual")
+    .EQUALITY_COMPARISON()
+    .Doc(R"doc(
+Returns the truth value of (x != y) element-wise.
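+
+For example (an illustrative sketch, assuming the usual Python wrapper
+`tf.not_equal` for this op):
+
+```python
+# 'x' is [1, 2, 3], 'y' is [1, 0, 3]
+tf.not_equal(x, y) ==> [False, True, False]
+```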
+ +*NOTE*: `NotEqual` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +)doc"); #undef EQUALITY_COMPARISON @@ -592,14 +1109,20 @@ REGISTER_OP("ApproximateEqual") .SetIsCommutative() .Attr("T: numbertype") .Attr("tolerance: float = 0.00001") - .SetShapeFn(shape_inference::UnchangedShape); + .SetShapeFn(shape_inference::UnchangedShape) + .Doc(R"doc( +Returns the truth value of abs(x-y) < tolerance element-wise. +)doc"); // -------------------------------------------------------------------------- REGISTER_OP("LogicalNot") .Input("x: bool") .Output("y: bool") - .SetShapeFn(shape_inference::UnchangedShape); + .SetShapeFn(shape_inference::UnchangedShape) + .Doc(R"doc( +Returns the truth value of NOT x element-wise. +)doc"); #define BINARY_LOGICAL() \ Input("x: bool") \ @@ -608,9 +1131,23 @@ REGISTER_OP("LogicalNot") .SetIsCommutative() \ .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn) -REGISTER_OP("LogicalAnd").BINARY_LOGICAL(); +REGISTER_OP("LogicalAnd") + .BINARY_LOGICAL() + .Doc(R"doc( +Returns the truth value of x AND y element-wise. -REGISTER_OP("LogicalOr").BINARY_LOGICAL(); +*NOTE*: `LogicalAnd` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +)doc"); + +REGISTER_OP("LogicalOr") + .BINARY_LOGICAL() + .Doc(R"doc( +Returns the truth value of x OR y element-wise. + +*NOTE*: `LogicalOr` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +)doc"); #undef BINARY_LOGICAL @@ -703,7 +1240,55 @@ REGISTER_OP("Select") c->set_output(0, data); return Status::OK(); - }); + }) + .Doc(R"doc( +Selects elements from `t` or `e`, depending on `condition`. + +The `t`, and `e` tensors must all have the same shape, and the +output will also have that shape. + +The `condition` tensor must be a scalar if `t` and `e` are scalars. +If `t` and `e` are vectors or higher rank, then `condition` must be either a +scalar, a vector with size matching the first dimension of `t`, or must have +the same shape as `t`. + +The `condition` tensor acts as a mask that chooses, based on the value at each +element, whether the corresponding element / row in the output should be +taken from `t` (if true) or `e` (if false). + +If `condition` is a vector and `t` and `e` are higher rank matrices, then +it chooses which row (outer dimension) to copy from `t` and `e`. +If `condition` has the same shape as `t` and `e`, then it chooses which +element to copy from `t` and `e`. + +For example: + +```python +# 'condition' tensor is [[True, False] +# [False, True]] +# 't' is [[1, 2], +# [3, 4]] +# 'e' is [[5, 6], +# [7, 8]] +select(condition, t, e) # => [[1, 6], [7, 4]] + + +# 'condition' tensor is [True, False] +# 't' is [[1, 2], +# [3, 4]] +# 'e' is [[5, 6], +# [7, 8]] +select(condition, t, e) ==> [[1, 2], + [7, 8]] + +``` + +t:= A `Tensor` which may have the same shape as `condition`. + If `condition` is rank 1, `t` may have higher rank, + but its first dimension must match the size of `condition`. +e:= A `Tensor` with the same type and shape as `t`. +output:= A `Tensor` with the same type and shape as `t` and `e`. 
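+
+A short usage sketch (assuming the three-argument form of the Python wrapper
+`tf.where`, which dispatches to this op):
+
+```python
+# 'condition' is [True, False, True]
+# 'x' is [1, 2, 3] and 'y' is [10, 20, 30]
+tf.where(condition, x, y) ==> [1, 20, 3]
+```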
+)doc"); // -------------------------------------------------------------------------- @@ -714,7 +1299,21 @@ REGISTER_OP("MatMul") .Attr("transpose_a: bool = false") .Attr("transpose_b: bool = false") .Attr("T: {half, bfloat16, float, double, int32, complex64, complex128}") - .SetShapeFn(shape_inference::MatMulShape); + .SetShapeFn(shape_inference::MatMulShape) + .Doc(R"doc( +Multiply the matrix "a" by the matrix "b". + +The inputs must be two-dimensional matrices and the inner dimension of +"a" (after being transposed if transpose_a is true) must match the +outer dimension of "b" (after being transposed if transposed_b is +true). + +*Note*: The default kernel implementation for MatMul on GPUs uses +cublas. + +transpose_a: If true, "a" is transposed before multiplication. +transpose_b: If true, "b" is transposed before multiplication. +)doc"); REGISTER_OP("SparseMatMul") .Input("a: Ta") @@ -726,7 +1325,18 @@ REGISTER_OP("SparseMatMul") .Attr("b_is_sparse: bool = false") .Attr("Ta: {float, bfloat16} = DT_FLOAT") .Attr("Tb: {float, bfloat16} = DT_FLOAT") - .SetShapeFn(shape_inference::MatMulShape); + .SetShapeFn(shape_inference::MatMulShape) + .Doc(R"doc( +Multiply matrix "a" by matrix "b". + +The inputs must be two-dimensional matrices and the inner dimension of "a" must +match the outer dimension of "b". This op is optimized for the case where at +least one of "a" or "b" is sparse. The breakeven for using this versus a dense +matrix multiply on one platform was 30% zero values in the sparse matrix. + +The gradient computation of this operation will only take advantage of sparsity +in the input gradient when that gradient comes from a Relu. +)doc"); // -------------------------------------------------------------------------- @@ -739,7 +1349,21 @@ REGISTER_OP("Sum") .Attr("keep_dims: bool = false") .Attr("T: numbertype") .Attr("Tidx: {int32, int64} = DT_INT32") - .SetShapeFn(shape_inference::ReductionShape); + .SetShapeFn(shape_inference::ReductionShape) + .Doc(R"doc( +Computes the sum of elements across dimensions of a tensor. + +Reduces `input` along the dimensions given in `reduction_indices`. Unless +`keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in +`reduction_indices`. If `keep_dims` is true, the reduced dimensions are +retained with length 1. + +input: The tensor to reduce. +reduction_indices: The dimensions to reduce. Must be in the range + `[-rank(input), rank(input))`. +keep_dims: If true, retain reduced dimensions with length 1. +output: The reduced tensor. +)doc"); REGISTER_OP("Mean") .Input("input: T") @@ -748,7 +1372,21 @@ REGISTER_OP("Mean") .Attr("keep_dims: bool = false") .Attr("T: numbertype") .Attr("Tidx: {int32, int64} = DT_INT32") - .SetShapeFn(shape_inference::ReductionShape); + .SetShapeFn(shape_inference::ReductionShape) + .Doc(R"doc( +Computes the mean of elements across dimensions of a tensor. + +Reduces `input` along the dimensions given in `reduction_indices`. Unless +`keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in +`reduction_indices`. If `keep_dims` is true, the reduced dimensions are +retained with length 1. + +input: The tensor to reduce. +reduction_indices: The dimensions to reduce. Must be in the range + `[-rank(input), rank(input))`. +keep_dims: If true, retain reduced dimensions with length 1. +output: The reduced tensor. 
+)doc"); REGISTER_OP("Prod") .Input("input: T") @@ -757,7 +1395,21 @@ REGISTER_OP("Prod") .Attr("keep_dims: bool = false") .Attr("T: numbertype") .Attr("Tidx: {int32, int64} = DT_INT32") - .SetShapeFn(shape_inference::ReductionShape); + .SetShapeFn(shape_inference::ReductionShape) + .Doc(R"doc( +Computes the product of elements across dimensions of a tensor. + +Reduces `input` along the dimensions given in `reduction_indices`. Unless +`keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in +`reduction_indices`. If `keep_dims` is true, the reduced dimensions are +retained with length 1. + +input: The tensor to reduce. +reduction_indices: The dimensions to reduce. Must be in the range + `[-rank(input), rank(input))`. +keep_dims: If true, retain reduced dimensions with length 1. +output: The reduced tensor. +)doc"); REGISTER_OP("Min") .Input("input: T") @@ -766,7 +1418,21 @@ REGISTER_OP("Min") .Attr("keep_dims: bool = false") .Attr("T: numbertype") .Attr("Tidx: {int32, int64} = DT_INT32") - .SetShapeFn(shape_inference::ReductionShape); + .SetShapeFn(shape_inference::ReductionShape) + .Doc(R"doc( +Computes the minimum of elements across dimensions of a tensor. + +Reduces `input` along the dimensions given in `reduction_indices`. Unless +`keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in +`reduction_indices`. If `keep_dims` is true, the reduced dimensions are +retained with length 1. + +input: The tensor to reduce. +reduction_indices: The dimensions to reduce. Must be in the range + `[-rank(input), rank(input))`. +keep_dims: If true, retain reduced dimensions with length 1. +output: The reduced tensor. +)doc"); REGISTER_OP("Max") .Input("input: T") @@ -775,7 +1441,21 @@ REGISTER_OP("Max") .Attr("keep_dims: bool = false") .Attr("T: numbertype") .Attr("Tidx: {int32, int64} = DT_INT32") - .SetShapeFn(shape_inference::ReductionShape); + .SetShapeFn(shape_inference::ReductionShape) + .Doc(R"doc( +Computes the maximum of elements across dimensions of a tensor. + +Reduces `input` along the dimensions given in `reduction_indices`. Unless +`keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in +`reduction_indices`. If `keep_dims` is true, the reduced dimensions are +retained with length 1. + +input: The tensor to reduce. +reduction_indices: The dimensions to reduce. Must be in the range + `[-rank(input), rank(input))`. +keep_dims: If true, retain reduced dimensions with length 1. +output: The reduced tensor. +)doc"); namespace { @@ -843,7 +1523,16 @@ REGISTER_OP("ArgMax") .Attr("T: numbertype") .Attr("Tidx: {int32, int64} = DT_INT32") .Attr("output_type: {int32, int64} = DT_INT64") - .SetShapeFn(ArgOpShape); + .SetShapeFn(ArgOpShape) + .Doc(R"doc( +Returns the index with the largest value across dimensions of a tensor. + +Note that in case of ties the identity of the return value is not guaranteed. + +dimension: int32 or int64, must be in the range `[-rank(input), rank(input))`. + Describes which dimension of the input Tensor to reduce across. For vectors, + use dimension = 0. +)doc"); REGISTER_OP("ArgMin") .Input("input: T") @@ -852,7 +1541,16 @@ REGISTER_OP("ArgMin") .Attr("T: numbertype") .Attr("Tidx: {int32, int64} = DT_INT32") .Attr("output_type: {int32, int64} = DT_INT64") - .SetShapeFn(ArgOpShape); + .SetShapeFn(ArgOpShape) + .Doc(R"doc( +Returns the index with the smallest value across dimensions of a tensor. + +Note that in case of ties the identity of the return value is not guaranteed. 
+ +dimension: int32 or int64, must be in the range `[-rank(input), rank(input))`. + Describes which dimension of the input Tensor to reduce across. For vectors, + use dimension = 0. +)doc"); namespace { @@ -1011,7 +1709,29 @@ REGISTER_OP("SegmentSum") .Output("output: T") .Attr("T: numbertype") .Attr("Tindices: {int32,int64}") - .SetShapeFn(SegmentReductionShapeFn); + .SetShapeFn(SegmentReductionShapeFn) + .Doc(R"doc( +Computes the sum along segments of a tensor. + +Read @{$math_ops#segmentation$the section on segmentation} for an explanation of +segments. + +Computes a tensor such that +\\(output_i = \sum_j data_j\\) where sum is over `j` such +that `segment_ids[j] == i`. + +If the sum is empty for a given segment ID `i`, `output[i] = 0`. + +<div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;"> +<img style="width:100%" src="https://www.tensorflow.org/images/SegmentSum.png" alt> +</div> + +segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s +first dimension. Values should be sorted and can be repeated. + +output: Has same shape as data, except for dimension 0 which + has size `k`, the number of segments. +)doc"); REGISTER_OP("SegmentMean") .Input("data: T") @@ -1019,7 +1739,30 @@ REGISTER_OP("SegmentMean") .Output("output: T") .Attr("T: realnumbertype") .Attr("Tindices: {int32,int64}") - .SetShapeFn(SegmentReductionShapeFn); + .SetShapeFn(SegmentReductionShapeFn) + .Doc(R"doc( +Computes the mean along segments of a tensor. + +Read @{$math_ops#segmentation$the section on segmentation} for an explanation of +segments. + +Computes a tensor such that +\\(output_i = \frac{\sum_j data_j}{N}\\) where `mean` is +over `j` such that `segment_ids[j] == i` and `N` is the total number of +values summed. + +If the mean is empty for a given segment ID `i`, `output[i] = 0`. + +<div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;"> +<img style="width:100%" src="https://www.tensorflow.org/images/SegmentMean.png" alt> +</div> + +segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s +first dimension. Values should be sorted and can be repeated. + +output: Has same shape as data, except for dimension 0 which + has size `k`, the number of segments. +)doc"); REGISTER_OP("SegmentProd") .Input("data: T") @@ -1027,7 +1770,29 @@ REGISTER_OP("SegmentProd") .Output("output: T") .Attr("T: numbertype") .Attr("Tindices: {int32,int64}") - .SetShapeFn(SegmentReductionShapeFn); + .SetShapeFn(SegmentReductionShapeFn) + .Doc(R"doc( +Computes the product along segments of a tensor. + +Read @{$math_ops#segmentation$the section on segmentation} for an explanation of +segments. + +Computes a tensor such that +\\(output_i = \prod_j data_j\\) where the product is over `j` such +that `segment_ids[j] == i`. + +If the product is empty for a given segment ID `i`, `output[i] = 1`. + +<div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;"> +<img style="width:100%" src="https://www.tensorflow.org/images/SegmentProd.png" alt> +</div> + +segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s +first dimension. Values should be sorted and can be repeated. + +output: Has same shape as data, except for dimension 0 which + has size `k`, the number of segments. 
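+
+For example (a sketch, assuming the Python wrapper `tf.segment_mean`):
+
+```python
+c = tf.constant([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
+tf.segment_mean(c, tf.constant([0, 0, 1]))
+# => [[2.0, 3.0],
+#     [5.0, 6.0]]
+```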
+)doc"); REGISTER_OP("SegmentMin") .Input("data: T") @@ -1035,7 +1800,29 @@ REGISTER_OP("SegmentMin") .Output("output: T") .Attr("T: realnumbertype") .Attr("Tindices: {int32,int64}") - .SetShapeFn(SegmentReductionShapeFn); + .SetShapeFn(SegmentReductionShapeFn) + .Doc(R"doc( +Computes the minimum along segments of a tensor. + +Read @{$math_ops#segmentation$the section on segmentation} for an explanation of +segments. + +Computes a tensor such that +\\(output_i = \min_j(data_j)\\) where `min` is over `j` such +that `segment_ids[j] == i`. + +If the min is empty for a given segment ID `i`, `output[i] = 0`. + +<div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;"> +<img style="width:100%" src="https://www.tensorflow.org/images/SegmentMin.png" alt> +</div> + +segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s +first dimension. Values should be sorted and can be repeated. + +output: Has same shape as data, except for dimension 0 which + has size `k`, the number of segments. +)doc"); REGISTER_OP("SegmentMax") .Input("data: T") @@ -1043,7 +1830,29 @@ REGISTER_OP("SegmentMax") .Output("output: T") .Attr("T: realnumbertype") .Attr("Tindices: {int32,int64}") - .SetShapeFn(SegmentReductionShapeFn); + .SetShapeFn(SegmentReductionShapeFn) + .Doc(R"doc( +Computes the maximum along segments of a tensor. + +Read @{$math_ops#segmentation$the section on segmentation} for an explanation of +segments. + +Computes a tensor such that +\\(output_i = \max_j(data_j)\\) where `max` is over `j` such +that `segment_ids[j] == i`. + +If the max is empty for a given segment ID `i`, `output[i] = 0`. + +<div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;"> +<img style="width:100%" src="https://www.tensorflow.org/images/SegmentMax.png" alt> +</div> + +segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s +first dimension. Values should be sorted and can be repeated. + +output: Has same shape as data, except for dimension 0 which + has size `k`, the number of segments. +)doc"); REGISTER_OP("UnsortedSegmentSum") .Input("data: T") @@ -1053,7 +1862,36 @@ REGISTER_OP("UnsortedSegmentSum") .Attr("T: numbertype") .Attr("Tindices: {int32,int64}") .Attr("Tnumsegments: {int32,int64} = DT_INT32") - .SetShapeFn(UnsortedSegmentReductionShapeFn); + .SetShapeFn(UnsortedSegmentReductionShapeFn) + .Doc(R"doc( +Computes the sum along segments of a tensor. + +Read @{$math_ops#segmentation$the section on segmentation} for an explanation of +segments. + +Computes a tensor such that +`(output[i] = sum_{j...} data[j...]` where the sum is over tuples `j...` such +that `segment_ids[j...] == i`. Unlike `SegmentSum`, `segment_ids` +need not be sorted and need not cover all values in the full +range of valid values. + +If the sum is empty for a given segment ID `i`, `output[i] = 0`. +If the given segment ID `i` is negative, the value is dropped and will not be +added to the sum of the segment. + +`num_segments` should equal the number of distinct segment IDs. + +<div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;"> +<img style="width:100%" src="https://www.tensorflow.org/images/UnsortedSegmentSum.png" alt> +</div> + +segment_ids: A tensor whose shape is a prefix of `data.shape`. + +output: Has same shape as data, except for the first `segment_ids.rank` + dimensions, which are replaced with a single dimension which has size + `num_segments`. 
+
+)doc");

 REGISTER_OP("UnsortedSegmentMax")
     .Input("data: T")
@@ -1063,7 +1901,34 @@ REGISTER_OP("UnsortedSegmentMax")
     .Attr("T: realnumbertype")
     .Attr("Tindices: {int32,int64}")
     .Attr("Tnumsegments: {int32,int64} = DT_INT32")
-    .SetShapeFn(UnsortedSegmentReductionShapeFn);
+    .SetShapeFn(UnsortedSegmentReductionShapeFn)
+    .Doc(R"doc(
+Computes the Max along segments of a tensor.
+
+Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
+segments.
+
+This operator is similar to the [unsorted segment sum operator](../../../api_docs/python/math_ops.md#UnsortedSegmentSum).
+Instead of computing the sum over segments, it computes the maximum
+such that:
+
+\\(output_i = \max_j data_j\\) where max is over `j` such
+that `segment_ids[j] == i`.
+
+If the maximum is empty for a given segment ID `i`, it outputs the smallest
+possible value for the specific numeric type,
+ `output[i] = numeric_limits<T>::min()`.
+
+<div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="width:100%" src="https://www.tensorflow.org/images/UnsortedSegmentMax.png" alt>
+</div>
+
+segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s
+first dimension.
+
+output: Has same shape as data, except for dimension 0 which
+has size `num_segments`.
+
+)doc");

 REGISTER_OP("SparseSegmentSum")
     .Input("data: T")
@@ -1072,7 +1937,46 @@ REGISTER_OP("SparseSegmentSum")
     .Output("output: T")
     .Attr("T: realnumbertype")
     .Attr("Tidx: {int32, int64} = DT_INT32")
-    .SetShapeFn(SparseSegmentReductionShapeFn);
+    .SetShapeFn(SparseSegmentReductionShapeFn)
+    .Doc(R"doc(
+Computes the sum along sparse segments of a tensor.
+
+Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
+segments.
+
+Like `SegmentSum`, but `segment_ids` can have rank less than `data`'s first
+dimension, selecting a subset of dimension 0, specified by `indices`.
+
+For example:
+
+```python
+c = tf.constant([[1,2,3,4], [-1,-2,-3,-4], [5,6,7,8]])
+
+# Select two rows, one segment.
+tf.sparse_segment_sum(c, tf.constant([0, 1]), tf.constant([0, 0]))
+# => [[0 0 0 0]]
+
+# Select two rows, two segments.
+tf.sparse_segment_sum(c, tf.constant([0, 1]), tf.constant([0, 1]))
+# => [[ 1  2  3  4]
+#     [-1 -2 -3 -4]]
+
+# Select all rows, two segments.
+tf.sparse_segment_sum(c, tf.constant([0, 1, 2]), tf.constant([0, 0, 1]))
+# => [[0 0 0 0]
+#     [5 6 7 8]]
+
+# Which is equivalent to:
+tf.segment_sum(c, tf.constant([0, 0, 1]))
+```
+
+indices: A 1-D tensor. Has same rank as `segment_ids`.
+
+segment_ids: A 1-D tensor. Values should be sorted and can be repeated.
+
+output: Has same shape as data, except for dimension 0 which
+  has size `k`, the number of segments.
+)doc");

 REGISTER_OP("SparseSegmentSumWithNumSegments")
     .Input("data: T")
@@ -1083,7 +1987,46 @@ REGISTER_OP("SparseSegmentSumWithNumSegments")
     .Attr("T: realnumbertype")
     .Attr("Tidx: {int32, int64} = DT_INT32")
     .Attr("Tnumsegments: {int32,int64} = DT_INT32")
-    .SetShapeFn(SparseSegmentReductionWithNumSegmentsShapeFn);
+    .SetShapeFn(SparseSegmentReductionWithNumSegmentsShapeFn)
+    .Doc(R"doc(
+Computes the sum along sparse segments of a tensor.
+
+Like `SparseSegmentSum`, but allows missing ids in `segment_ids`. If an id is
+missing, the `output` tensor at that position will be zeroed.
+
+Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
+segments.
+
+For example:
+
+```python
+c = tf.constant([[1,2,3,4], [-1,-2,-3,-4], [5,6,7,8]])
+
+tf.sparse_segment_sum_with_num_segments(
+    c, tf.constant([0, 1]), tf.constant([0, 0]), num_segments=3)
+# => [[0 0 0 0]
+#     [0 0 0 0]
+#     [0 0 0 0]]
+
+tf.sparse_segment_sum_with_num_segments(c,
+                                        tf.constant([0, 1]),
+                                        tf.constant([0, 2]),
+                                        num_segments=4)
+# => [[ 1  2  3  4]
+#     [ 0  0  0  0]
+#     [-1 -2 -3 -4]
+#     [ 0  0  0  0]]
+```
+
+indices: A 1-D tensor. Has same rank as `segment_ids`.
+
+segment_ids: A 1-D tensor. Values should be sorted and can be repeated.
+
+num_segments: Should equal the number of distinct segment IDs.
+
+output: Has same shape as data, except for dimension 0 which
+  has size `num_segments`.
+)doc");

 REGISTER_OP("SparseSegmentMean")
     .Input("data: T")
@@ -1092,7 +2035,24 @@ REGISTER_OP("SparseSegmentMean")
     .Output("output: T")
     .Attr("T: {float, double}")
     .Attr("Tidx: {int32, int64} = DT_INT32")
-    .SetShapeFn(SparseSegmentReductionShapeFn);
+    .SetShapeFn(SparseSegmentReductionShapeFn)
+    .Doc(R"doc(
+Computes the mean along sparse segments of a tensor.
+
+Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
+segments.
+
+Like `SegmentMean`, but `segment_ids` can have rank less than `data`'s first
+dimension, selecting a subset of dimension 0, specified by `indices`.
+
+indices: A 1-D tensor. Has same rank as `segment_ids`.
+
+segment_ids: A 1-D tensor. Values should be sorted and can be repeated.
+
+output: Has same shape as data, except for dimension 0 which
+  has size `k`, the number of segments.
+
+)doc");

 REGISTER_OP("SparseSegmentMeanWithNumSegments")
     .Input("data: T")
@@ -1103,7 +2063,25 @@ REGISTER_OP("SparseSegmentMeanWithNumSegments")
     .Attr("T: {float, double}")
     .Attr("Tidx: {int32, int64} = DT_INT32")
     .Attr("Tnumsegments: {int32,int64} = DT_INT32")
-    .SetShapeFn(SparseSegmentReductionWithNumSegmentsShapeFn);
+    .SetShapeFn(SparseSegmentReductionWithNumSegmentsShapeFn)
+    .Doc(R"doc(
+Computes the mean along sparse segments of a tensor.
+
+Like `SparseSegmentMean`, but allows missing ids in `segment_ids`. If an id is
+missing, the `output` tensor at that position will be zeroed.
+
+Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
+segments.
+
+indices: A 1-D tensor. Has same rank as `segment_ids`.
+
+segment_ids: A 1-D tensor. Values should be sorted and can be repeated.
+
+num_segments: Should equal the number of distinct segment IDs.
+
+output: Has same shape as data, except for dimension 0 which has size
+  `num_segments`.
+)doc");

 REGISTER_OP("SparseSegmentMeanGrad")
     .Input("grad: T")
@@ -1113,7 +2091,18 @@ REGISTER_OP("SparseSegmentMeanGrad")
     .Output("output: T")
     .Attr("T: {float, double}")
     .Attr("Tidx: {int32, int64} = DT_INT32")
-    .SetShapeFn(SparseSegmentReductionGradShapeFn);
+    .SetShapeFn(SparseSegmentReductionGradShapeFn)
+    .Doc(R"doc(
+Computes gradients for SparseSegmentMean.
+
+Returns tensor "output" with same shape as grad, except for dimension 0 whose
+value is output_dim0.
+
+grad: gradient propagated to the SparseSegmentMean op.
+indices: indices passed to the corresponding SparseSegmentMean op.
+segment_ids: segment_ids passed to the corresponding SparseSegmentMean op.
+output_dim0: dimension 0 of "data" passed to SparseSegmentMean op.
+)doc");

 REGISTER_OP("SparseSegmentSqrtN")
     .Input("data: T")
@@ -1122,7 +2111,23 @@ REGISTER_OP("SparseSegmentSqrtN")
     .Output("output: T")
     .Attr("T: {float, double}")
     .Attr("Tidx: {int32, int64} = DT_INT32")
-    .SetShapeFn(SparseSegmentReductionShapeFn);
+    .SetShapeFn(SparseSegmentReductionShapeFn)
+    .Doc(R"doc(
+Computes the sum along sparse segments of a tensor divided by the sqrt of N.
+
+N is the size of the segment being reduced.
+
+Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
+segments.
+
+indices: A 1-D tensor. Has same rank as `segment_ids`.
+
+segment_ids: A 1-D tensor. Values should be sorted and can be repeated.
+
+output: Has same shape as data, except for dimension 0 which
+  has size `k`, the number of segments.
+
+)doc");

 REGISTER_OP("SparseSegmentSqrtNWithNumSegments")
     .Input("data: T")
@@ -1133,7 +2138,28 @@ REGISTER_OP("SparseSegmentSqrtNWithNumSegments")
     .Attr("T: {float, double}")
     .Attr("Tidx: {int32, int64} = DT_INT32")
     .Attr("Tnumsegments: {int32,int64} = DT_INT32")
-    .SetShapeFn(SparseSegmentReductionWithNumSegmentsShapeFn);
+    .SetShapeFn(SparseSegmentReductionWithNumSegmentsShapeFn)
+    .Doc(R"doc(
+Computes the sum along sparse segments of a tensor divided by the sqrt of N.
+
+N is the size of the segment being reduced.
+
+Like `SparseSegmentSqrtN`, but allows missing ids in `segment_ids`. If an id is
+missing, the `output` tensor at that position will be zeroed.
+
+Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
+segments.
+
+indices: A 1-D tensor. Has same rank as `segment_ids`.
+
+segment_ids: A 1-D tensor. Values should be sorted and can be repeated.
+
+num_segments: Should equal the number of distinct segment IDs.
+
+output: Has same shape as data, except for dimension 0 which
+  has size `k`, the number of segments.
+
+)doc");

 REGISTER_OP("SparseSegmentSqrtNGrad")
     .Input("grad: T")
@@ -1143,7 +2169,18 @@ REGISTER_OP("SparseSegmentSqrtNGrad")
     .Output("output: T")
     .Attr("T: {float, double}")
     .Attr("Tidx: {int32, int64} = DT_INT32")
-    .SetShapeFn(SparseSegmentReductionGradShapeFn);
+    .SetShapeFn(SparseSegmentReductionGradShapeFn)
+    .Doc(R"doc(
+Computes gradients for SparseSegmentSqrtN.
+
+Returns tensor "output" with same shape as grad, except for dimension 0 whose
+value is output_dim0.
+
+grad: gradient propagated to the SparseSegmentSqrtN op.
+indices: indices passed to the corresponding SparseSegmentSqrtN op.
+segment_ids: segment_ids passed to the corresponding SparseSegmentSqrtN op.
+output_dim0: dimension 0 of "data" passed to SparseSegmentSqrtN op.
+)doc");

 REGISTER_OP("All")
     .Input("input: bool")
@@ -1151,7 +2188,21 @@ REGISTER_OP("All")
     .Output("output: bool")
     .Attr("keep_dims: bool = false")
     .Attr("Tidx: {int32, int64} = DT_INT32")
-    .SetShapeFn(shape_inference::ReductionShape);
+    .SetShapeFn(shape_inference::ReductionShape)
+    .Doc(R"doc(
+Computes the "logical and" of elements across dimensions of a tensor.
+
+Reduces `input` along the dimensions given in `reduction_indices`. Unless
+`keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
+`reduction_indices`. If `keep_dims` is true, the reduced dimensions are
+retained with length 1.
+
+input: The tensor to reduce.
+reduction_indices: The dimensions to reduce. Must be in the range
+  `[-rank(input), rank(input))`.
+keep_dims: If true, retain reduced dimensions with length 1.
+output: The reduced tensor.
+)doc"); REGISTER_OP("Any") .Input("input: bool") @@ -1159,7 +2210,21 @@ REGISTER_OP("Any") .Attr("keep_dims: bool = false") .Output("output: bool") .Attr("Tidx: {int32, int64} = DT_INT32") - .SetShapeFn(shape_inference::ReductionShape); + .SetShapeFn(shape_inference::ReductionShape) + .Doc(R"doc( +Computes the "logical or" of elements across dimensions of a tensor. + +Reduces `input` along the dimensions given in `reduction_indices`. Unless +`keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in +`reduction_indices`. If `keep_dims` is true, the reduced dimensions are +retained with length 1. + +input: The tensor to reduce. +reduction_indices: The dimensions to reduce. Must be in the range + `[-rank(input), rank(input))`. +keep_dims: If true, retain reduced dimensions with length 1. +output: The reduced tensor. +)doc"); // -------------------------------------------------------------------------- @@ -1226,7 +2291,27 @@ REGISTER_OP("Range") return RangeSize<double>(start_t, limit_t, delta_t, c); } return Status::OK(); - }); + }) + .Doc(R"doc( +Creates a sequence of numbers. + +This operation creates a sequence of numbers that begins at `start` and +extends by increments of `delta` up to but not including `limit`. + +For example: + +``` +# 'start' is 3 +# 'limit' is 18 +# 'delta' is 3 +tf.range(start, limit, delta) ==> [3, 6, 9, 12, 15] +``` + +start: 0-D (scalar). First entry in the sequence. +limit: 0-D (scalar). Upper limit of sequence, exclusive. +delta: 0-D (scalar). Optional. Default is 1. Number that increments `start`. +output: 1-D. +)doc"); REGISTER_OP("LinSpace") .Input("start: T") @@ -1258,7 +2343,25 @@ REGISTER_OP("LinSpace") if (num <= 0) return errors::InvalidArgument("Requires num > 0: ", num); c->set_output(0, c->Vector(num)); return Status::OK(); - }); + }) + .Doc(R"doc( +Generates values in an interval. + +A sequence of `num` evenly-spaced values are generated beginning at `start`. +If `num > 1`, the values in the sequence increase by `stop - start / num - 1`, +so that the last one is exactly `stop`. + +For example: + +``` +tf.linspace(10.0, 12.0, 3, name="linspace") => [ 10.0 11.0 12.0] +``` + +start: First entry in the range. +stop: Last entry in the range. +num: Number of values to generate. +output: 1-D. The generated values. +)doc"); REGISTER_OP("Complex") .Input("real: T") @@ -1266,34 +2369,120 @@ REGISTER_OP("Complex") .Output("out: Tout") .Attr("T: {float, double} = DT_FLOAT") .Attr("Tout: {complex64, complex128} = DT_COMPLEX64") - .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn); + .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn) + .Doc(R"doc( +Converts two real numbers to a complex number. + +Given a tensor `real` representing the real part of a complex number, and a +tensor `imag` representing the imaginary part of a complex number, this +operation returns complex numbers elementwise of the form \\(a + bj\\), where +*a* represents the `real` part and *b* represents the `imag` part. + +The input tensors `real` and `imag` must have the same shape. + +For example: + +``` +# tensor 'real' is [2.25, 3.25] +# tensor `imag` is [4.75, 5.75] +tf.complex(real, imag) ==> [[2.25 + 4.75j], [3.25 + 5.75j]] +``` +)doc"); REGISTER_OP("Real") .Input("input: T") .Output("output: Tout") .Attr("T: {complex64, complex128} = DT_COMPLEX64") .Attr("Tout: {float, double} = DT_FLOAT") - .SetShapeFn(shape_inference::UnchangedShape); + .SetShapeFn(shape_inference::UnchangedShape) + .Doc(R"doc( +Returns the real part of a complex number. 
+ +Given a tensor `input` of complex numbers, this operation returns a tensor of +type `float` that is the real part of each element in `input`. All elements in +`input` must be complex numbers of the form \\(a + bj\\), where *a* is the real + part returned by this operation and *b* is the imaginary part. + +For example: + +``` +# tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j] +tf.real(input) ==> [-2.25, 3.25] +``` +)doc"); REGISTER_OP("Imag") .Input("input: T") .Output("output: Tout") .Attr("T: {complex64, complex128} = DT_COMPLEX64") .Attr("Tout: {float, double} = DT_FLOAT") - .SetShapeFn(shape_inference::UnchangedShape); + .SetShapeFn(shape_inference::UnchangedShape) + .Doc(R"doc( +Returns the imaginary part of a complex number. + +Given a tensor `input` of complex numbers, this operation returns a tensor of +type `float` that is the imaginary part of each element in `input`. All +elements in `input` must be complex numbers of the form \\(a + bj\\), where *a* +is the real part and *b* is the imaginary part returned by this operation. + +For example: + +``` +# tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j] +tf.imag(input) ==> [4.75, 5.75] +``` +)doc"); REGISTER_OP("Angle") .Input("input: T") .Output("output: Tout") .Attr("T: {complex64, complex128} = DT_COMPLEX64") .Attr("Tout: {float, double} = DT_FLOAT") - .SetShapeFn(shape_inference::UnchangedShape); + .SetShapeFn(shape_inference::UnchangedShape) + .Doc(R"doc( +Returns the argument of a complex number. + +Given a tensor `input` of complex numbers, this operation returns a tensor of +type `float` that is the argument of each element in `input`. All elements in +`input` must be complex numbers of the form \\(a + bj\\), where *a* +is the real part and *b* is the imaginary part. + +The argument returned by this operation is of the form \\(atan2(b, a)\\). + +For example: + +``` +# tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j] +tf.angle(input) ==> [2.0132, 1.056] +``` + +@compatibility(numpy) +Equivalent to np.angle. +@end_compatibility +)doc"); REGISTER_OP("Conj") .Input("input: T") .Output("output: T") .Attr("T: {complex64, complex128, variant} = DT_COMPLEX64") - .SetShapeFn(shape_inference::UnchangedShape); + .SetShapeFn(shape_inference::UnchangedShape) + .Doc(R"doc( +Returns the complex conjugate of a complex number. + +Given a tensor `input` of complex numbers, this operation returns a tensor of +complex numbers that are the complex conjugate of each element in `input`. The +complex numbers in `input` must be of the form \\(a + bj\\), where *a* is the +real part and *b* is the imaginary part. + +The complex conjugate returned by this operation is of the form \\(a - bj\\). + +For example: + +``` +# tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j] +tf.conj(input) ==> [-2.25 - 4.75j, 3.25 - 5.75j] +``` +)doc"); // -------------------------------------------------------------------------- @@ -1320,7 +2509,18 @@ REGISTER_OP("Cross") } c->set_output(0, a_shape); return Status::OK(); - }); + }) + .Doc(R"doc( +Compute the pairwise cross product. + +`a` and `b` must be the same shape; they can either be simple 3-element vectors, +or any shape where the innermost dimension is 3. In the latter case, each pair +of corresponding 3-element vectors is cross-multiplied independently. + +a: A tensor containing 3-element vectors. +b: Another tensor, of same type and shape as `a`. +product: Pairwise cross product of the vectors in `a` and `b`. 
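+
+For example (illustrative values, assuming the Python wrapper `tf.cross`):
+
+```python
+a = tf.constant([1.0, 0.0, 0.0])
+b = tf.constant([0.0, 1.0, 0.0])
+tf.cross(a, b) ==> [0.0, 0.0, 1.0]
+```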
+)doc"); // -------------------------------------------------------------------------- @@ -1341,7 +2541,33 @@ REGISTER_OP("HistogramFixedWidth") c->set_output(0, c->UnknownShapeOfRank(1)); } return Status::OK(); - }); + }) + .Doc(R"doc( +Return histogram of values. + +Given the tensor `values`, this operation returns a rank 1 histogram counting +the number of entries in `values` that fall into every bin. The bins are +equal width and determined by the arguments `value_range` and `nbins`. + +```python +# Bins will be: (-inf, 1), [1, 2), [2, 3), [3, 4), [4, inf) +nbins = 5 +value_range = [0.0, 5.0] +new_values = [-1.0, 0.0, 1.5, 2.0, 5.0, 15] + +with tf.get_default_session() as sess: + hist = tf.histogram_fixed_width(new_values, value_range, nbins=5) + variables.global_variables_initializer().run() + sess.run(hist) => [2, 1, 1, 0, 2] +``` + +values: Numeric `Tensor`. +value_range: Shape [2] `Tensor` of same `dtype` as `values`. + values <= value_range[0] will be mapped to hist[0], + values >= value_range[1] will be mapped to hist[-1]. +nbins: Scalar `int32 Tensor`. Number of histogram bins. +out: A 1-D `Tensor` holding histogram of values. +)doc"); REGISTER_OP("Bincount") .Input("arr: int32") @@ -1352,7 +2578,27 @@ REGISTER_OP("Bincount") .SetShapeFn([](InferenceContext* c) { c->set_output(0, c->UnknownShapeOfRank(1)); return Status::OK(); - }); + }) + .Doc(R"doc( +Counts the number of occurrences of each value in an integer array. + +Outputs a vector with length `size` and the same dtype as `weights`. If +`weights` are empty, then index `i` stores the number of times the value `i` is +counted in `arr`. If `weights` are non-empty, then index `i` stores the sum of +the value in `weights` at each index where the corresponding value in `arr` is +`i`. + +Values in `arr` outside of the range [0, size) are ignored. + +arr: int32 `Tensor`. +size: non-negative int32 scalar `Tensor`. +weights: is an int32, int64, float32, or float64 `Tensor` with the same + shape as `arr`, or a length-0 `Tensor`, in which case it acts as all weights + equal to 1. + +bins: 1D `Tensor` with length equal to `size`. The counts or summed weights for + each value in the range [0, size). +)doc"); REGISTER_OP("Cumsum") .Input("x: T") @@ -1362,7 +2608,47 @@ REGISTER_OP("Cumsum") .Output("out: T") .Attr("T: numbertype") .Attr("Tidx: {int32, int64} = DT_INT32") - .SetShapeFn(shape_inference::UnchangedShape); + .SetShapeFn(shape_inference::UnchangedShape) + .Doc(R"doc( +Compute the cumulative sum of the tensor `x` along `axis`. + +By default, this op performs an inclusive cumsum, which means that the first +element of the input is identical to the first element of the output: + +```python +tf.cumsum([a, b, c]) # => [a, a + b, a + b + c] +``` + +By setting the `exclusive` kwarg to `True`, an exclusive cumsum is +performed instead: + +```python +tf.cumsum([a, b, c], exclusive=True) # => [0, a, a + b] +``` + +By setting the `reverse` kwarg to `True`, the cumsum is performed in the +opposite direction: + +```python +tf.cumsum([a, b, c], reverse=True) # => [a + b + c, b + c, c] +``` + +This is more efficient than using separate `tf.reverse` ops. + +The `reverse` and `exclusive` kwargs can also be combined: + +```python +tf.cumsum([a, b, c], exclusive=True, reverse=True) # => [b + c, c, 0] +``` + +x: A `Tensor`. Must be one of the following types: `float32`, `float64`, + `int64`, `int32`, `uint8`, `uint16`, `int16`, `int8`, `complex64`, + `complex128`, `qint8`, `quint8`, `qint32`, `half`. 
+axis: A `Tensor` of type `int32` (default: 0). Must be in the range + `[-rank(x), rank(x))`. +exclusive: If `True`, perform exclusive cumsum. +reverse: A `bool` (default: False). +)doc"); REGISTER_OP("Cumprod") .Input("x: T") @@ -1372,7 +2658,47 @@ REGISTER_OP("Cumprod") .Output("out: T") .Attr("T: numbertype") .Attr("Tidx: {int32, int64} = DT_INT32") - .SetShapeFn(shape_inference::UnchangedShape); + .SetShapeFn(shape_inference::UnchangedShape) + .Doc(R"doc( +Compute the cumulative product of the tensor `x` along `axis`. + +By default, this op performs an inclusive cumprod, which means that the first +element of the input is identical to the first element of the output: + +```python +tf.cumprod([a, b, c]) # => [a, a * b, a * b * c] +``` + +By setting the `exclusive` kwarg to `True`, an exclusive cumprod is +performed instead: + +```python +tf.cumprod([a, b, c], exclusive=True) # => [1, a, a * b] +``` + +By setting the `reverse` kwarg to `True`, the cumprod is performed in the +opposite direction: + +```python +tf.cumprod([a, b, c], reverse=True) # => [a * b * c, b * c, c] +``` + +This is more efficient than using separate `tf.reverse` ops. + +The `reverse` and `exclusive` kwargs can also be combined: + +```python +tf.cumprod([a, b, c], exclusive=True, reverse=True) # => [b * c, c, 1] +``` + +x: A `Tensor`. Must be one of the following types: `float32`, `float64`, + `int64`, `int32`, `uint8`, `uint16`, `int16`, `int8`, `complex64`, + `complex128`, `qint8`, `quint8`, `qint32`, `half`. +axis: A `Tensor` of type `int32` (default: 0). Must be in the range + `[-rank(x), rank(x))`. +exclusive: If `True`, perform exclusive cumprod. +reverse: A `bool` (default: False). +)doc"); REGISTER_OP("QuantizedMatMul") .Input("a: T1") @@ -1401,7 +2727,29 @@ REGISTER_OP("QuantizedMatMul") c->set_output(1, c->Scalar()); c->set_output(2, c->Scalar()); return Status::OK(); - }); + }) + .Doc(R"doc( +Perform a quantized matrix multiplication of `a` by the matrix `b`. + +The inputs must be two-dimensional matrices and the inner dimension of +`a` (after being transposed if `transpose_a` is non-zero) must match the +outer dimension of `b` (after being transposed if `transposed_b` is +non-zero). + +a: Must be a two-dimensional tensor. +b: Must be a two-dimensional tensor. +transpose_a: If true, `a` is transposed before multiplication. +transpose_b: If true, `b` is transposed before multiplication. +min_a: The float value that the lowest quantized `a` value represents. +max_a: The float value that the highest quantized `a` value represents. +min_b: The float value that the lowest quantized `b` value represents. +max_b: The float value that the highest quantized `b` value represents. +min_out: The float value that the lowest quantized output value represents. +max_out: The float value that the highest quantized output value represents. +Tactivation: The type of output produced by activation function + following this operation. + +)doc"); REGISTER_OP("QuantizedMul") .Input("x: T1") @@ -1422,7 +2770,20 @@ REGISTER_OP("QuantizedMul") c->set_output(1, c->Scalar()); c->set_output(2, c->Scalar()); return Status::OK(); - }); + }) + .Doc(R"doc( +Returns x * y element-wise, working on quantized buffers. + +min_x: The float value that the lowest quantized `x` value represents. +max_x: The float value that the highest quantized `x` value represents. +min_y: The float value that the lowest quantized `y` value represents. +max_y: The float value that the highest quantized `y` value represents. 
+min_z: The float value that the lowest quantized output value represents. +max_z: The float value that the highest quantized output value represents. + +*NOTE*: `QuantizedMul` supports limited forms of broadcasting. More about +broadcasting [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +)doc"); REGISTER_OP("QuantizedAdd") .Input("x: T1") @@ -1443,7 +2804,20 @@ REGISTER_OP("QuantizedAdd") c->set_output(1, c->Scalar()); c->set_output(2, c->Scalar()); return Status::OK(); - }); + }) + .Doc(R"doc( +Returns x + y element-wise, working on quantized buffers. + +min_x: The float value that the lowest quantized `x` value represents. +max_x: The float value that the highest quantized `x` value represents. +min_y: The float value that the lowest quantized `y` value represents. +max_y: The float value that the highest quantized `y` value represents. +min_z: The float value that the lowest quantized output value represents. +max_z: The float value that the highest quantized output value represents. + +*NOTE*: `QuantizedAdd` supports limited forms of broadcasting. More about +broadcasting [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +)doc"); REGISTER_OP("QuantizeDownAndShrinkRange") .Input("input: Tinput") @@ -1462,7 +2836,40 @@ REGISTER_OP("QuantizeDownAndShrinkRange") c->set_output(1, c->Scalar()); c->set_output(2, c->Scalar()); return Status::OK(); - }); + }) + .Doc(R"doc( +Convert the quantized 'input' tensor into a lower-precision 'output', using the +actual distribution of the values to maximize the usage of the lower bit depth +and adjusting the output min and max ranges accordingly. + +[input_min, input_max] are scalar floats that specify the range for the float +interpretation of the 'input' data. For example, if input_min is -1.0f and +input_max is 1.0f, and we are dealing with quint16 quantized data, then a 0 +value in the 16-bit data should be interpreted as -1.0f, and a 65535 means 1.0f. + +This operator tries to squeeze as much precision as possible into an output with +a lower bit depth by calculating the actual min and max values found in the +data. For example, maybe that quint16 input has no values lower than 16,384 and +none higher than 49,152. That means only half the range is actually needed, all +the float interpretations are between -0.5f and 0.5f, so if we want to compress +the data into a quint8 output, we can use that range rather than the theoretical +-1.0f to 1.0f that is suggested by the input min and max. + +In practice, this is most useful for taking output from operations like +QuantizedMatMul that can produce higher bit-depth outputs than their inputs and +may have large potential output ranges, but in practice have a distribution of +input values that only uses a small fraction of the possible range. By feeding +that output into this operator, we can reduce it from 32 bits down to 8 with +minimal loss of accuracy. + +input_min: The float value that the minimum quantized input value represents. +input_max: The float value that the maximum quantized input value represents. +Tinput: The type of the input. +output_min: The float value that the minimum quantized output value represents. +output_max: The float value that the maximum quantized output value represents. +out_type: The type of the output. Should be a lower bit depth than Tinput. 
+
+)doc");

 REGISTER_OP("Requantize")
     .Input("input: Tinput")
@@ -1485,7 +2892,26 @@ REGISTER_OP("Requantize")
       c->set_output(1, c->Scalar());
       c->set_output(2, c->Scalar());
       return Status::OK();
-    });
+    })
+    .Doc(R"doc(
+Convert the quantized 'input' tensor into a lower-precision 'output', using the
+output range specified with 'requested_output_min' and 'requested_output_max'.
+
+[input_min, input_max] are scalar floats that specify the range for the float
+interpretation of the 'input' data. For example, if input_min is -1.0f and
+input_max is 1.0f, and we are dealing with quint16 quantized data, then a 0
+value in the 16-bit data should be interpreted as -1.0f, and a 65535 means 1.0f.
+
+input_min: The float value that the minimum quantized input value represents.
+input_max: The float value that the maximum quantized input value represents.
+Tinput: The type of the input.
+requested_output_min: The float value that the minimum quantized output value represents.
+requested_output_max: The float value that the maximum quantized output value represents.
+output_min: The requested_output_min value is copied into this output.
+output_max: The requested_output_max value is copied into this output.
+out_type: The type of the output. Should be a lower bit depth than Tinput.
+
+)doc");

 REGISTER_OP("CompareAndBitpack")
     .Input("input: T")
@@ -1511,7 +2937,39 @@ REGISTER_OP("CompareAndBitpack")
       c->set_output(0, output);
       return Status::OK();
-    });
+    })
+    .Doc(R"doc(
+Compare values of `input` to `threshold` and pack resulting bits into a `uint8`.
+
+Each comparison returns a boolean `true` (if `input_value > threshold`)
+and `false` otherwise.
+
+This operation is useful for Locality-Sensitive-Hashing (LSH) and other
+algorithms that use hashing approximations of cosine and `L2` distances;
+codes can be generated from an input via:
+
+```python
+codebook_size = 50
+codebook_bits = codebook_size * 32
+codebook = tf.get_variable('codebook', [x.shape[-1].value, codebook_bits],
+                           dtype=x.dtype,
+                           initializer=tf.orthogonal_initializer())
+codes = compare_and_threshold(tf.matmul(x, codebook), threshold=0.)
+codes = tf.bitcast(codes, tf.int32)  # go from uint8 to int32
+# now codes has shape x.shape[:-1] + [codebook_size]
+```
+
+**NOTE**: Currently, the innermost dimension of the tensor must be divisible
+by 8.
+
+Given an `input` shaped `[s0, s1, ..., s_n]`, the output is
+a `uint8` tensor shaped `[s0, s1, ..., s_n / 8]`.
+
+input: Values to compare against `threshold` and bitpack.
+threshold: Threshold to compare against.
+T: The type of the input and threshold.
+output: The bitpacked comparisons.
+)doc");

 REGISTER_OP("RequantizationRange")
     .Input("input: Tinput")
@@ -1527,7 +2985,20 @@ REGISTER_OP("RequantizationRange")
      c->set_output(0, c->Scalar());
      c->set_output(1, c->Scalar());
      return Status::OK();
-    });
+    })
+    .Doc(R"doc(
+Given a quantized tensor described by (input, input_min, input_max), outputs a
+range that covers the actual values present in that tensor. This op is
+typically used to produce the requested_output_min and requested_output_max for
+Requantize.
+
+input_min: The float value that the minimum quantized input value represents.
+input_max: The float value that the maximum quantized input value represents.
+Tinput: The type of the input.
+output_min: The computed min output.
+output_max: The computed max output.
+ +)doc"); // -------------------------------------------------------------------------- @@ -1536,7 +3007,29 @@ REGISTER_OP("Bucketize") .Output("output: int32") .Attr("T: {int32, int64, float, double}") .Attr("boundaries: list(float)") - .SetShapeFn(shape_inference::UnchangedShape); + .SetShapeFn(shape_inference::UnchangedShape) + .Doc(R"doc( +Bucketizes 'input' based on 'boundaries'. + +For example, if the inputs are + boundaries = [0, 10, 100] + input = [[-5, 10000] + [150, 10] + [5, 100]] + +then the output will be + output = [[0, 3] + [3, 2] + [1, 3]] + +input: Any shape of Tensor contains with int or float type. +boundaries: A sorted list of floats gives the boundary of the buckets. +output: Same shape with 'input', each value of input replaced with bucket index. + +@compatibility(numpy) +Equivalent to np.digitize. +@end_compatibility +)doc"); #ifdef INTEL_MKL REGISTER_OP("_MklAddN") diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc index 536fc7c0c1..8ad2c06741 100644 --- a/tensorflow/core/ops/nn_ops.cc +++ b/tensorflow/core/ops/nn_ops.cc @@ -74,7 +74,24 @@ REGISTER_OP("AvgPool") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) .Attr("T: {half, bfloat16, float, double}") - .SetShapeFn(shape_inference::AvgPoolShape); + .SetShapeFn(shape_inference::AvgPoolShape) + .Doc(R"doc( +Performs average pooling on the input. + +Each entry in `output` is the mean of the corresponding size `ksize` +window in `value`. + +value: 4-D with shape `[batch, height, width, channels]`. +ksize: The size of the sliding window for each dimension of `value`. +strides: The stride of the sliding window for each dimension of `value`. +padding: The type of padding algorithm to use. +data_format: Specify the data format of the input and output data. With the + default format "NHWC", the data is stored in the order of: + [batch, in_height, in_width, in_channels]. + Alternatively, the format could be "NCHW", the data storage order of: + [batch, in_channels, in_height, in_width]. +output: The average pooled output tensor. +)doc"); REGISTER_OP("AvgPoolGrad") .Input("orig_input_shape: int32") @@ -91,7 +108,23 @@ REGISTER_OP("AvgPoolGrad") TF_RETURN_IF_ERROR(c->WithRank(s, 4, &s)); c->set_output(0, s); return Status::OK(); - }); + }) + .Doc(R"doc( +Computes gradients of the average pooling function. + +orig_input_shape: 1-D. Shape of the original input to `avg_pool`. +grad: 4-D with shape `[batch, height, width, channels]`. Gradients w.r.t. + the output of `avg_pool`. +ksize: The size of the sliding window for each dimension of the input. +strides: The stride of the sliding window for each dimension of the input. +padding: The type of padding algorithm to use. +data_format: Specify the data format of the input and output data. With the + default format "NHWC", the data is stored in the order of: + [batch, in_height, in_width, in_channels]. + Alternatively, the format could be "NCHW", the data storage order of: + [batch, in_channels, in_height, in_width]. +output: 4-D. Gradients w.r.t. the input of `avg_pool`. +)doc"); // -------------------------------------------------------------------------- @@ -121,7 +154,28 @@ REGISTER_OP("BatchNormWithGlobalNormalization") TF_RETURN_IF_ERROR(c->ReplaceDim(input, 3, last_dim, &out)); c->set_output(0, out); return Status::OK(); - }); + }) + .Doc(R"doc( +Batch normalization. + +This op is deprecated. Prefer `tf.nn.batch_normalization`. + +t: A 4D input Tensor. +m: A 1D mean Tensor with size matching the last dimension of t. 
+    This is the first output from tf.nn.moments,
+    or a saved moving average thereof.
+v: A 1D variance Tensor with size matching the last dimension of t.
+    This is the second output from tf.nn.moments,
+    or a saved moving average thereof.
+beta: A 1D beta Tensor with size matching the last dimension of t.
+    An offset to be added to the normalized tensor.
+gamma: A 1D gamma Tensor with size matching the last dimension of t.
+    If "scale_after_normalization" is true, this tensor will be multiplied
+    by the normalized tensor.
+variance_epsilon: A small float number to avoid dividing by 0.
+scale_after_normalization: A bool indicating whether the resulting tensor
+    needs to be multiplied by gamma.
+)doc");

 REGISTER_OP("BatchNormWithGlobalNormalizationGrad")
     .Input("t: T")
@@ -161,7 +215,33 @@ REGISTER_OP("BatchNormWithGlobalNormalizationGrad")
       c->set_output(3, vector_shape);
       c->set_output(4, vector_shape);
       return Status::OK();
-    });
+    })
+    .Doc(R"doc(
+Gradients for batch normalization.
+
+This op is deprecated. See `tf.nn.batch_normalization`.
+
+t: A 4D input Tensor.
+m: A 1D mean Tensor with size matching the last dimension of t.
+    This is the first output from tf.nn.moments,
+    or a saved moving average thereof.
+v: A 1D variance Tensor with size matching the last dimension of t.
+    This is the second output from tf.nn.moments,
+    or a saved moving average thereof.
+gamma: A 1D gamma Tensor with size matching the last dimension of t.
+    If "scale_after_normalization" is true, this Tensor will be multiplied
+    by the normalized Tensor.
+backprop: 4D backprop Tensor.
+variance_epsilon: A small float number to avoid dividing by 0.
+scale_after_normalization: A bool indicating whether the resulting tensor
+    needs to be multiplied by gamma.
+
+dx: 4D backprop tensor for input.
+dm: 1D backprop tensor for mean.
+dv: 1D backprop tensor for variance.
+db: 1D backprop tensor for beta.
+dg: 1D backprop tensor for gamma.
+)doc");

 // --------------------------------------------------------------------------

@@ -180,7 +260,34 @@ REGISTER_OP("FusedBatchNorm")
     .Attr("epsilon: float = 0.0001")
     .Attr("data_format: string = 'NHWC'")
     .Attr("is_training: bool = true")
-    .SetShapeFn(shape_inference::FusedBatchNormShape);
+    .SetShapeFn(shape_inference::FusedBatchNormShape)
+    .Doc(R"doc(
+Batch normalization.
+Note that the size of 4D Tensors is defined by either "NHWC" or "NCHW".
+The size of 1D Tensors matches the dimension C of the 4D Tensors.
+
+x: A 4D Tensor for input data.
+scale: A 1D Tensor for scaling factor, to scale the normalized x.
+offset: A 1D Tensor for offset, to shift the normalized x.
+mean: A 1D Tensor for population mean. Used for inference only;
+    must be empty for training.
+variance: A 1D Tensor for population variance. Used for inference only;
+    must be empty for training.
+y: A 4D Tensor for output data.
+batch_mean: A 1D Tensor for the computed batch mean, to be used by TensorFlow
+    to compute the running mean.
+batch_variance: A 1D Tensor for the computed batch variance, to be used by
+    TensorFlow to compute the running variance.
+reserve_space_1: A 1D Tensor for the computed batch mean, to be reused
+    in the gradient computation.
+reserve_space_2: A 1D Tensor for the computed batch variance (inverted variance
+    in the cuDNN case), to be reused in the gradient computation.
+T: The data type for the elements of input and output Tensors.
+epsilon: A small float number added to the variance of x.
+data_format: The data format for x and y. Either "NHWC" (default) or "NCHW".
+is_training: A bool value to indicate the operation is for training (default)
+    or inference.
+)doc");

 REGISTER_OP("FusedBatchNormV2")
     .Input("x: T")
@@ -198,7 +305,35 @@ REGISTER_OP("FusedBatchNormV2")
     .Attr("epsilon: float = 0.0001")
     .Attr("data_format: string = 'NHWC'")
     .Attr("is_training: bool = true")
-    .SetShapeFn(shape_inference::FusedBatchNormShape);
+    .SetShapeFn(shape_inference::FusedBatchNormShape)
+    .Doc(R"doc(
+Batch normalization.
+Note that the size of 4D Tensors is defined by either "NHWC" or "NCHW".
+The size of 1D Tensors matches the dimension C of the 4D Tensors.
+
+x: A 4D Tensor for input data.
+scale: A 1D Tensor for scaling factor, to scale the normalized x.
+offset: A 1D Tensor for offset, to shift the normalized x.
+mean: A 1D Tensor for population mean. Used for inference only;
+    must be empty for training.
+variance: A 1D Tensor for population variance. Used for inference only;
+    must be empty for training.
+y: A 4D Tensor for output data.
+batch_mean: A 1D Tensor for the computed batch mean, to be used by TensorFlow
+    to compute the running mean.
+batch_variance: A 1D Tensor for the computed batch variance, to be used by
+    TensorFlow to compute the running variance.
+reserve_space_1: A 1D Tensor for the computed batch mean, to be reused
+    in the gradient computation.
+reserve_space_2: A 1D Tensor for the computed batch variance (inverted variance
+    in the cuDNN case), to be reused in the gradient computation.
+T: The data type for the elements of input and output Tensors.
+U: The data type for the scale, offset, mean, and variance.
+epsilon: A small float number added to the variance of x.
+data_format: The data format for x and y. Either "NHWC" (default) or "NCHW".
+is_training: A bool value to indicate the operation is for training (default)
+    or inference.
+)doc");

 REGISTER_OP("FusedBatchNormGrad")
     .Input("y_backprop: T")
@@ -215,7 +350,37 @@ REGISTER_OP("FusedBatchNormGrad")
     .Attr("epsilon: float = 0.0001")
     .Attr("data_format: string = 'NHWC'")
     .Attr("is_training: bool = true")
-    .SetShapeFn(shape_inference::FusedBatchNormGradShape);
+    .SetShapeFn(shape_inference::FusedBatchNormGradShape)
+    .Doc(R"doc(
+Gradient for batch normalization.
+Note that the size of 4D Tensors is defined by either "NHWC" or "NCHW".
+The size of 1D Tensors matches the dimension C of the 4D Tensors.
+
+y_backprop: A 4D Tensor for the gradient with respect to y.
+x: A 4D Tensor for input data.
+scale: A 1D Tensor for scaling factor, to scale the normalized x.
+reserve_space_1: When is_training is True, a 1D Tensor for the computed batch
+    mean to be reused in gradient computation. When is_training is
+    False, a 1D Tensor for the population mean to be reused in both
+    1st and 2nd order gradient computation.
+reserve_space_2: When is_training is True, a 1D Tensor for the computed batch
+    variance (inverted variance in the cuDNN case) to be reused in
+    gradient computation. When is_training is False, a 1D Tensor
+    for the population variance to be reused in both 1st and 2nd
+    order gradient computation.
+x_backprop: A 4D Tensor for the gradient with respect to x.
+scale_backprop: A 1D Tensor for the gradient with respect to scale.
+offset_backprop: A 1D Tensor for the gradient with respect to offset.
+reserve_space_3: Unused placeholder to match the mean input in FusedBatchNorm.
+reserve_space_4: Unused placeholder to match the variance input
+    in FusedBatchNorm.
+T: The data type for the elements of input and output Tensors.
+epsilon: A small float number added to the variance of x.
+data_format: The data format for y_backprop, x, x_backprop.
+    Either "NHWC" (default) or "NCHW".
+is_training: A bool value to indicate the operation is for training (default)
+    or inference.
+)doc");

 REGISTER_OP("FusedBatchNormGradV2")
     .Input("y_backprop: T")
@@ -233,7 +398,38 @@ REGISTER_OP("FusedBatchNormGradV2")
     .Attr("epsilon: float = 0.0001")
     .Attr("data_format: string = 'NHWC'")
     .Attr("is_training: bool = true")
-    .SetShapeFn(shape_inference::FusedBatchNormGradShape);
+    .SetShapeFn(shape_inference::FusedBatchNormGradShape)
+    .Doc(R"doc(
+Gradient for batch normalization.
+Note that the size of 4D Tensors is defined by either "NHWC" or "NCHW".
+The size of 1D Tensors matches the dimension C of the 4D Tensors.
+
+y_backprop: A 4D Tensor for the gradient with respect to y.
+x: A 4D Tensor for input data.
+scale: A 1D Tensor for scaling factor, to scale the normalized x.
+reserve_space_1: When is_training is True, a 1D Tensor for the computed batch
+    mean to be reused in gradient computation. When is_training is
+    False, a 1D Tensor for the population mean to be reused in both
+    1st and 2nd order gradient computation.
+reserve_space_2: When is_training is True, a 1D Tensor for the computed batch
+    variance (inverted variance in the cuDNN case) to be reused in
+    gradient computation. When is_training is False, a 1D Tensor
+    for the population variance to be reused in both 1st and 2nd
+    order gradient computation.
+x_backprop: A 4D Tensor for the gradient with respect to x.
+scale_backprop: A 1D Tensor for the gradient with respect to scale.
+offset_backprop: A 1D Tensor for the gradient with respect to offset.
+reserve_space_3: Unused placeholder to match the mean input in FusedBatchNorm.
+reserve_space_4: Unused placeholder to match the variance input
+    in FusedBatchNorm.
+T: The data type for the elements of input and output Tensors.
+U: The data type for the scale, offset, mean, and variance.
+epsilon: A small float number added to the variance of x.
+data_format: The data format for y_backprop, x, x_backprop.
+    Either "NHWC" (default) or "NCHW".
+is_training: A bool value to indicate the operation is for training (default)
+    or inference.
+)doc");

 // --------------------------------------------------------------------------

@@ -243,7 +439,24 @@ REGISTER_OP("BiasAdd")
     .Input("bias: T")
     .Attr(GetConvnetDataFormatAttrString())
     .Output("output: T")
-    .SetShapeFn(shape_inference::BiasAddShape);
+    .SetShapeFn(shape_inference::BiasAddShape)
+    .Doc(R"doc(
+Adds `bias` to `value`.
+
+This is a special case of `tf.add` where `bias` is restricted to be 1-D.
+Broadcasting is supported, so `value` may have any number of dimensions.
+
+value: Any number of dimensions.
+bias: 1-D with size the last dimension of `value`.
+data_format: Specify the data format of the input and output data. With the
+    default format "NHWC", the bias tensor will be added to the last dimension
+    of the value tensor.
+    Alternatively, the format could be "NCHW", the data storage order of:
+    [batch, in_channels, in_height, in_width].
+    The tensor will be added to "in_channels", the third-to-last
+    dimension.
+output: Broadcasted sum of `value` and `bias`.
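As a quick illustration of the broadcasting described in the BiasAdd doc above, a NumPy sketch with our own toy shapes (not TensorFlow code):

```python
import numpy as np

# BiasAdd semantics: `bias` is 1-D and broadcasts along the channel dimension.
value_nhwc = np.zeros((2, 4, 4, 3))            # [batch, height, width, channels]
bias = np.array([1.0, 2.0, 3.0])               # one entry per channel

out_nhwc = value_nhwc + bias                   # "NHWC": added to the last dim
value_nchw = value_nhwc.transpose(0, 3, 1, 2)  # [batch, channels, height, width]
out_nchw = value_nchw + bias.reshape(1, 3, 1, 1)  # "NCHW": third-to-last dim
print(out_nhwc.shape, out_nchw.shape)          # (2, 4, 4, 3) (2, 3, 4, 4)
```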
+)doc"); // -------------------------------------------------------------------------- REGISTER_OP("BiasAddGrad") @@ -251,7 +464,24 @@ REGISTER_OP("BiasAddGrad") .Input("out_backprop: T") .Attr(GetConvnetDataFormatAttrString()) .Output("output: T") - .SetShapeFn(shape_inference::BiasAddGradShape); + .SetShapeFn(shape_inference::BiasAddGradShape) + .Doc(R"doc( +The backward operation for "BiasAdd" on the "bias" tensor. + +It accumulates all the values from out_backprop into the feature dimension. +For NHWC data format, the feature dimension is the last. For NCHW data format, +the feature dimension is the third-to-last. + +out_backprop: Any number of dimensions. +output: 1-D with size the feature dimension of `out_backprop`. +data_format: Specify the data format of the input and output data. With the + default format "NHWC", the bias tensor will be added to the last dimension + of the value tensor. + Alternatively, the format could be "NCHW", the data storage order of: + [batch, in_channels, in_height, in_width]. + The tensor will be added to "in_channels", the third-to-the-last + dimension. +)doc"); // -------------------------------------------------------------------------- REGISTER_OP("BiasAddV1") @@ -259,7 +489,19 @@ REGISTER_OP("BiasAddV1") .Input("value: T") .Input("bias: T") .Output("output: T") - .SetShapeFn(shape_inference::BiasAddShape); + .SetShapeFn(shape_inference::BiasAddShape) + .Doc(R"doc( +Adds `bias` to `value`. + +This is a deprecated version of BiasAdd and will be soon removed. + +This is a special case of `tf.add` where `bias` is restricted to be 1-D. +Broadcasting is supported, so `value` may have any number of dimensions. + +value: Any number of dimensions. +bias: 1-D with size the last dimension of `value`. +output: Broadcasted sum of `value` and `bias`. +)doc"); // -------------------------------------------------------------------------- REGISTER_OP("Conv2D") @@ -272,7 +514,53 @@ REGISTER_OP("Conv2D") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) .Attr("dilations: list(int) = [1, 1, 1, 1]") - .SetShapeFn(shape_inference::Conv2DShape); + .SetShapeFn(shape_inference::Conv2DShape) + .Doc(R"doc( +Computes a 2-D convolution given 4-D `input` and `filter` tensors. + +Given an input tensor of shape `[batch, in_height, in_width, in_channels]` +and a filter / kernel tensor of shape +`[filter_height, filter_width, in_channels, out_channels]`, this op +performs the following: + +1. Flattens the filter to a 2-D matrix with shape + `[filter_height * filter_width * in_channels, output_channels]`. +2. Extracts image patches from the input tensor to form a *virtual* + tensor of shape `[batch, out_height, out_width, + filter_height * filter_width * in_channels]`. +3. For each patch, right-multiplies the filter matrix and the image patch + vector. + +In detail, with the default NHWC format, + + output[b, i, j, k] = + sum_{di, dj, q} input[b, strides[1] * i + di, strides[2] * j + dj, q] * + filter[di, dj, q, k] + +Must have `strides[0] = strides[3] = 1`. For the most common case of the same +horizontal and vertices strides, `strides = [1, stride, stride, 1]`. + +input: A 4-D tensor. The dimension order is interpreted according to the value + of `data_format`, see below for details. +filter: A 4-D tensor of shape + `[filter_height, filter_width, in_channels, out_channels]` +output: A 4-D tensor. The dimension order is determined by the value of + `data_format`, see below for details. +strides: 1-D tensor of length 4. 
+    dimension of `input`. The dimension order is determined by the value of
+    `data_format`, see below for details.
+padding: The type of padding algorithm to use.
+data_format: Specify the data format of the input and output data. With the
+    default format "NHWC", the data is stored in the order of:
+        [batch, height, width, channels].
+    Alternatively, the format could be "NCHW", the data storage order of:
+        [batch, channels, height, width].
+dilations: 1-D tensor of length 4. The dilation factor for each dimension of
+    `input`. If set to k > 1, there will be k-1 skipped cells between each
+    filter element on that dimension. The dimension order is determined by the
+    value of `data_format`, see above for details. Dilations in the batch and
+    depth dimensions must be 1.
+)doc");

 REGISTER_OP("Conv2DBackpropInput")
     .Input("input_sizes: int32")
@@ -291,7 +579,33 @@ REGISTER_OP("Conv2DBackpropInput")
       TF_RETURN_IF_ERROR(c->WithRank(s, 4, &s));
       c->set_output(0, s);
       return Status::OK();
-    });
+    })
+    .Doc(R"doc(
+Computes the gradients of convolution with respect to the input.
+
+input_sizes: An integer vector representing the shape of `input`,
+    where `input` is a 4-D `[batch, height, width, channels]` tensor.
+filter: 4-D with shape
+    `[filter_height, filter_width, in_channels, out_channels]`.
+out_backprop: 4-D with shape `[batch, out_height, out_width, out_channels]`.
+    Gradients w.r.t. the output of the convolution.
+strides: The stride of the sliding window for each dimension of the input
+    of the convolution. Must be in the same order as the dimensions specified
+    with `data_format`.
+padding: The type of padding algorithm to use.
+output: 4-D with shape `[batch, in_height, in_width, in_channels]`. Gradient
+    w.r.t. the input of the convolution.
+data_format: Specify the data format of the input and output data. With the
+    default format "NHWC", the data is stored in the order of:
+        [batch, in_height, in_width, in_channels].
+    Alternatively, the format could be "NCHW", the data storage order of:
+        [batch, in_channels, in_height, in_width].
+dilations: 1-D tensor of length 4. The dilation factor for each dimension of
+    `input`. If set to k > 1, there will be k-1 skipped cells between each filter
+    element on that dimension. The dimension order is determined by the value of
+    `data_format`, see above for details. Dilations in the batch and depth
+    dimensions must be 1.
+)doc");

 // TODO(jeff): Instead of 'use_cudnn_for_gpu', maybe we should have a
 // more general string attribute ('kernel_impl'?) that can be used to
@@ -313,7 +627,34 @@ REGISTER_OP("Conv2DBackpropFilter")
       TF_RETURN_IF_ERROR(c->WithRank(s, 4, &s));
       c->set_output(0, s);
       return Status::OK();
-    });
+    })
+    .Doc(R"doc(
+Computes the gradients of convolution with respect to the filter.
+
+input: 4-D with shape `[batch, in_height, in_width, in_channels]`.
+filter_sizes: An integer vector representing the tensor shape of `filter`,
+    where `filter` is a 4-D
+    `[filter_height, filter_width, in_channels, out_channels]` tensor.
+out_backprop: 4-D with shape `[batch, out_height, out_width, out_channels]`.
+    Gradients w.r.t. the output of the convolution.
+strides: The stride of the sliding window for each dimension of the input
+    of the convolution. Must be in the same order as the dimensions specified
+    with `data_format`.
+padding: The type of padding algorithm to use.
+output: 4-D with shape
+    `[filter_height, filter_width, in_channels, out_channels]`. Gradient w.r.t.
+    the `filter` input of the convolution.
+data_format: Specify the data format of the input and output data. With the
+    default format "NHWC", the data is stored in the order of:
+        [batch, in_height, in_width, in_channels].
+    Alternatively, the format could be "NCHW", the data storage order of:
+        [batch, in_channels, in_height, in_width].
+dilations: 1-D tensor of length 4. The dilation factor for each dimension of
+    `input`. If set to k > 1, there will be k-1 skipped cells between each filter
+    element on that dimension. The dimension order is determined by the value of
+    `data_format`, see above for details. Dilations in the batch and depth
+    dimensions must be 1.
+)doc");

 namespace {

@@ -416,7 +757,17 @@ REGISTER_OP("DataFormatDimMap")
     .Attr("T: {int32, int64} = DT_INT32")
     .Attr("src_format: string = 'NHWC'")
     .Attr("dst_format: string = 'NCHW'")
-    .SetShapeFn(shape_inference::UnchangedShape);
+    .SetShapeFn(shape_inference::UnchangedShape)
+    .Doc(R"doc(
+Returns the dimension index in the destination data format given the one in
+the source data format.
+
+x: A Tensor with each element as a dimension index in source data format.
+    Must be in the range [-4, 4).
+y: A Tensor with each element as a dimension index in destination data format.
+src_format: source data format.
+dst_format: destination data format.
+)doc");

 REGISTER_OP("DataFormatVecPermute")
     .Input("x: T")
@@ -424,7 +775,16 @@ REGISTER_OP("DataFormatVecPermute")
     .Attr("T: {int32, int64} = DT_INT32")
     .Attr("src_format: string = 'NHWC'")
     .Attr("dst_format: string = 'NCHW'")
-    .SetShapeFn(shape_inference::UnchangedShape);
+    .SetShapeFn(shape_inference::UnchangedShape)
+    .Doc(R"doc(
+Returns the permuted vector/tensor in the destination data format given the
+one in the source data format.
+
+x: Vector of size 4 or Tensor of shape (4, 2) in source data format.
+y: Vector of size 4 or Tensor of shape (4, 2) in destination data format.
+src_format: source data format.
+dst_format: destination data format.
+)doc");

 REGISTER_OP("FusedResizeAndPadConv2D")
     .Input("input: T")
@@ -439,7 +799,35 @@ REGISTER_OP("FusedResizeAndPadConv2D")
     .Attr(GetPaddingAttrString())
     .SetShapeFn([](InferenceContext* c) {
       return CommonFusedConvCalculations(c, true /* has_resize */);
-    });
+    })
+    .Doc(R"doc(
+Performs a resize and padding as a preprocess during a convolution.
+
+It's often possible to do spatial transformations more efficiently as part of
+the packing stage of a convolution, so this op allows for an optimized
+implementation where these stages are fused together. This prevents the need to
+write out the intermediate results as whole tensors, reducing memory pressure,
+and we can get some latency gains by merging the transformation calculations.
+The data_format attribute for Conv2D isn't supported by this op, and 'NHWC'
+order is used instead.
+Internally this op uses a single per-graph scratch buffer, which means that it
+will block if multiple versions are being run in parallel. This is because this
+operator is primarily an optimization to minimize memory usage.
+
+input: 4-D with shape `[batch, in_height, in_width, in_channels]`.
+size: A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The
+    new size for the images.
+paddings: A two-column matrix specifying the padding sizes. The number of
+    rows must be the same as the rank of `input`.
+filter: 4-D with shape
+    `[filter_height, filter_width, in_channels, out_channels]`.
+resize_align_corners: If true, rescale input by (new_height - 1) / (height - 1),
+    which exactly aligns the 4 corners of images and resized images.
+    If false, rescale by new_height / height. The width dimension is treated
+    similarly.
+strides: 1-D of length 4. The stride of the sliding window for each dimension
+    of `input`. Must be in the same order as the dimension specified with format.
+padding: The type of padding algorithm to use.
+    )doc");

 REGISTER_OP("FusedPadConv2D")
     .Input("input: T")
@@ -452,7 +840,31 @@ REGISTER_OP("FusedPadConv2D")
     .Attr(GetPaddingAttrString())
     .SetShapeFn([](InferenceContext* c) {
       return CommonFusedConvCalculations(c, false /* has_resize */);
-    });
+    })
+    .Doc(R"doc(
+Performs a padding as a preprocess during a convolution.
+
+Similar to FusedResizeAndPadConv2d, this op allows for an optimized
+implementation where the spatial padding transformation stage is fused with the
+im2col lookup, but in this case without the bilinear filtering required for
+resizing. Fusing the padding prevents the need to write out the intermediate
+results as whole tensors, reducing memory pressure, and we can get some latency
+gains by merging the transformation calculations.
+The data_format attribute for Conv2D isn't supported by this op, and 'NHWC'
+order is used instead.
+Internally this op uses a single per-graph scratch buffer, which means that it
+will block if multiple versions are being run in parallel. This is because this
+operator is primarily an optimization to minimize memory usage.
+
+input: 4-D with shape `[batch, in_height, in_width, in_channels]`.
+paddings: A two-column matrix specifying the padding sizes. The number of
+    rows must be the same as the rank of `input`.
+filter: 4-D with shape
+    `[filter_height, filter_width, in_channels, out_channels]`.
+strides: 1-D of length 4. The stride of the sliding window for each dimension
+    of `input`. Must be in the same order as the dimension specified with format.
+padding: The type of padding algorithm to use.
+    )doc");

 // --------------------------------------------------------------------------

@@ -465,7 +877,42 @@ REGISTER_OP("DepthwiseConv2dNative")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnetDataFormatAttrString())
     .Attr("dilations: list(int) = [1, 1, 1, 1]")
-    .SetShapeFn(shape_inference::DepthwiseConv2DNativeShape);
+    .SetShapeFn(shape_inference::DepthwiseConv2DNativeShape)
+    .Doc(R"doc(
+Computes a 2-D depthwise convolution given 4-D `input` and `filter` tensors.
+
+Given an input tensor of shape `[batch, in_height, in_width, in_channels]`
+and a filter / kernel tensor of shape
+`[filter_height, filter_width, in_channels, channel_multiplier]`, containing
+`in_channels` convolutional filters of depth 1, `depthwise_conv2d` applies
+a different filter to each input channel (expanding from 1 channel to
+`channel_multiplier` channels for each), then concatenates the results
+together. Thus, the output has `in_channels * channel_multiplier` channels.
+
+```
+for k in 0..in_channels-1
+  for q in 0..channel_multiplier-1
+    output[b, i, j, k * channel_multiplier + q] =
+      sum_{di, dj} input[b, strides[1] * i + di, strides[2] * j + dj, k] *
+                   filter[di, dj, k, q]
+```
+
+Must have `strides[0] = strides[3] = 1`. For the most common case of the same
+horizontal and vertical strides, `strides = [1, stride, stride, 1]`.
+strides: 1-D of length 4. The stride of the sliding window for each dimension
+    of `input`.
+padding: The type of padding algorithm to use.
+data_format: Specify the data format of the input and output data. With the
+    default format "NHWC", the data is stored in the order of:
+        [batch, height, width, channels].
+    Alternatively, the format could be "NCHW", the data storage order of:
+        [batch, channels, height, width].
+dilations: 1-D tensor of length 4. The dilation factor for each dimension of
+    `input`. If set to k > 1, there will be k-1 skipped cells between each filter
+    element on that dimension. The dimension order is determined by the value of
+    `data_format`, see above for details. Dilations in the batch and depth
+    dimensions must be 1.
+)doc");

 REGISTER_OP("DepthwiseConv2dNativeBackpropInput")
     .Input("input_sizes: int32")
@@ -483,7 +930,37 @@ REGISTER_OP("DepthwiseConv2dNativeBackpropInput")
       TF_RETURN_IF_ERROR(c->WithRank(s, 4, &s));
       c->set_output(0, s);
       return Status::OK();
-    });
+    })
+    .Doc(R"doc(
+Computes the gradients of depthwise convolution with respect to the input.
+
+input_sizes: An integer vector representing the shape of `input`, based
+    on `data_format`. For example, if `data_format` is 'NHWC' then
+    `input` is a 4-D `[batch, height, width, channels]` tensor.
+filter: 4-D with shape
+    `[filter_height, filter_width, in_channels, depthwise_multiplier]`.
+out_backprop: 4-D with shape based on `data_format`.
+    For example, if `data_format` is 'NHWC' then
+    out_backprop shape is `[batch, out_height, out_width, out_channels]`.
+    Gradients w.r.t. the output of the convolution.
+strides: The stride of the sliding window for each dimension of the input
+    of the convolution.
+padding: The type of padding algorithm to use.
+data_format: Specify the data format of the input and output data. With the
+    default format "NHWC", the data is stored in the order of:
+        [batch, height, width, channels].
+    Alternatively, the format could be "NCHW", the data storage order of:
+        [batch, channels, height, width].
+dilations: 1-D tensor of length 4. The dilation factor for each dimension of
+    `input`. If set to k > 1, there will be k-1 skipped cells between each filter
+    element on that dimension. The dimension order is determined by the value of
+    `data_format`, see above for details. Dilations in the batch and depth
+    dimensions must be 1.
+output: 4-D with shape according to `data_format`. For example, if
+    `data_format` is 'NHWC', output shape is `[batch, in_height,
+    in_width, in_channels]`. Gradient w.r.t. the input of the
+    convolution.
+)doc");

 REGISTER_OP("DepthwiseConv2dNativeBackpropFilter")
     .Input("input: T")
@@ -501,7 +978,37 @@ REGISTER_OP("DepthwiseConv2dNativeBackpropFilter")
       TF_RETURN_IF_ERROR(c->WithRank(s, 4, &s));
       c->set_output(0, s);
       return Status::OK();
-    });
+    })
+    .Doc(R"doc(
+Computes the gradients of depthwise convolution with respect to the filter.
+
+input: 4-D with shape based on `data_format`. For example, if
+    `data_format` is 'NHWC' then `input` is a 4-D `[batch, in_height,
+    in_width, in_channels]` tensor.
+filter_sizes: An integer vector representing the tensor shape of `filter`,
+    where `filter` is a 4-D
+    `[filter_height, filter_width, in_channels, depthwise_multiplier]` tensor.
+out_backprop: 4-D with shape based on `data_format`.
+    For example, if `data_format` is 'NHWC' then
+    out_backprop shape is `[batch, out_height, out_width, out_channels]`.
+    Gradients w.r.t. the output of the convolution.
+strides: The stride of the sliding window for each dimension of the input
+    of the convolution.
+padding: The type of padding algorithm to use.
+data_format: Specify the data format of the input and output data. With the
+    default format "NHWC", the data is stored in the order of:
+        [batch, height, width, channels].
+    Alternatively, the format could be "NCHW", the data storage order of:
+        [batch, channels, height, width].
+dilations: 1-D tensor of length 4. The dilation factor for each dimension of
+    `input`. If set to k > 1, there will be k-1 skipped cells between each filter
+    element on that dimension. The dimension order is determined by the value of
+    `data_format`, see above for details. Dilations in the batch and depth
+    dimensions must be 1.
+output: 4-D with shape
+    `[filter_height, filter_width, in_channels, out_channels]`. Gradient w.r.t.
+    the `filter` input of the convolution.
+)doc");

 // --------------------------------------------------------------------------
 REGISTER_OP("Conv3D")
@@ -513,7 +1020,33 @@ REGISTER_OP("Conv3D")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnet3dDataFormatAttrString())
     .Attr("dilations: list(int) = [1, 1, 1, 1, 1]")
-    .SetShapeFn(shape_inference::Conv3DShape);
+    .SetShapeFn(shape_inference::Conv3DShape)
+    .Doc(R"doc(
+Computes a 3-D convolution given 5-D `input` and `filter` tensors.
+
+In signal processing, cross-correlation is a measure of similarity of
+two waveforms as a function of a time-lag applied to one of them. This
+is also known as a sliding dot product or sliding inner-product.
+
+Our Conv3D implements a form of cross-correlation.
+
+input: Shape `[batch, in_depth, in_height, in_width, in_channels]`.
+filter: Shape `[filter_depth, filter_height, filter_width, in_channels,
+    out_channels]`. `in_channels` must match between `input` and `filter`.
+strides: 1-D tensor of length 5. The stride of the sliding window for each
+    dimension of `input`. Must have `strides[0] = strides[4] = 1`.
+padding: The type of padding algorithm to use.
+data_format: The data format of the input and output data. With the
+    default format "NDHWC", the data is stored in the order of:
+        [batch, in_depth, in_height, in_width, in_channels].
+    Alternatively, the format could be "NCDHW", the data storage order is:
+        [batch, in_channels, in_depth, in_height, in_width].
+dilations: 1-D tensor of length 5. The dilation factor for each dimension of
+    `input`. If set to k > 1, there will be k-1 skipped cells between each
+    filter element on that dimension. The dimension order is determined by the
+    value of `data_format`, see above for details. Dilations in the batch and
+    depth dimensions must be 1.
+)doc");

 REGISTER_OP("Conv3DBackpropInput")
     .Input("input: T")
@@ -526,7 +1059,20 @@ REGISTER_OP("Conv3DBackpropInput")
     .Deprecated(10, "Use Conv3DBackpropInputV2")
     .SetShapeFn([](InferenceContext* c) {
       return UnchangedShapeWithRank(c, 5);
-    });
+    })
+    .Doc(R"doc(
+Computes the gradients of 3-D convolution with respect to the input.
+
+input: Shape `[batch, depth, rows, cols, in_channels]`.
+filter: Shape `[depth, rows, cols, in_channels, out_channels]`.
+    `in_channels` must match between `input` and `filter`.
+out_backprop: Backprop signal of shape `[batch, out_depth, out_rows, out_cols,
+    out_channels]`.
+strides: 1-D tensor of length 5. The stride of the sliding window for each
+    dimension of `input`. Must have `strides[0] = strides[4] = 1`.
+padding: The type of padding algorithm to use.
+
+)doc");

 REGISTER_OP("Conv3DBackpropFilter")
     .Input("input: T")
@@ -542,7 +1088,20 @@ REGISTER_OP("Conv3DBackpropFilter")
       TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 5, &out));
       c->set_output(0, out);
       return Status::OK();
-    });
+    })
+    .Doc(R"doc(
+Computes the gradients of 3-D convolution with respect to the filter.
+
+input: Shape `[batch, depth, rows, cols, in_channels]`.
+filter: Shape `[depth, rows, cols, in_channels, out_channels]`.
+    `in_channels` must match between `input` and `filter`.
+out_backprop: Backprop signal of shape `[batch, out_depth, out_rows, out_cols,
+    out_channels]`.
+strides: 1-D tensor of length 5. The stride of the sliding window for each
+    dimension of `input`. Must have `strides[0] = strides[4] = 1`.
+padding: The type of padding algorithm to use.
+
+)doc");

 REGISTER_OP("Conv3DBackpropInputV2")
     .Input("input_sizes: int32")
@@ -560,7 +1119,32 @@ REGISTER_OP("Conv3DBackpropInputV2")
       TF_RETURN_IF_ERROR(c->WithRank(s, 5, &s));
       c->set_output(0, s);
       return Status::OK();
-    });
+    })
+    .Doc(R"doc(
+Computes the gradients of 3-D convolution with respect to the input.
+
+input_sizes: An integer vector representing the tensor shape of `input`,
+    where `input` is a 5-D
+    `[batch, depth, rows, cols, in_channels]` tensor.
+filter: Shape `[depth, rows, cols, in_channels, out_channels]`.
+    `in_channels` must match between `input` and `filter`.
+out_backprop: Backprop signal of shape `[batch, out_depth, out_rows, out_cols,
+    out_channels]`.
+strides: 1-D tensor of length 5. The stride of the sliding window for each
+    dimension of `input`. Must have `strides[0] = strides[4] = 1`.
+padding: The type of padding algorithm to use.
+data_format: The data format of the input and output data. With the
+    default format "NDHWC", the data is stored in the order of:
+        [batch, in_depth, in_height, in_width, in_channels].
+    Alternatively, the format could be "NCDHW", the data storage order is:
+        [batch, in_channels, in_depth, in_height, in_width].
+dilations: 1-D tensor of length 5. The dilation factor for each dimension of
+    `input`. If set to k > 1, there will be k-1 skipped cells between each
+    filter element on that dimension. The dimension order is determined by the
+    value of `data_format`, see above for details. Dilations in the batch and
+    depth dimensions must be 1.
+
+)doc");

 REGISTER_OP("Conv3DBackpropFilterV2")
     .Input("input: T")
@@ -578,7 +1162,32 @@ REGISTER_OP("Conv3DBackpropFilterV2")
       TF_RETURN_IF_ERROR(c->WithRank(s, 5, &s));
       c->set_output(0, s);
       return Status::OK();
-    });
+    })
+    .Doc(R"doc(
+Computes the gradients of 3-D convolution with respect to the filter.
+
+input: Shape `[batch, depth, rows, cols, in_channels]`.
+filter_sizes: An integer vector representing the tensor shape of `filter`,
+    where `filter` is a 5-D
+    `[filter_depth, filter_height, filter_width, in_channels, out_channels]`
+    tensor.
+out_backprop: Backprop signal of shape `[batch, out_depth, out_rows, out_cols,
+    out_channels]`.
+strides: 1-D tensor of length 5. The stride of the sliding window for each
+    dimension of `input`. Must have `strides[0] = strides[4] = 1`.
+padding: The type of padding algorithm to use.
+data_format: The data format of the input and output data. With the
+    default format "NDHWC", the data is stored in the order of:
+        [batch, in_depth, in_height, in_width, in_channels].
+    Alternatively, the format could be "NCDHW", the data storage order is:
+        [batch, in_channels, in_depth, in_height, in_width].
+dilations: 1-D tensor of length 5. The dilation factor for each dimension of
+    `input`. If set to k > 1, there will be k-1 skipped cells between each
+    filter element on that dimension. The dimension order is determined by the
+    value of `data_format`, see above for details. Dilations in the batch and
+    depth dimensions must be 1.
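To make the shape contracts of the Conv3D family above concrete, a small sketch using the contemporary 1.x-era Python API (the shapes are our own example values):

```python
import tensorflow as tf  # 1.x-era API, matching this file

# NDHWC input and a [depth, rows, cols, in_channels, out_channels] filter.
x = tf.placeholder(tf.float32, [2, 8, 8, 8, 4])
w = tf.placeholder(tf.float32, [3, 3, 3, 4, 16])
y = tf.nn.conv3d(x, w, strides=[1, 1, 1, 1, 1], padding="SAME")
# These gradients are computed by the Conv3DBackpropInputV2 /
# Conv3DBackpropFilterV2 ops registered above.
gx, gw = tf.gradients(y, [x, w])
print(y.shape, gx.shape, gw.shape)
# (2, 8, 8, 8, 16) (2, 8, 8, 8, 4) (3, 3, 3, 4, 16)
```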
+ +)doc"); // -------------------------------------------------------------------------- @@ -590,7 +1199,23 @@ REGISTER_OP("AvgPool3D") .Attr(GetPaddingAttrString()) .Attr(GetConvnet3dDataFormatAttrString()) .Attr("T: {bfloat16, float, double}") - .SetShapeFn(shape_inference::Pool3DShape); + .SetShapeFn(shape_inference::Pool3DShape) + .Doc(R"doc( +Performs 3D average pooling on the input. + +ksize: 1-D tensor of length 5. The size of the window for each dimension of + the input tensor. Must have `ksize[0] = ksize[4] = 1`. +strides: 1-D tensor of length 5. The stride of the sliding window for each + dimension of `input`. Must have `strides[0] = strides[4] = 1`. +padding: The type of padding algorithm to use. +input: Shape `[batch, depth, rows, cols, channels]` tensor to pool over. +output: The average pooled output tensor. +data_format: The data format of the input and output data. With the + default format "NDHWC", the data is stored in the order of: + [batch, in_depth, in_height, in_width, in_channels]. + Alternatively, the format could be "NCDHW", the data storage order is: + [batch, in_channels, in_depth, in_height, in_width]. +)doc"); REGISTER_OP("AvgPool3DGrad") .Input("orig_input_shape: int32") @@ -607,7 +1232,24 @@ REGISTER_OP("AvgPool3DGrad") TF_RETURN_IF_ERROR(c->WithRank(s, 5, &s)); c->set_output(0, s); return Status::OK(); - }); + }) + .Doc(R"doc( +Computes gradients of average pooling function. + +ksize: 1-D tensor of length 5. The size of the window for each dimension of + the input tensor. Must have `ksize[0] = ksize[4] = 1`. +strides: 1-D tensor of length 5. The stride of the sliding window for each + dimension of `input`. Must have `strides[0] = strides[4] = 1`. +padding: The type of padding algorithm to use. +orig_input_shape: The original input dimensions. +grad: Output backprop of shape `[batch, depth, rows, cols, channels]`. +output: The backprop for input. +data_format: The data format of the input and output data. With the + default format "NDHWC", the data is stored in the order of: + [batch, in_depth, in_height, in_width, in_channels]. + Alternatively, the format could be "NCDHW", the data storage order is: + [batch, in_channels, in_depth, in_height, in_width]. +)doc"); // -------------------------------------------------------------------------- @@ -619,7 +1261,23 @@ REGISTER_OP("MaxPool3D") .Attr(GetPaddingAttrString()) .Attr(GetConvnet3dDataFormatAttrString()) .Attr("T: {bfloat16, float}") - .SetShapeFn(shape_inference::Pool3DShape); + .SetShapeFn(shape_inference::Pool3DShape) + .Doc(R"doc( +Performs 3D max pooling on the input. + +ksize: 1-D tensor of length 5. The size of the window for each dimension of + the input tensor. Must have `ksize[0] = ksize[4] = 1`. +strides: 1-D tensor of length 5. The stride of the sliding window for each + dimension of `input`. Must have `strides[0] = strides[4] = 1`. +padding: The type of padding algorithm to use. +input: Shape `[batch, depth, rows, cols, channels]` tensor to pool over. +output: The max pooled output tensor. +data_format: The data format of the input and output data. With the + default format "NDHWC", the data is stored in the order of: + [batch, in_depth, in_height, in_width, in_channels]. + Alternatively, the format could be "NCDHW", the data storage order is: + [batch, in_channels, in_depth, in_height, in_width]. 
+)doc"); REGISTER_OP("MaxPool3DGrad") .Input("orig_input: TInput") @@ -634,7 +1292,24 @@ REGISTER_OP("MaxPool3DGrad") .Attr("TInput: {bfloat16, float} = DT_FLOAT") .SetShapeFn([](InferenceContext* c) { return UnchangedShapeWithRank(c, 5); - }); + }) + .Doc(R"doc( +Computes gradients of max pooling function. + +ksize: 1-D tensor of length 5. The size of the window for each dimension of + the input tensor. Must have `ksize[0] = ksize[4] = 1`. +strides: 1-D tensor of length 5. The stride of the sliding window for each + dimension of `input`. Must have `strides[0] = strides[4] = 1`. +padding: The type of padding algorithm to use. +orig_input: The original input tensor. +orig_output: The original output tensor. +grad: Output backprop of shape `[batch, depth, rows, cols, channels]`. +data_format: The data format of the input and output data. With the + default format "NDHWC", the data is stored in the order of: + [batch, in_depth, in_height, in_width, in_channels]. + Alternatively, the format could be "NCDHW", the data storage order is: + [batch, in_channels, in_depth, in_height, in_width]. +)doc"); REGISTER_OP("MaxPool3DGradGrad") .Input("orig_input: T") @@ -654,7 +1329,25 @@ REGISTER_OP("MaxPool3DGradGrad") // Validate 'orig_output' is same shape as 'output' TF_RETURN_IF_ERROR(c->Merge(c->input(1), c->output(0), &unused)); return Status::OK(); - }); + }) + .Doc(R"doc( +Computes second-order gradients of the maxpooling function. + +ksize: 1-D tensor of length 5. The size of the window for each dimension of + the input tensor. Must have `ksize[0] = ksize[4] = 1`. +strides: 1-D tensor of length 5. The stride of the sliding window for each + dimension of `input`. Must have `strides[0] = strides[4] = 1`. +padding: The type of padding algorithm to use. +orig_input: The original input tensor. +orig_output: The original output tensor. +grad: Output backprop of shape `[batch, depth, rows, cols, channels]`. +output: Gradients of gradients w.r.t. the input to `max_pool`. +data_format: The data format of the input and output data. With the + default format "NDHWC", the data is stored in the order of: + [batch, in_depth, in_height, in_width, in_channels]. + Alternatively, the format could be "NCDHW", the data storage order is: + [batch, in_channels, in_depth, in_height, in_width]. +)doc"); // -------------------------------------------------------------------------- @@ -662,7 +1355,17 @@ REGISTER_OP("L2Loss") .Input("t: T") .Output("output: T") .Attr("T: {half, bfloat16, float, double}") - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +L2 Loss. + +Computes half the L2 norm of a tensor without the `sqrt`: + + output = sum(t ** 2) / 2 + +t: Typically 2-D, but may have any dimensions. +output: 0-D. +)doc"); // -------------------------------------------------------------------------- @@ -676,7 +1379,28 @@ REGISTER_OP("LRN") .Attr("T: {half, bfloat16, float} = DT_FLOAT") .SetShapeFn([](InferenceContext* c) { return UnchangedShapeWithRank(c, 4); - }); + }) + .Doc(R"doc( +Local Response Normalization. + +The 4-D `input` tensor is treated as a 3-D array of 1-D vectors (along the last +dimension), and each vector is normalized independently. Within a given vector, +each component is divided by the weighted, squared sum of inputs within +`depth_radius`. 
+
+    sqr_sum[a, b, c, d] =
+        sum(input[a, b, c, d - depth_radius : d + depth_radius + 1] ** 2)
+    output = input / (bias + alpha * sqr_sum) ** beta
+
+For details, see [Krizhevsky et al., ImageNet classification with deep
+convolutional neural networks (NIPS 2012)](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks).
+
+input: 4-D.
+depth_radius: 0-D. Half-width of the 1-D normalization window.
+bias: An offset (usually positive to avoid dividing by 0).
+alpha: A scale factor, usually positive.
+beta: An exponent.
+)doc");

 REGISTER_OP("LRNGrad")
     .Input("input_grads: T")
@@ -695,7 +1419,19 @@ REGISTER_OP("LRNGrad")
       TF_RETURN_IF_ERROR(c->Merge(s, c->input(2), &s));  // output_image
       c->set_output(0, s);
       return Status::OK();
-    });
+    })
+    .Doc(R"doc(
+Gradients for Local Response Normalization.
+
+input_grads: 4-D with shape `[batch, height, width, channels]`.
+input_image: 4-D with shape `[batch, height, width, channels]`.
+output_image: 4-D with shape `[batch, height, width, channels]`.
+depth_radius: A depth radius.
+bias: An offset (usually > 0 to avoid dividing by 0).
+alpha: A scale factor, usually positive.
+beta: An exponent.
+output: The gradients for LRN.
+)doc");

 // --------------------------------------------------------------------------

@@ -709,7 +1445,22 @@ REGISTER_OP("MaxPool")
     .Attr("data_format: {'NHWC', 'NCHW', 'NCHW_VECT_C'} = 'NHWC'")
     .Input("input: T")
     .Output("output: T")
-    .SetShapeFn(shape_inference::MaxPoolShape);
+    .SetShapeFn(shape_inference::MaxPoolShape)
+    .Doc(R"doc(
+Performs max pooling on the input.
+
+ksize: The size of the window for each dimension of the input tensor.
+strides: The stride of the sliding window for each dimension of the
+    input tensor.
+padding: The type of padding algorithm to use.
+data_format: Specify the data format of the input and output data. With the
+    default format "NHWC", the data is stored in the order of:
+        [batch, in_height, in_width, in_channels].
+    Alternatively, the format could be "NCHW", the data storage order of:
+        [batch, in_channels, in_height, in_width].
+input: 4-D input to pool over.
+output: The max pooled output tensor.
+)doc");

 REGISTER_OP("MaxPoolV2")
     .Attr(
@@ -724,7 +1475,22 @@ REGISTER_OP("MaxPoolV2")
     .SetShapeFn([](InferenceContext* c) {
       TF_RETURN_IF_ERROR(shape_inference::MaxPoolV2Shape(c, 3));
       return Status::OK();
-    });
+    })
+    .Doc(R"doc(
+Performs max pooling on the input.
+
+ksize: The size of the window for each dimension of the input tensor.
+strides: The stride of the sliding window for each dimension of the
+    input tensor.
+padding: The type of padding algorithm to use.
+data_format: Specify the data format of the input and output data. With the
+    default format "NHWC", the data is stored in the order of:
+        [batch, in_height, in_width, in_channels].
+    Alternatively, the format could be "NCHW", the data storage order of:
+        [batch, in_channels, in_height, in_width].
+input: 4-D input to pool over.
+output: The max pooled output tensor.
+)doc");

 REGISTER_OP("MaxPoolGrad")
     .Attr("ksize: list(int) >= 4")
@@ -738,7 +1504,24 @@ REGISTER_OP("MaxPoolGrad")
     .Attr("T: realnumbertype = DT_FLOAT")
     .SetShapeFn([](InferenceContext* c) {
       return UnchangedShapeWithRank(c, 4);
-    });
+    })
+    .Doc(R"doc(
+Computes gradients of the maxpooling function.
+
+ksize: The size of the window for each dimension of the input tensor.
+strides: The stride of the sliding window for each dimension of the
+    input tensor.
+padding: The type of padding algorithm to use.
+data_format: Specify the data format of the input and output data. With the
+    default format "NHWC", the data is stored in the order of:
+        [batch, in_height, in_width, in_channels].
+    Alternatively, the format could be "NCHW", the data storage order of:
+        [batch, in_channels, in_height, in_width].
+orig_input: The original input tensor.
+orig_output: The original output tensor.
+grad: 4-D. Gradients w.r.t. the output of `max_pool`.
+output: Gradients w.r.t. the input to `max_pool`.
+)doc");

 REGISTER_OP("MaxPoolGradV2")
     .Attr(GetPaddingAttrString())
@@ -752,7 +1535,24 @@ REGISTER_OP("MaxPoolGradV2")
     .Attr("T: realnumbertype = DT_FLOAT")
     .SetShapeFn([](InferenceContext* c) {
       return UnchangedShapeWithRank(c, 4);
-    });
+    })
+    .Doc(R"doc(
+Computes gradients of the maxpooling function.
+
+ksize: The size of the window for each dimension of the input tensor.
+strides: The stride of the sliding window for each dimension of the
+    input tensor.
+padding: The type of padding algorithm to use.
+data_format: Specify the data format of the input and output data. With the
+    default format "NHWC", the data is stored in the order of:
+        [batch, in_height, in_width, in_channels].
+    Alternatively, the format could be "NCHW", the data storage order of:
+        [batch, in_channels, in_height, in_width].
+orig_input: The original input tensor.
+orig_output: The original output tensor.
+grad: 4-D. Gradients w.r.t. the output of `max_pool`.
+output: Gradients w.r.t. the input to `max_pool`.
+)doc");

 REGISTER_OP("MaxPoolGradGrad")
     .Attr("ksize: list(int) >= 4")
@@ -772,7 +1572,24 @@ REGISTER_OP("MaxPoolGradGrad")
       // Validate 'orig_output' is same shape as 'output'
       TF_RETURN_IF_ERROR(c->Merge(c->input(1), c->output(0), &unused));
       return Status::OK();
-    });
+    })
+    .Doc(R"doc(
+Computes second-order gradients of the maxpooling function.
+
+ksize: The size of the window for each dimension of the input tensor.
+strides: The stride of the sliding window for each dimension of the
+    input tensor.
+padding: The type of padding algorithm to use.
+data_format: Specify the data format of the input and output data. With the
+    default format "NHWC", the data is stored in the order of:
+        [batch, in_height, in_width, in_channels].
+    Alternatively, the format could be "NCHW", the data storage order of:
+        [batch, in_channels, in_height, in_width].
+orig_input: The original input tensor.
+orig_output: The original output tensor.
+grad: 4-D. Gradients of gradients w.r.t. the input of `max_pool`.
+output: Gradients of gradients w.r.t. the input to `max_pool`.
+)doc");

 REGISTER_OP("MaxPoolGradGradV2")
     .Attr(GetPaddingAttrString())
@@ -792,7 +1609,24 @@ REGISTER_OP("MaxPoolGradGradV2")
       // Validate 'orig_output' is same shape as 'output'
       TF_RETURN_IF_ERROR(c->Merge(c->input(1), c->output(0), &unused));
       return Status::OK();
-    });
+    })
+    .Doc(R"doc(
+Computes second-order gradients of the maxpooling function.
+
+ksize: The size of the window for each dimension of the input tensor.
+strides: The stride of the sliding window for each dimension of the
+    input tensor.
+padding: The type of padding algorithm to use.
+data_format: Specify the data format of the input and output data. With the
+    default format "NHWC", the data is stored in the order of:
+        [batch, in_height, in_width, in_channels].
+    Alternatively, the format could be "NCHW", the data storage order of:
+        [batch, in_channels, in_height, in_width].
+orig_input: The original input tensor.
+orig_output: The original output tensor.
+grad: 4-D. Gradients of gradients w.r.t. the input of `max_pool`.
+output: Gradients of gradients w.r.t. the input to `max_pool`.
+)doc");

 REGISTER_OP("MaxPoolWithArgmax")
     .Attr("ksize: list(int) >= 4")
@@ -807,7 +1641,27 @@ REGISTER_OP("MaxPoolWithArgmax")
       TF_RETURN_IF_ERROR(shape_inference::MaxPoolShape(c));
       c->set_output(1, c->output(0));
       return Status::OK();
-    });
+    })
+    .Doc(R"doc(
+Performs max pooling on the input and outputs both max values and indices.
+
+The indices in `argmax` are flattened, so that a maximum value at position
+`[b, y, x, c]` becomes flattened index
+`((b * height + y) * width + x) * channels + c`.
+
+The indices returned are always in `[0, height) x [0, width)` before flattening,
+even if padding is involved and the mathematically correct answer is outside
+(either negative or too large). This is a bug, but fixing it is difficult to do
+in a safe, backwards-compatible way, especially due to flattening.
+
+ksize: The size of the window for each dimension of the input tensor.
+strides: The stride of the sliding window for each dimension of the
+    input tensor.
+padding: The type of padding algorithm to use.
+input: 4-D with shape `[batch, height, width, channels]`. Input to pool over.
+output: The max pooled output tensor.
+argmax: 4-D. The flattened indices of the max values chosen for each output.
+)doc");

 REGISTER_OP("MaxPoolGradWithArgmax")
     .Attr("ksize: list(int) >= 4")
@@ -821,7 +1675,20 @@ REGISTER_OP("MaxPoolGradWithArgmax")
     .Attr("T: realnumbertype")
     .SetShapeFn([](InferenceContext* c) {
       return UnchangedShapeWithRank(c, 4);
-    });
+    })
+    .Doc(R"doc(
+Computes gradients of the maxpooling function.
+
+ksize: The size of the window for each dimension of the input tensor.
+strides: The stride of the sliding window for each dimension of the
+    input tensor.
+padding: The type of padding algorithm to use.
+input: The original input.
+grad: 4-D with shape `[batch, height, width, channels]`. Gradients w.r.t. the
+    output of `max_pool`.
+argmax: The indices of the maximum values chosen for each output of `max_pool`.
+output: Gradients w.r.t. the input of `max_pool`.
+)doc");

 REGISTER_OP("MaxPoolGradGradWithArgmax")
     .Attr("ksize: list(int) >= 4")
@@ -841,7 +1708,20 @@ REGISTER_OP("MaxPoolGradGradWithArgmax")
       // Validate 'argmax' is same shape as 'output'
       TF_RETURN_IF_ERROR(c->Merge(c->input(2), c->output(0), &unused));
       return Status::OK();
-    });
+    })
+    .Doc(R"doc(
+Computes second-order gradients of the maxpooling function.
+
+ksize: The size of the window for each dimension of the input tensor.
+strides: The stride of the sliding window for each dimension of the
+    input tensor.
+padding: The type of padding algorithm to use.
+input: The original input.
+grad: 4-D with shape `[batch, height, width, channels]`. Gradients w.r.t. the
+    input of `max_pool`.
+argmax: The indices of the maximum values chosen for each output of `max_pool`.
+output: Gradients of gradients w.r.t. the input of `max_pool`.
+)doc");

 // --------------------------------------------------------------------------

@@ -925,7 +1805,43 @@ REGISTER_OP("Dilation2D")
           {batch_size_dim, output_rows, output_cols, output_depth_dim});
       c->set_output(0, output_shape);
       return Status::OK();
-    });
+    })
+    .Doc(R"doc(
+Computes the grayscale dilation of 4-D `input` and 3-D `filter` tensors.
+
+The `input` tensor has shape `[batch, in_height, in_width, depth]` and the
+`filter` tensor has shape `[filter_height, filter_width, depth]`, i.e., each
+input channel is processed independently of the others with its own structuring
+function. The `output` tensor has shape
+`[batch, out_height, out_width, depth]`. The spatial dimensions of the output
+tensor depend on the `padding` algorithm. We currently only support the default
+"NHWC" `data_format`.
+
+In detail, the grayscale morphological 2-D dilation is the max-sum correlation
+(for consistency with `conv2d`, we use unmirrored filters):
+
+    output[b, y, x, c] =
+       max_{dy, dx} input[b,
+                          strides[1] * y + rates[1] * dy,
+                          strides[2] * x + rates[2] * dx,
+                          c] +
+                    filter[dy, dx, c]
+
+Max-pooling is a special case when the filter has size equal to the pooling
+kernel size and contains all zeros.
+
+Note on duality: The dilation of `input` by the `filter` is equal to the
+negation of the erosion of `-input` by the reflected `filter`.
+
+input: 4-D with shape `[batch, in_height, in_width, depth]`.
+filter: 3-D with shape `[filter_height, filter_width, depth]`.
+strides: The stride of the sliding window for each dimension of the input
+    tensor. Must be: `[1, stride_height, stride_width, 1]`.
+rates: The input stride for atrous morphological dilation. Must be:
+    `[1, rate_height, rate_width, 1]`.
+padding: The type of padding algorithm to use.
+output: 4-D with shape `[batch, out_height, out_width, depth]`.
+)doc");

 REGISTER_OP("Dilation2DBackpropInput")
     .Input("input: T")
@@ -936,7 +1852,20 @@ REGISTER_OP("Dilation2DBackpropInput")
     .Attr("strides: list(int) >= 4")
     .Attr("rates: list(int) >= 4")
     .Attr(GetPaddingAttrString())
-    .SetShapeFn(shape_inference::UnchangedShape);
+    .SetShapeFn(shape_inference::UnchangedShape)
+    .Doc(R"doc(
+Computes the gradient of morphological 2-D dilation with respect to the input.
+
+input: 4-D with shape `[batch, in_height, in_width, depth]`.
+filter: 3-D with shape `[filter_height, filter_width, depth]`.
+out_backprop: 4-D with shape `[batch, out_height, out_width, depth]`.
+in_backprop: 4-D with shape `[batch, in_height, in_width, depth]`.
+strides: 1-D of length 4. The stride of the sliding window for each dimension of
+    the input tensor. Must be: `[1, stride_height, stride_width, 1]`.
+rates: 1-D of length 4. The input stride for atrous morphological dilation.
+    Must be: `[1, rate_height, rate_width, 1]`.
+padding: The type of padding algorithm to use.
+)doc");

 REGISTER_OP("Dilation2DBackpropFilter")
     .Input("input: T")
@@ -950,7 +1879,20 @@ REGISTER_OP("Dilation2DBackpropFilter")
     .SetShapeFn([](InferenceContext* c) {
       c->set_output(0, c->input(1));
       return Status::OK();
-    });
+    })
+    .Doc(R"doc(
+Computes the gradient of morphological 2-D dilation with respect to the filter.
+
+input: 4-D with shape `[batch, in_height, in_width, depth]`.
+filter: 3-D with shape `[filter_height, filter_width, depth]`.
+out_backprop: 4-D with shape `[batch, out_height, out_width, depth]`.
+filter_backprop: 3-D with shape `[filter_height, filter_width, depth]`.
+strides: 1-D of length 4. The stride of the sliding window for each dimension of
+    the input tensor. Must be: `[1, stride_height, stride_width, 1]`.
+rates: 1-D of length 4. The input stride for atrous morphological dilation.
+    Must be: `[1, rate_height, rate_width, 1]`.
+padding: The type of padding algorithm to use.
+)doc");

 // --------------------------------------------------------------------------

@@ -958,79 +1900,150 @@ REGISTER_OP("Relu")
     .Input("features: T")
     .Output("activations: T")
     .Attr("T: realnumbertype")
-    .SetShapeFn(shape_inference::UnchangedShape);
+    .SetShapeFn(shape_inference::UnchangedShape)
+    .Doc(R"doc(
+Computes rectified linear: `max(features, 0)`.
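For reference, the closed forms in the Relu-family docs here and just below translate directly to NumPy; an illustrative sketch (values are ours), not TensorFlow code:

```python
import numpy as np

features = np.array([-3.0, -0.5, 0.0, 2.0, 7.0])
relu = np.maximum(features, 0)                                 # Relu
relu6 = np.minimum(np.maximum(features, 0), 6)                 # Relu6
elu = np.where(features < 0, np.exp(features) - 1, features)   # Elu
print(relu, relu6, elu, sep="\n")
```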
+)doc"); REGISTER_OP("ReluGrad") .Input("gradients: T") .Input("features: T") .Output("backprops: T") .Attr("T: realnumbertype") - .SetShapeFn(shape_inference::MergeBothInputsShapeFn); + .SetShapeFn(shape_inference::MergeBothInputsShapeFn) + .Doc(R"doc( +Computes rectified linear gradients for a Relu operation. + +gradients: The backpropagated gradients to the corresponding Relu operation. +features: The features passed as input to the corresponding Relu operation, OR + the outputs of that operation (both work equivalently). +backprops: `gradients * (features > 0)`. +)doc"); REGISTER_OP("Relu6") .Input("features: T") .Output("activations: T") .Attr("T: realnumbertype") - .SetShapeFn(shape_inference::UnchangedShape); + .SetShapeFn(shape_inference::UnchangedShape) + .Doc(R"doc( +Computes rectified linear 6: `min(max(features, 0), 6)`. +)doc"); REGISTER_OP("Relu6Grad") .Input("gradients: T") .Input("features: T") .Output("backprops: T") .Attr("T: realnumbertype") - .SetShapeFn(shape_inference::MergeBothInputsShapeFn); + .SetShapeFn(shape_inference::MergeBothInputsShapeFn) + .Doc(R"doc( +Computes rectified linear 6 gradients for a Relu6 operation. + +gradients: The backpropagated gradients to the corresponding Relu6 operation. +features: The features passed as input to the corresponding Relu6 operation, or + its output; using either one produces the same result. +backprops: The gradients: + `gradients * (features > 0) * (features < 6)`. +)doc"); REGISTER_OP("Elu") .Input("features: T") .Output("activations: T") .Attr("T: {half, bfloat16, float, double}") - .SetShapeFn(shape_inference::UnchangedShape); + .SetShapeFn(shape_inference::UnchangedShape) + .Doc(R"doc( +Computes exponential linear: `exp(features) - 1` if < 0, `features` otherwise. + +See [Fast and Accurate Deep Network Learning by Exponential Linear Units (ELUs) +](http://arxiv.org/abs/1511.07289) +)doc"); REGISTER_OP("EluGrad") .Input("gradients: T") .Input("outputs: T") .Output("backprops: T") .Attr("T: {half, bfloat16, float, double}") - .SetShapeFn(shape_inference::MergeBothInputsShapeFn); + .SetShapeFn(shape_inference::MergeBothInputsShapeFn) + .Doc(R"doc( +Computes gradients for the exponential linear (Elu) operation. + +gradients: The backpropagated gradients to the corresponding Elu operation. +outputs: The outputs of the corresponding Elu operation. +backprops: The gradients: `gradients * (outputs + 1)` if outputs < 0, +`gradients` otherwise. +)doc"); REGISTER_OP("Selu") .Input("features: T") .Output("activations: T") .Attr("T: {half, bfloat16, float, double}") - .SetShapeFn(shape_inference::UnchangedShape); + .SetShapeFn(shape_inference::UnchangedShape) + .Doc(R"doc( +Computes scaled exponential linear: `scale * alpha * (exp(features) - 1)` +if < 0, `scale * features` otherwise. + +See [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515) +)doc"); REGISTER_OP("SeluGrad") .Input("gradients: T") .Input("outputs: T") .Output("backprops: T") .Attr("T: {half, bfloat16, float, double}") - .SetShapeFn(shape_inference::MergeBothInputsShapeFn); + .SetShapeFn(shape_inference::MergeBothInputsShapeFn) + .Doc(R"doc( +Computes gradients for the scaled exponential linear (Selu) operation. + +gradients: The backpropagated gradients to the corresponding Selu operation. +outputs: The outputs of the corresponding Selu operation. +backprops: The gradients: `gradients * (outputs + scale * alpha)` +if outputs < 0, `scale * gradients` otherwise. 
+)doc"); REGISTER_OP("Softplus") .Input("features: T") .Output("activations: T") .Attr("T: realnumbertype") - .SetShapeFn(shape_inference::UnchangedShape); + .SetShapeFn(shape_inference::UnchangedShape) + .Doc(R"doc( +Computes softplus: `log(exp(features) + 1)`. +)doc"); REGISTER_OP("SoftplusGrad") .Input("gradients: T") .Input("features: T") .Output("backprops: T") .Attr("T: realnumbertype") - .SetShapeFn(shape_inference::MergeBothInputsShapeFn); + .SetShapeFn(shape_inference::MergeBothInputsShapeFn) + .Doc(R"doc( +Computes softplus gradients for a softplus operation. + +gradients: The backpropagated gradients to the corresponding softplus operation. +features: The features passed as input to the corresponding softplus operation. +backprops: The gradients: `gradients / (1 + exp(-features))`. +)doc"); REGISTER_OP("Softsign") .Input("features: T") .Output("activations: T") .Attr("T: realnumbertype") - .SetShapeFn(shape_inference::UnchangedShape); + .SetShapeFn(shape_inference::UnchangedShape) + .Doc(R"doc( +Computes softsign: `features / (abs(features) + 1)`. +)doc"); REGISTER_OP("SoftsignGrad") .Input("gradients: T") .Input("features: T") .Output("backprops: T") .Attr("T: realnumbertype") - .SetShapeFn(shape_inference::MergeBothInputsShapeFn); + .SetShapeFn(shape_inference::MergeBothInputsShapeFn) + .Doc(R"doc( +Computes softsign gradients for a softsign operation. + +gradients: The backpropagated gradients to the corresponding softsign operation. +features: The features passed as input to the corresponding softsign operation. +backprops: The gradients: `gradients / (1 + abs(features)) ** 2`. +)doc"); // -------------------------------------------------------------------------- @@ -1040,7 +2053,17 @@ REGISTER_OP("Softmax") .Attr("T: {half, bfloat16, float, double}") .SetShapeFn([](InferenceContext* c) { return shape_inference::UnchangedShapeWithRankAtLeast(c, 1); - }); + }) + .Doc(R"doc( +Computes softmax activations. + +For each batch `i` and class `j` we have + + softmax[i, j] = exp(logits[i, j]) / sum_j(exp(logits[i, j])) + +logits: 2-D with shape `[batch_size, num_classes]`. +softmax: Same shape as `logits`. +)doc"); // -------------------------------------------------------------------------- @@ -1050,7 +2073,17 @@ REGISTER_OP("LogSoftmax") .Attr("T: {half, bfloat16, float, double}") .SetShapeFn([](InferenceContext* c) { return shape_inference::UnchangedShapeWithRankAtLeast(c, 1); - }); + }) + .Doc(R"doc( +Computes log softmax activations. + +For each batch `i` and class `j` we have + + logsoftmax[i, j] = logits[i, j] - log(sum(exp(logits[i]))) + +logits: 2-D with shape `[batch_size, num_classes]`. +logsoftmax: Same shape as `logits`. +)doc"); // -------------------------------------------------------------------------- @@ -1069,7 +2102,19 @@ REGISTER_OP("SoftmaxCrossEntropyWithLogits") c->set_output(0, c->Vector(batch_size)); c->set_output(1, input); return Status::OK(); - }); + }) + .Doc(R"doc( +Computes softmax cross entropy cost and gradients to backpropagate. + +Inputs are the logits, not probabilities. + +features: batch_size x num_classes matrix +labels: batch_size x num_classes matrix + The caller must ensure that each batch of labels represents a valid + probability distribution. +loss: Per example loss (batch_size vector). +backprop: backpropagated gradients (batch_size x num_classes matrix). 
+)doc"); REGISTER_OP("SparseSoftmaxCrossEntropyWithLogits") .Input("features: T") @@ -1092,7 +2137,23 @@ REGISTER_OP("SparseSoftmaxCrossEntropyWithLogits") c->set_output(0, c->Vector(batch_size)); c->set_output(1, features); return Status::OK(); - }); + }) + .Doc(R"doc( +Computes softmax cross entropy cost and gradients to backpropagate. + +Unlike `SoftmaxCrossEntropyWithLogits`, this operation does not accept +a matrix of label probabilities, but rather a single label per row +of features. This label is considered to have probability 1.0 for the +given row. + +Inputs are the logits, not probabilities. + +features: batch_size x num_classes matrix +labels: batch_size vector with values in [0, num_classes). + This is the label for the given minibatch entry. +loss: Per example loss (batch_size vector). +backprop: backpropagated gradients (batch_size x num_classes matrix). +)doc"); // -------------------------------------------------------------------------- @@ -1112,7 +2173,31 @@ REGISTER_OP("InTopK") c->Merge(c->Dim(predictions, 0), c->Dim(targets, 0), &batch_size)); c->set_output(0, c->Vector(batch_size)); return Status::OK(); - }); + }) + .Doc(R"doc( +Says whether the targets are in the top `K` predictions. + +This outputs a `batch_size` bool array, an entry `out[i]` is `true` if the +prediction for the target class is among the top `k` predictions among +all predictions for example `i`. Note that the behavior of `InTopK` differs +from the `TopK` op in its handling of ties; if multiple classes have the +same prediction value and straddle the top-`k` boundary, all of those +classes are considered to be in the top `k`. + +More formally, let + + \\(predictions_i\\) be the predictions for all classes for example `i`, + \\(targets_i\\) be the target class for example `i`, + \\(out_i\\) be the output for example `i`, + +$$out_i = predictions_{i, targets_i} \in TopKIncludingTies(predictions_i)$$ + +predictions: A `batch_size` x `classes` tensor. +targets: A `batch_size` vector of class ids. +k: Number of top elements to look at for computing precision. +precision: Computed Precision at `k` as a `bool Tensor`. + +)doc"); // This is the same as `InTopK`, but takes `k` as in input rather than an attr. REGISTER_OP("InTopKV2") @@ -1131,7 +2216,31 @@ REGISTER_OP("InTopKV2") c->Merge(c->Dim(predictions, 0), c->Dim(targets, 0), &batch_size)); c->set_output(0, c->Vector(batch_size)); return Status::OK(); - }); + }) + .Doc(R"doc( +Says whether the targets are in the top `K` predictions. + +This outputs a `batch_size` bool array, an entry `out[i]` is `true` if the +prediction for the target class is among the top `k` predictions among +all predictions for example `i`. Note that the behavior of `InTopK` differs +from the `TopK` op in its handling of ties; if multiple classes have the +same prediction value and straddle the top-`k` boundary, all of those +classes are considered to be in the top `k`. + +More formally, let + + \\(predictions_i\\) be the predictions for all classes for example `i`, + \\(targets_i\\) be the target class for example `i`, + \\(out_i\\) be the output for example `i`, + +$$out_i = predictions_{i, targets_i} \in TopKIncludingTies(predictions_i)$$ + +predictions: A `batch_size` x `classes` tensor. +targets: A `batch_size` vector of class ids. +k: Number of top elements to look at for computing precision. +precision: Computed precision at `k` as a `bool Tensor`. 
+ +)doc"); namespace { @@ -1179,7 +2288,31 @@ REGISTER_OP("TopK") .Attr("sorted: bool = true") .Attr("T: realnumbertype") .Deprecated(7, "Use TopKV2 instead") - .SetShapeFn(TopKShapeFn); + .SetShapeFn(TopKShapeFn) + .Doc(R"doc( +Finds values and indices of the `k` largest elements for the last dimension. + +If the input is a vector (rank-1), finds the `k` largest entries in the vector +and outputs their values and indices as vectors. Thus `values[j]` is the +`j`-th largest entry in `input`, and its index is `indices[j]`. + +For matrices (resp. higher rank input), computes the top `k` entries in each +row (resp. vector along the last dimension). Thus, + + values.shape = indices.shape = input.shape[:-1] + [k] + +If two elements are equal, the lower-index element appears first. + +If `k` varies dynamically, use `TopKV2` below. + +input: 1-D or higher with last dimension at least `k`. +k: Number of top elements to look for along the last dimension (along each + row for matrices). +sorted: If true the resulting `k` elements will be sorted by the values in + descending order. +values: The `k` largest elements along each last dimensional slice. +indices: The indices of `values` within the last dimension of `input`. +)doc"); // This is the same as `TopK`, but takes `k` as in input rather than an attr. REGISTER_OP("TopKV2") @@ -1189,7 +2322,29 @@ REGISTER_OP("TopKV2") .Output("indices: int32") .Attr("sorted: bool = true") .Attr("T: realnumbertype") - .SetShapeFn(TopKShapeFn); + .SetShapeFn(TopKShapeFn) + .Doc(R"doc( +Finds values and indices of the `k` largest elements for the last dimension. + +If the input is a vector (rank-1), finds the `k` largest entries in the vector +and outputs their values and indices as vectors. Thus `values[j]` is the +`j`-th largest entry in `input`, and its index is `indices[j]`. + +For matrices (resp. higher rank input), computes the top `k` entries in each +row (resp. vector along the last dimension). Thus, + + values.shape = indices.shape = input.shape[:-1] + [k] + +If two elements are equal, the lower-index element appears first. + +input: 1-D or higher with last dimension at least `k`. +k: 0-D. Number of top elements to look for along the last dimension (along each + row for matrices). +sorted: If true the resulting `k` elements will be sorted by the values in + descending order. +values: The `k` largest elements along each last dimensional slice. +indices: The indices of `values` within the last dimension of `input`. +)doc"); // -------------------------------------------------------------------------- @@ -1221,7 +2376,25 @@ REGISTER_OP("NthElement") TF_RETURN_IF_ERROR(c->Subshape(input, 0, -1, &s)); c->set_output(0, s); return Status::OK(); - }); + }) + .Doc(R"doc( +Finds values of the `n`-th order statistic for the last dimension. + +If the input is a vector (rank-1), finds the entries which is the nth-smallest +value in the vector and outputs their values as scalar tensor. + +For matrices (resp. higher rank input), computes the entries which is the +nth-smallest value in each row (resp. vector along the last dimension). Thus, + + values.shape = input.shape[:-1] + +input: 1-D or higher with last dimension at least `n+1`. +n: 0-D. Position of sorted vector to select along the last dimension (along + each row for matrices). Valid range of n is `[0, input.shape[:-1])` +reverse: When set to True, find the nth-largest value in the vector and vice + versa. +values: The `n`-th order statistic along each last dimensional slice. 
+)doc"); // -------------------------------------------------------------------------- @@ -1237,7 +2410,70 @@ REGISTER_OP("FractionalMaxPool") .Attr("seed: int = 0") .Attr("seed2: int = 0") .Attr("T: {float, double, int32, int64}") - .SetShapeFn(FractionalPoolShapeFn); + .SetShapeFn(FractionalPoolShapeFn) + .Doc(R"doc( +Performs fractional max pooling on the input. + +Fractional max pooling is slightly different than regular max pooling. In +regular max pooling, you downsize an input set by taking the maximum value of +smaller N x N subsections of the set (often 2x2), and try to reduce the set by +a factor of N, where N is an integer. Fractional max pooling, as you might +expect from the word "fractional", means that the overall reduction ratio N +does not have to be an integer. + +The sizes of the pooling regions are generated randomly but are fairly uniform. +For example, let's look at the height dimension, and the constraints on the +list of rows that will be pool boundaries. + +First we define the following: + +1. input_row_length : the number of rows from the input set +2. output_row_length : which will be smaller than the input +3. alpha = input_row_length / output_row_length : our reduction ratio +4. K = floor(alpha) +5. row_pooling_sequence : this is the result list of pool boundary rows + +Then, row_pooling_sequence should satisfy: + +1. a[0] = 0 : the first value of the sequence is 0 +2. a[end] = input_row_length : the last value of the sequence is the size +3. K <= (a[i+1] - a[i]) <= K+1 : all intervals are K or K+1 size +4. length(row_pooling_sequence) = output_row_length+1 + +For more details on fractional max pooling, see this paper: +[Benjamin Graham, Fractional Max-Pooling](http://arxiv.org/abs/1412.6071) + +value: 4-D with shape `[batch, height, width, channels]`. +pooling_ratio: Pooling ratio for each dimension of `value`, currently only + supports row and col dimension and should be >= 1.0. For example, a valid + pooling ratio looks like [1.0, 1.44, 1.73, 1.0]. The first and last elements + must be 1.0 because we don't allow pooling on batch and channels + dimensions. 1.44 and 1.73 are pooling ratio on height and width dimensions + respectively. +pseudo_random: When set to True, generates the pooling sequence in a + pseudorandom fashion, otherwise, in a random fashion. Check paper [Benjamin + Graham, Fractional Max-Pooling](http://arxiv.org/abs/1412.6071) for + difference between pseudorandom and random. +overlapping: When set to True, it means when pooling, the values at the boundary + of adjacent pooling cells are used by both cells. For example: + + `index 0 1 2 3 4` + + `value 20 5 16 3 7` + + If the pooling sequence is [0, 2, 4], then 16, at index 2 will be used twice. + The result would be [20, 16] for fractional max pooling. +deterministic: When set to True, a fixed pooling region will be used when + iterating over a FractionalMaxPool node in the computation graph. Mainly used + in unit test to make FractionalMaxPool deterministic. +seed: If either seed or seed2 are set to be non-zero, the random number + generator is seeded by the given seed. Otherwise, it is seeded by a + random seed. +seed2: An second seed to avoid seed collision. +output: output tensor after fractional max pooling. +row_pooling_sequence: row pooling sequence, needed to calculate gradient. +col_pooling_sequence: column pooling sequence, needed to calculate gradient. 
+)doc"); REGISTER_OP("FractionalMaxPoolGrad") .Input("orig_input: T") @@ -1250,7 +2486,29 @@ REGISTER_OP("FractionalMaxPoolGrad") .Attr("T: {float, double, int32, int64}") .SetShapeFn([](InferenceContext* c) { return shape_inference::UnchangedShapeWithRank(c, 4); - }); + }) + .Doc(R"doc( +Computes gradient of the FractionalMaxPool function. + +orig_input: Original input for `fractional_max_pool` +orig_output: Original output for `fractional_max_pool` +out_backprop: 4-D with shape `[batch, height, width, channels]`. Gradients + w.r.t. the output of `fractional_max_pool`. +row_pooling_sequence: row pooling sequence, form pooling region with + col_pooling_sequence. +col_pooling_sequence: column pooling sequence, form pooling region with + row_pooling sequence. +overlapping: When set to True, it means when pooling, the values at the boundary + of adjacent pooling cells are used by both cells. For example: + + `index 0 1 2 3 4` + + `value 20 5 16 3 7` + + If the pooling sequence is [0, 2, 4], then 16, at index 2 will be used twice. + The result would be [20, 16] for fractional max pooling. +output: 4-D. Gradients w.r.t. the input of `fractional_max_pool`. +)doc"); // -------------------------------------------------------------------------- @@ -1266,7 +2524,46 @@ REGISTER_OP("FractionalAvgPool") .Attr("seed: int = 0") .Attr("seed2: int = 0") .Attr("T: {float, double, int32, int64}") - .SetShapeFn(FractionalPoolShapeFn); + .SetShapeFn(FractionalPoolShapeFn) + .Doc(R"doc( +Performs fractional average pooling on the input. + +Fractional average pooling is similar to Fractional max pooling in the pooling +region generation step. The only difference is that after pooling regions are +generated, a mean operation is performed instead of a max operation in each +pooling region. + +value: 4-D with shape `[batch, height, width, channels]`. +pooling_ratio: Pooling ratio for each dimension of `value`, currently only + supports row and col dimension and should be >= 1.0. For example, a valid + pooling ratio looks like [1.0, 1.44, 1.73, 1.0]. The first and last elements + must be 1.0 because we don't allow pooling on batch and channels + dimensions. 1.44 and 1.73 are pooling ratio on height and width dimensions + respectively. +pseudo_random: When set to True, generates the pooling sequence in a + pseudorandom fashion, otherwise, in a random fashion. Check paper [Benjamin + Graham, Fractional Max-Pooling](http://arxiv.org/abs/1412.6071) for + difference between pseudorandom and random. +overlapping: When set to True, it means when pooling, the values at the boundary + of adjacent pooling cells are used by both cells. For example: + + `index 0 1 2 3 4` + + `value 20 5 16 3 7` + + If the pooling sequence is [0, 2, 4], then 16, at index 2 will be used twice. + The result would be [41/3, 26/3] for fractional avg pooling. +deterministic: When set to True, a fixed pooling region will be used when + iterating over a FractionalAvgPool node in the computation graph. Mainly used + in unit test to make FractionalAvgPool deterministic. +seed: If either seed or seed2 are set to be non-zero, the random number + generator is seeded by the given seed. Otherwise, it is seeded by a + random seed. +seed2: An second seed to avoid seed collision. +output: output tensor after fractional avg pooling. +row_pooling_sequence: row pooling sequence, needed to calculate gradient. +col_pooling_sequence: column pooling sequence, needed to calculate gradient. 
+)doc"); REGISTER_OP("FractionalAvgPoolGrad") .Input("orig_input_tensor_shape: int64") @@ -1285,7 +2582,34 @@ REGISTER_OP("FractionalAvgPoolGrad") c->set_output(0, c->UnknownShapeOfRank(4)); } return Status::OK(); - }); + }) + .Doc(R"doc( +Computes gradient of the FractionalAvgPool function. + +Unlike FractionalMaxPoolGrad, we don't need to find arg_max for +FractionalAvgPoolGrad, we just need to evenly back-propagate each element of +out_backprop to those indices that form the same pooling cell. Therefore, we +just need to know the shape of original input tensor, instead of the whole +tensor. + +orig_input_tensor_shape: Original input tensor shape for `fractional_avg_pool` +out_backprop: 4-D with shape `[batch, height, width, channels]`. Gradients + w.r.t. the output of `fractional_avg_pool`. +row_pooling_sequence: row pooling sequence, form pooling region with + col_pooling_sequence. +col_pooling_sequence: column pooling sequence, form pooling region with + row_pooling sequence. +overlapping: When set to True, it means when pooling, the values at the boundary + of adjacent pooling cells are used by both cells. For example: + + `index 0 1 2 3 4` + + `value 20 5 16 3 7` + + If the pooling sequence is [0, 2, 4], then 16, at index 2 will be used twice. + The result would be [41/3, 26/3] for fractional avg pooling. +output: 4-D. Gradients w.r.t. the input of `fractional_avg_pool`. +)doc"); REGISTER_OP("QuantizedAvgPool") .Input("input: T") @@ -1306,7 +2630,22 @@ REGISTER_OP("QuantizedAvgPool") c->set_output(1, c->Scalar()); c->set_output(2, c->Scalar()); return Status::OK(); - }); + }) + .Doc(R"doc( +Produces the average pool of the input tensor for quantized types. + +input: 4-D with shape `[batch, height, width, channels]`. +ksize: The size of the window for each dimension of the input tensor. + The length must be 4 to match the number of dimensions of the input. +strides: The stride of the sliding window for each dimension of the input + tensor. The length must be 4 to match the number of dimensions of the input. +padding: The type of padding algorithm to use. +min_input: The float value that the lowest quantized input value represents. +max_input: The float value that the highest quantized input value represents. +min_output: The float value that the lowest quantized output value represents. +max_output: The float value that the highest quantized output value represents. + +)doc"); REGISTER_OP("QuantizedBiasAdd") .Input("input: T1") @@ -1331,7 +2670,21 @@ REGISTER_OP("QuantizedBiasAdd") c->set_output(1, c->Scalar()); c->set_output(2, c->Scalar()); return Status::OK(); - }); + }) + .Doc(R"doc( +Adds Tensor 'bias' to Tensor 'input' for Quantized types. + +Broadcasts the values of bias on dimensions 0..N-2 of 'input'. + +bias: A 1D bias Tensor with size matching the last dimension of 'input'. +min_input: The float value that the lowest quantized input value represents. +max_input: The float value that the highest quantized input value represents. +min_bias: The float value that the lowest quantized bias value represents. +max_bias: The float value that the highest quantized bias value represents. +min_out: The float value that the lowest quantized output value represents. +max_out: The float value that the highest quantized output value represents. 
+ +)doc"); REGISTER_OP("QuantizedConv2D") .Input("input: Tinput") @@ -1359,7 +2712,30 @@ REGISTER_OP("QuantizedConv2D") c->set_output(1, c->Scalar()); c->set_output(2, c->Scalar()); return Status::OK(); - }); + }) + .Doc(R"doc( +Computes a 2D convolution given quantized 4D input and filter tensors. +The inputs are quantized tensors where the lowest value represents the real +number of the associated minimum, and the highest represents the maximum. +This means that you can only interpret the quantized output in the same way, by +taking the returned minimum and maximum values into account. + +filter: filter's input_depth dimension must match input's depth dimensions. +strides: The stride of the sliding window for each dimension of the input + tensor. +padding: The type of padding algorithm to use. +min_input: The float value that the lowest quantized input value represents. +max_input: The float value that the highest quantized input value represents. +min_filter: The float value that the lowest quantized filter value represents. +max_filter: The float value that the highest quantized filter value represents. +min_output: The float value that the lowest quantized output value represents. +max_output: The float value that the highest quantized output value represents. +dilations: 1-D tensor of length 4. The dilation factor for each dimension of + `input`. If set to k > 1, there will be k-1 skipped cells between each + filter element on that dimension. The dimension order is determined by the + value of `data_format`, see above for details. Dilations in the batch and + depth dimensions must be 1. +)doc"); REGISTER_OP("QuantizedMaxPool") .Input("input: T") @@ -1380,7 +2756,22 @@ REGISTER_OP("QuantizedMaxPool") c->set_output(1, c->Scalar()); c->set_output(2, c->Scalar()); return Status::OK(); - }); + }) + .Doc(R"doc( +Produces the max pool of the input tensor for quantized types. + +input: The 4D (batch x rows x cols x depth) Tensor to MaxReduce over. +ksize: The size of the window for each dimension of the input tensor. + The length must be 4 to match the number of dimensions of the input. +strides: The stride of the sliding window for each dimension of the input + tensor. The length must be 4 to match the number of dimensions of the input. +padding: The type of padding algorithm to use. +min_input: The float value that the lowest quantized input value represents. +max_input: The float value that the highest quantized input value represents. +min_output: The float value that the lowest quantized output value represents. +max_output: The float value that the highest quantized output value represents. + +)doc"); REGISTER_OP("QuantizedRelu") .Input("features: Tinput") @@ -1399,7 +2790,17 @@ REGISTER_OP("QuantizedRelu") c->set_output(1, c->Scalar()); c->set_output(2, c->Scalar()); return Status::OK(); - }); + }) + .Doc(R"doc( +Computes Quantized Rectified Linear: `max(features, 0)` + +activations: Has the same output shape as "features". +min_features: The float value that the lowest quantized value represents. +max_features: The float value that the highest quantized value represents. +min_activations: The float value that the lowest quantized value represents. +max_activations: The float value that the highest quantized value represents. 
+ +)doc"); REGISTER_OP("QuantizedRelu6") .Input("features: Tinput") @@ -1418,7 +2819,17 @@ REGISTER_OP("QuantizedRelu6") c->set_output(1, c->Scalar()); c->set_output(2, c->Scalar()); return Status::OK(); - }); + }) + .Doc(R"doc( +Computes Quantized Rectified Linear 6: `min(max(features, 0), 6)` + +activations: Has the same output shape as "features". +min_features: The float value that the lowest quantized value represents. +max_features: The float value that the highest quantized value represents. +min_activations: The float value that the lowest quantized value represents. +max_activations: The float value that the highest quantized value represents. + +)doc"); REGISTER_OP("QuantizedReluX") .Input("features: Tinput") @@ -1438,7 +2849,17 @@ REGISTER_OP("QuantizedReluX") c->set_output(1, c->Scalar()); c->set_output(2, c->Scalar()); return Status::OK(); - }); + }) + .Doc(R"doc( +Computes Quantized Rectified Linear X: `min(max(features, 0), max_value)` + +activations: Has the same output shape as "features". +min_features: The float value that the lowest quantized value represents. +max_features: The float value that the highest quantized value represents. +min_activations: The float value that the lowest quantized value represents. +max_activations: The float value that the highest quantized value represents. + +)doc"); REGISTER_OP("QuantizedBatchNormWithGlobalNormalization") .Input("t: Tinput") @@ -1481,7 +2902,39 @@ REGISTER_OP("QuantizedBatchNormWithGlobalNormalization") c->set_output(2, c->Scalar()); return Status::OK(); - }); + }) + .Doc(R"doc( +Quantized Batch normalization. + +This op is deprecated and will be removed in the future. Prefer +`tf.nn.batch_normalization`. + +t: A 4D input Tensor. +t_min: The value represented by the lowest quantized input. +t_max: The value represented by the highest quantized input. +m: A 1D mean Tensor with size matching the last dimension of t. + This is the first output from tf.nn.moments, + or a saved moving average thereof. +m_min: The value represented by the lowest quantized mean. +m_max: The value represented by the highest quantized mean. +v: A 1D variance Tensor with size matching the last dimension of t. + This is the second output from tf.nn.moments, + or a saved moving average thereof. +v_min: The value represented by the lowest quantized variance. +v_max: The value represented by the highest quantized variance. +beta: A 1D beta Tensor with size matching the last dimension of t. + An offset to be added to the normalized tensor. +beta_min: The value represented by the lowest quantized offset. +beta_max: The value represented by the highest quantized offset. +gamma: A 1D gamma Tensor with size matching the last dimension of t. + If "scale_after_normalization" is true, this tensor will be multiplied + with the normalized tensor. +gamma_min: The value represented by the lowest quantized gamma. +gamma_max: The value represented by the highest quantized gamma. +variance_epsilon: A small float number to avoid dividing by 0. +scale_after_normalization: A bool indicating whether the resulted tensor + needs to be multiplied with gamma. +)doc"); #ifdef INTEL_MKL REGISTER_OP("_MklConv2D") diff --git a/tensorflow/core/ops/no_op.cc b/tensorflow/core/ops/no_op.cc index 560e9e8dae..e62353bb7f 100644 --- a/tensorflow/core/ops/no_op.cc +++ b/tensorflow/core/ops/no_op.cc @@ -18,6 +18,8 @@ limitations under the License. 
namespace tensorflow { -REGISTER_OP("NoOp").SetShapeFn(shape_inference::NoOutputs); +REGISTER_OP("NoOp") + .SetShapeFn(shape_inference::NoOutputs) + .Doc("Does nothing. Only useful as a placeholder for control edges."); } // namespace tensorflow diff --git a/tensorflow/core/ops/parsing_ops.cc b/tensorflow/core/ops/parsing_ops.cc index ddd2aa9274..c7c6ff5b35 100644 --- a/tensorflow/core/ops/parsing_ops.cc +++ b/tensorflow/core/ops/parsing_ops.cc @@ -35,13 +35,40 @@ REGISTER_OP("DecodeRaw") c->input(0), c->Vector(InferenceContext::kUnknownDim), &out)); c->set_output(0, out); return Status::OK(); - }); + }) + .Doc(R"doc( +Reinterpret the bytes of a string as a vector of numbers. + +bytes: All the elements must have the same length. +little_endian: Whether the input `bytes` are in little-endian order. + Ignored for `out_type` values that are stored in a single byte like + `uint8`. +output: A Tensor with one more dimension than the input `bytes`. The + added dimension will have size equal to the length of the elements + of `bytes` divided by the number of bytes to represent `out_type`. +)doc"); REGISTER_OP("DecodeCompressed") .Input("bytes: string") .Output("output: string") .Attr("compression_type: string = ''") - .SetShapeFn(shape_inference::UnchangedShape); + .SetShapeFn(shape_inference::UnchangedShape) + .Doc(R"doc( +Decompress strings. + +This op decompresses each element of the `bytes` input `Tensor`, which +is assumed to be compressed using the given `compression_type`. + +The `output` is a string `Tensor` of the same shape as `bytes`, +each element containing the decompressed data from the corresponding +element in `bytes`. + +bytes: A Tensor of string which is compressed. +output: A Tensor with the same shape as input `bytes`, uncompressed + from bytes. +compression_type: A scalar containing either (i) the empty string (no + compression), (ii) "ZLIB", or (iii) "GZIP". +)doc"); REGISTER_OP("ParseExample") .Input("serialized: string") @@ -88,7 +115,50 @@ REGISTER_OP("ParseExample") c->set_output(output_idx++, dense); } return Status::OK(); - }); + }) + .Doc(R"doc( +Transforms a vector of brain.Example protos (as strings) into typed tensors. + +serialized: A vector containing a batch of binary serialized Example protos. +names: A vector containing the names of the serialized protos. + May contain, for example, table key (descriptive) names for the + corresponding serialized protos. These are purely useful for debugging + purposes, and the presence of values here has no effect on the output. + May also be an empty vector if no names are available. + If non-empty, this vector must be the same length as "serialized". +dense_keys: A list of Ndense string Tensors (scalars). + The keys expected in the Examples' features associated with dense values. +dense_defaults: A list of Ndense Tensors (some may be empty). + dense_defaults[j] provides default values + when the example's feature_map lacks dense_key[j]. If an empty Tensor is + provided for dense_defaults[j], then the Feature dense_keys[j] is required. + The input type is inferred from dense_defaults[j], even when it's empty. + If dense_defaults[j] is not empty, and dense_shapes[j] is fully defined, + then the shape of dense_defaults[j] must match that of dense_shapes[j]. + If dense_shapes[j] has an undefined major dimension (variable strides dense + feature), dense_defaults[j] must contain a single element: + the padding element. +dense_shapes: A list of Ndense shapes; the shapes of data in each Feature + given in dense_keys. 
+ The number of elements in the Feature corresponding to dense_key[j] + must always equal dense_shapes[j].NumEntries(). + If dense_shapes[j] == (D0, D1, ..., DN) then the shape of output + Tensor dense_values[j] will be (|serialized|, D0, D1, ..., DN): + The dense outputs are just the inputs row-stacked by batch. + This works for dense_shapes[j] = (-1, D1, ..., DN). In this case + the shape of the output Tensor dense_values[j] will be + (|serialized|, M, D1, .., DN), where M is the maximum number of blocks + of elements of length D1 * .... * DN, across all minibatch entries + in the input. Any minibatch entry with less than M blocks of elements of + length D1 * ... * DN will be padded with the corresponding default_value + scalar element along the second dimension. +sparse_keys: A list of Nsparse string Tensors (scalars). + The keys expected in the Examples' features associated with sparse values. +sparse_types: A list of Nsparse types; the data types of data in each Feature + given in sparse_keys. + Currently the ParseExample supports DT_FLOAT (FloatList), + DT_INT64 (Int64List), and DT_STRING (BytesList). +)doc"); REGISTER_OP("ParseSingleExample") .Input("serialized: string") @@ -130,7 +200,45 @@ REGISTER_OP("ParseSingleExample") c->set_output(output_idx++, dense); } return Status::OK(); - }); + }) + .Doc(R"doc( +Transforms a tf.Example proto (as a string) into typed tensors. + +serialized: A vector containing a batch of binary serialized Example protos. +dense_keys: The keys expected in the Examples' features associated with dense + values. +dense_defaults: A list of Tensors (some may be empty), whose length matches + the length of `dense_keys`. dense_defaults[j] provides default values + when the example's feature_map lacks dense_key[j]. If an empty Tensor is + provided for dense_defaults[j], then the Feature dense_keys[j] is required. + The input type is inferred from dense_defaults[j], even when it's empty. + If dense_defaults[j] is not empty, and dense_shapes[j] is fully defined, + then the shape of dense_defaults[j] must match that of dense_shapes[j]. + If dense_shapes[j] has an undefined major dimension (variable strides dense + feature), dense_defaults[j] must contain a single element: + the padding element. +Tdense: The data types of data in each Feature given in dense_keys. + The length of this list must match the length of `dense_keys`. + Currently the ParseSingleExample op supports DT_FLOAT (FloatList), + DT_INT64 (Int64List), and DT_STRING (BytesList). +dense_shapes: The shapes of data in each Feature given in dense_keys. + The length of this list must match the length of `dense_keys`. The + number of elements in the Feature corresponding to dense_key[j] must + always equal dense_shapes[j].NumEntries(). If dense_shapes[j] == + (D0, D1, ..., DN) then the shape of output Tensor dense_values[j] + will be (D0, D1, ..., DN): In the case dense_shapes[j] = (-1, D1, + ..., DN), the shape of the output Tensor dense_values[j] will be (M, + D1, .., DN), where M is the number of blocks of elements of length + D1 * .... * DN, in the input. +num_sparse: The number of sparse features to be parsed from the example. This + must match the lengths of `sparse_keys` and `sparse_types`. +sparse_keys: A list of `num_sparse` strings. + The keys expected in the Examples' features associated with sparse values. +sparse_types: A list of `num_sparse` types; the data types of data in each + Feature given in sparse_keys. 
+ Currently the ParseSingleExample op supports DT_FLOAT (FloatList), + DT_INT64 (Int64List), and DT_STRING (BytesList). +)doc"); REGISTER_OP("ParseSingleSequenceExample") .Input("serialized: string") @@ -218,24 +326,106 @@ REGISTER_OP("ParseSingleSequenceExample") c->set_output(output_idx++, s); } return Status::OK(); - }); + }) + .Doc(R"doc( +Transforms a scalar brain.SequenceExample proto (as strings) into typed tensors. + +serialized: A scalar containing a binary serialized SequenceExample proto. +feature_list_dense_missing_assumed_empty: A vector listing the + FeatureList keys which may be missing from the SequenceExample. If the + associated FeatureList is missing, it is treated as empty. By default, + any FeatureList not listed in this vector must exist in the SequenceExample. +context_dense_keys: A list of Ncontext_dense string Tensors (scalars). + The keys expected in the SequenceExamples' context features associated with + dense values. +feature_list_dense_keys: A list of Nfeature_list_dense string Tensors (scalars). + The keys expected in the SequenceExamples' feature_lists associated + with lists of dense values. +context_dense_defaults: A list of Ncontext_dense Tensors (some may be empty). + context_dense_defaults[j] provides default values + when the SequenceExample's context map lacks context_dense_key[j]. + If an empty Tensor is provided for context_dense_defaults[j], + then the Feature context_dense_keys[j] is required. + The input type is inferred from context_dense_defaults[j], even when it's + empty. If context_dense_defaults[j] is not empty, its shape must match + context_dense_shapes[j]. +debug_name: A scalar containing the name of the serialized proto. + May contain, for example, table key (descriptive) name for the + corresponding serialized proto. This is purely useful for debugging + purposes, and the presence of values here has no effect on the output. + May also be an empty scalar if no name is available. +context_dense_shapes: A list of Ncontext_dense shapes; the shapes of data in + each context Feature given in context_dense_keys. + The number of elements in the Feature corresponding to context_dense_key[j] + must always equal context_dense_shapes[j].NumEntries(). + The shape of context_dense_values[j] will match context_dense_shapes[j]. +feature_list_dense_shapes: A list of Nfeature_list_dense shapes; the shapes of + data in each FeatureList given in feature_list_dense_keys. + The shape of each Feature in the FeatureList corresponding to + feature_list_dense_key[j] must always equal + feature_list_dense_shapes[j].NumEntries(). +context_sparse_keys: A list of Ncontext_sparse string Tensors (scalars). + The keys expected in the Examples' features associated with context_sparse + values. +context_sparse_types: A list of Ncontext_sparse types; the data types of data in + each context Feature given in context_sparse_keys. + Currently the ParseSingleSequenceExample supports DT_FLOAT (FloatList), + DT_INT64 (Int64List), and DT_STRING (BytesList). +feature_list_sparse_keys: A list of Nfeature_list_sparse string Tensors + (scalars). The keys expected in the FeatureLists associated with sparse + values. +feature_list_sparse_types: A list of Nfeature_list_sparse types; the data types + of data in each FeatureList given in feature_list_sparse_keys. + Currently the ParseSingleSequenceExample supports DT_FLOAT (FloatList), + DT_INT64 (Int64List), and DT_STRING (BytesList). 
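+
+A hedged usage sketch via the TF 1.x Python wrapper (the feature keys and
+dtypes below are illustrative assumptions, not part of this op):
+
+```python
+import tensorflow as tf
+
+context, sequences = tf.parse_single_sequence_example(
+    serialized_example,  # scalar string tensor, assumed defined elsewhere
+    context_features={"length": tf.FixedLenFeature([], tf.int64)},
+    sequence_features={"tokens": tf.FixedLenSequenceFeature([], tf.int64)})
+```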
+)doc"); REGISTER_OP("ParseTensor") .Input("serialized: string") .Output("output: out_type") .Attr("out_type: type") - .SetShapeFn(shape_inference::UnknownShape); + .SetShapeFn(shape_inference::UnknownShape) + .Doc(R"doc( +Transforms a serialized tensorflow.TensorProto proto into a Tensor. + +serialized: A scalar string containing a serialized TensorProto proto. +out_type: The type of the serialized tensor. The provided type must match the + type of the serialized tensor and no implicit conversion will take place. +output: A Tensor of type `out_type`. +)doc"); REGISTER_OP("SerializeTensor") .Input("tensor: T") .Output("serialized: string") .Attr("T: type") - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Transforms a Tensor into a serialized TensorProto proto. + +tensor: A Tensor of type `T`. +T: The type of the input tensor. +serialized: A serialized TensorProto proto of the input tensor. +)doc"); REGISTER_OP("DecodeJSONExample") .Input("json_examples: string") .Output("binary_examples: string") - .SetShapeFn(shape_inference::UnchangedShape); + .SetShapeFn(shape_inference::UnchangedShape) + .Doc(R"doc( +Convert JSON-encoded Example records to binary protocol buffer strings. + +This op translates a tensor containing Example records, encoded using +the [standard JSON +mapping](https://developers.google.com/protocol-buffers/docs/proto3#json), +into a tensor containing the same records encoded as binary protocol +buffers. The resulting tensor can then be fed to any of the other +Example-parsing ops. + +json_examples: Each string is a JSON object serialized according to the JSON + mapping of the Example proto. +binary_examples: Each string is a binary Example protocol buffer corresponding + to the respective element of `json_examples`. +)doc"); REGISTER_OP("DecodeCSV") .Input("records: string") @@ -259,12 +449,39 @@ REGISTER_OP("DecodeCSV") // Propagate shape of the records input. for (int i = 0; i < c->num_outputs(); ++i) c->set_output(i, c->input(0)); return Status::OK(); - }); + }) + .Doc(R"doc( +Convert CSV records to tensors. Each column maps to one tensor. + +RFC 4180 format is expected for the CSV records. +(https://tools.ietf.org/html/rfc4180) +Note that we allow leading and trailing spaces with int or float field. + +records: Each string is a record/row in the csv and all records should have + the same format. +record_defaults: One tensor per column of the input record, with either a + scalar default value for that column or empty if the column is required. +field_delim: char delimiter to separate fields in a record. +use_quote_delim: If false, treats double quotation marks as regular + characters inside of the string fields (ignoring RFC 4180, Section 2, + Bullet 5). +na_value: Additional string to recognize as NA/NaN. +output: Each tensor will have the same shape as records. +)doc"); REGISTER_OP("StringToNumber") .Input("string_tensor: string") .Output("output: out_type") .Attr("out_type: {float, double, int32, int64} = DT_FLOAT") - .SetShapeFn(shape_inference::UnchangedShape); + .SetShapeFn(shape_inference::UnchangedShape) + .Doc(R"doc( +Converts each string in the input Tensor to the specified numeric type. + +(Note that int32 overflow results in an error while float overflow +results in a rounded value.) + +out_type: The numeric type to interpret each string in `string_tensor` as. +output: A Tensor of the same shape as the input `string_tensor`. 
+)doc"); } // namespace tensorflow diff --git a/tensorflow/core/ops/random_ops.cc b/tensorflow/core/ops/random_ops.cc index f6c668f5c9..31d9c82e53 100644 --- a/tensorflow/core/ops/random_ops.cc +++ b/tensorflow/core/ops/random_ops.cc @@ -31,7 +31,22 @@ REGISTER_OP("RandomUniform") .Attr("seed2: int = 0") .Attr("dtype: {half,bfloat16,float,double}") .Attr("T: {int32, int64}") - .SetShapeFn(shape_inference::RandomShape); + .SetShapeFn(shape_inference::RandomShape) + .Doc(R"doc( +Outputs random values from a uniform distribution. + +The generated values follow a uniform distribution in the range `[0, 1)`. The +lower bound 0 is included in the range, while the upper bound 1 is excluded. + +shape: The shape of the output tensor. +dtype: The type of the output. +seed: If either `seed` or `seed2` are set to be non-zero, the random number + generator is seeded by the given seed. Otherwise, it is seeded by a + random seed. +seed2: A second seed to avoid seed collision. + +output: A tensor of the specified shape filled with uniform random values. +)doc"); REGISTER_OP("RandomUniformInt") .Input("shape: T") @@ -43,7 +58,28 @@ REGISTER_OP("RandomUniformInt") .Attr("seed2: int = 0") .Attr("Tout: {int32, int64}") .Attr("T: {int32, int64}") - .SetShapeFn(shape_inference::RandomShape); + .SetShapeFn(shape_inference::RandomShape) + .Doc(R"doc( +Outputs random integers from a uniform distribution. + +The generated values are uniform integers in the range `[minval, maxval)`. +The lower bound `minval` is included in the range, while the upper bound +`maxval` is excluded. + +The random integers are slightly biased unless `maxval - minval` is an exact +power of two. The bias is small for values of `maxval - minval` significantly +smaller than the range of the output (either `2^32` or `2^64`). + +shape: The shape of the output tensor. +minval: 0-D. Inclusive lower bound on the generated integers. +maxval: 0-D. Exclusive upper bound on the generated integers. +seed: If either `seed` or `seed2` are set to be non-zero, the random number + generator is seeded by the given seed. Otherwise, it is seeded by a + random seed. +seed2: A second seed to avoid seed collision. + +output: A tensor of the specified shape filled with uniform random integers. +)doc"); REGISTER_OP("RandomStandardNormal") .Input("shape: T") @@ -53,7 +89,21 @@ REGISTER_OP("RandomStandardNormal") .Attr("seed2: int = 0") .Attr("dtype: {half,bfloat16,float,double}") .Attr("T: {int32, int64}") - .SetShapeFn(shape_inference::RandomShape); + .SetShapeFn(shape_inference::RandomShape) + .Doc(R"doc( +Outputs random values from a normal distribution. + +The generated values will have mean 0 and standard deviation 1. + +shape: The shape of the output tensor. +dtype: The type of the output. +seed: If either `seed` or `seed2` are set to be non-zero, the random number + generator is seeded by the given seed. Otherwise, it is seeded by a + random seed. +seed2: A second seed to avoid seed collision. + +output: A tensor of the specified shape filled with random normal values. +)doc"); REGISTER_OP("ParameterizedTruncatedNormal") .Input("shape: T") @@ -67,7 +117,27 @@ REGISTER_OP("ParameterizedTruncatedNormal") .Attr("seed2: int = 0") .Attr("dtype: {half,bfloat16,float,double}") .Attr("T: {int32, int64}") - .SetShapeFn(shape_inference::RandomShape); + .SetShapeFn(shape_inference::RandomShape) + .Doc(R"doc( +Outputs random values from a normal distribution. 
The parameters may each be a +scalar which applies to the entire output, or a vector of length shape[0] which +stores the parameters for each batch. + +shape: The shape of the output tensor. Batches are indexed by the 0th dimension. +means: The mean parameter of each batch. +stdevs: The standard deviation parameter of each batch. Must be greater than 0. +minvals: The minimum cutoff. May be -infinity. +maxvals: The maximum cutoff. May be +infinity, and must be more than the minval + for each batch. +dtype: The type of the output. +seed: If either `seed` or `seed2` are set to be non-zero, the random number + generator is seeded by the given seed. Otherwise, it is seeded by a + random seed. +seed2: A second seed to avoid seed collision. + +output: A matrix of shape num_batches x samples_per_batch, filled with random + truncated normal values using the parameters for each row. +)doc"); REGISTER_OP("TruncatedNormal") .Input("shape: T") @@ -77,7 +147,24 @@ REGISTER_OP("TruncatedNormal") .Attr("seed2: int = 0") .Attr("dtype: {half,bfloat16,float,double}") .Attr("T: {int32, int64}") - .SetShapeFn(shape_inference::RandomShape); + .SetShapeFn(shape_inference::RandomShape) + .Doc(R"doc( +Outputs random values from a truncated normal distribution. + +The generated values follow a normal distribution with mean 0 and standard +deviation 1, except that values whose magnitude is more than 2 standard +deviations from the mean are dropped and re-picked. + +shape: The shape of the output tensor. +dtype: The type of the output. +seed: If either `seed` or `seed2` are set to be non-zero, the random number + generator is seeded by the given seed. Otherwise, it is seeded by a + random seed. +seed2: A second seed to avoid seed collision. + +output: A tensor of the specified shape filled with random truncated normal + values. +)doc"); REGISTER_OP("RandomShuffle") .Input("value: T") @@ -86,7 +173,29 @@ REGISTER_OP("RandomShuffle") .Attr("seed: int = 0") .Attr("seed2: int = 0") .Attr("T: type") - .SetShapeFn(shape_inference::UnchangedShape); + .SetShapeFn(shape_inference::UnchangedShape) + .Doc(R"doc( +Randomly shuffles a tensor along its first dimension. + + The tensor is shuffled along dimension 0, such that each `value[j]` is mapped + to one and only one `output[i]`. For example, a mapping that might occur for a + 3x2 tensor is: + +``` +[[1, 2], [[5, 6], + [3, 4], ==> [1, 2], + [5, 6]] [3, 4]] +``` + +value: The tensor to be shuffled. +seed: If either `seed` or `seed2` are set to be non-zero, the random number + generator is seeded by the given seed. Otherwise, it is seeded by a + random seed. +seed2: A second seed to avoid seed collision. + +output: A tensor of same shape and type as `value`, shuffled along its first + dimension. +)doc"); REGISTER_OP("Multinomial") .SetIsStateful() @@ -106,7 +215,19 @@ REGISTER_OP("Multinomial") TF_RETURN_IF_ERROR(c->MakeDimForScalarInput(1, &num_samples)); c->set_output(0, c->Matrix(c->Dim(logits_shape, 0), num_samples)); return Status::OK(); - }); + }) + .Doc(R"doc( +Draws samples from a multinomial distribution. + +logits: 2-D Tensor with shape `[batch_size, num_classes]`. Each slice `[i, :]` + represents the unnormalized log probabilities for all classes. +num_samples: 0-D. Number of independent samples to draw for each row slice. +seed: If either seed or seed2 is set to be non-zero, the internal random number + generator is seeded by the given seed. Otherwise, a random seed is used. +seed2: A second seed to avoid seed collision. 
+output: 2-D Tensor with shape `[batch_size, num_samples]`. Each slice `[i, :]` + contains the drawn class labels with range `[0, num_classes)`. +)doc"); REGISTER_OP("RandomGamma") .SetIsStateful() @@ -123,7 +244,27 @@ REGISTER_OP("RandomGamma") TF_RETURN_IF_ERROR(c->Concatenate(out, c->input(1), &out)); c->set_output(0, out); return Status::OK(); - }); + }) + .Doc(R"doc( +Outputs random values from the Gamma distribution(s) described by alpha. + +This op uses the algorithm by Marsaglia et al. to acquire samples via +transformation-rejection from pairs of uniform and normal random variables. +See http://dl.acm.org/citation.cfm?id=358414 + +shape: 1-D integer tensor. Shape of independent samples to draw from each + distribution described by the shape parameters given in alpha. +alpha: A tensor in which each scalar is a "shape" parameter describing the + associated gamma distribution. +seed: If either `seed` or `seed2` are set to be non-zero, the random number + generator is seeded by the given seed. Otherwise, it is seeded by a + random seed. +seed2: A second seed to avoid seed collision. + +output: A tensor with shape `shape + shape(alpha)`. Each slice + `[:, ..., :, i0, i1, ...iN]` contains the samples drawn for + `alpha[i0, i1, ...iN]`. The dtype of the output matches the dtype of alpha. +)doc"); REGISTER_OP("RandomPoisson") .SetIsStateful() @@ -141,7 +282,10 @@ REGISTER_OP("RandomPoisson") c->set_output(0, out); return Status::OK(); }) - .Deprecated(25, "Replaced by RandomPoissonV2"); + .Deprecated(25, "Replaced by RandomPoissonV2") + .Doc(R"doc( +Use RandomPoissonV2 instead. +)doc"); REGISTER_OP("RandomPoissonV2") .SetIsStateful() @@ -159,6 +303,32 @@ REGISTER_OP("RandomPoissonV2") TF_RETURN_IF_ERROR(c->Concatenate(out, c->input(1), &out)); c->set_output(0, out); return Status::OK(); - }); + }) + .Doc(R"doc( +Outputs random values from the Poisson distribution(s) described by rate. + +This op uses two algorithms, depending on rate. If rate >= 10, then +the algorithm by Hormann is used to acquire samples via +transformation-rejection. +See http://www.sciencedirect.com/science/article/pii/0167668793909974. + +Otherwise, Knuth's algorithm is used to acquire samples via multiplying uniform +random variables. +See Donald E. Knuth (1969). Seminumerical Algorithms. The Art of Computer +Programming, Volume 2. Addison Wesley + +shape: 1-D integer tensor. Shape of independent samples to draw from each + distribution described by the shape parameters given in rate. +rate: A tensor in which each scalar is a "rate" parameter describing the + associated poisson distribution. +seed: If either `seed` or `seed2` are set to be non-zero, the random number + generator is seeded by the given seed. Otherwise, it is seeded by a + random seed. +seed2: A second seed to avoid seed collision. + +output: A tensor with shape `shape + shape(rate)`. Each slice + `[:, ..., :, i0, i1, ...iN]` contains the samples drawn for + `rate[i0, i1, ...iN]`. 
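+
+The shape semantics can be illustrated with NumPy (a sketch, not this op's
+implementation; `np.random` uses a different algorithm and seeding scheme):
+
+```python
+import numpy as np
+
+rate = np.array([1.0, 5.0, 10.0])            # shape(rate) == (3,)
+samples = np.random.default_rng(0).poisson(lam=rate, size=(2, 3))
+print(samples.shape)                         # (2, 3) == shape + shape(rate)
+```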
+)doc"); } // namespace tensorflow diff --git a/tensorflow/core/ops/remote_fused_graph_ops.cc b/tensorflow/core/ops/remote_fused_graph_ops.cc index d904666733..85370e648c 100644 --- a/tensorflow/core/ops/remote_fused_graph_ops.cc +++ b/tensorflow/core/ops/remote_fused_graph_ops.cc @@ -36,6 +36,23 @@ REGISTER_OP("RemoteFusedGraphExecute") .Attr("Tinputs: list(type) >= 0") .Attr("Toutputs: list(type) >= 0") .Attr("serialized_remote_fused_graph_execute_info: string") - .SetShapeFn(RemoteFusedGraphExecuteShapeFn); + .SetShapeFn(RemoteFusedGraphExecuteShapeFn) + .Doc(R"doc( +Execute a sub graph on a remote processor. + +The graph specifications(such as graph itself, input tensors and output names) +are stored as a serialized protocol buffer of RemoteFusedGraphExecuteInfo +as serialized_remote_fused_graph_execute_info. +The specifications will be passed to a dedicated registered +remote fused graph executor. The executor will send the graph specifications +to a remote processor and execute that graph. The execution results +will be passed to consumer nodes as outputs of this node. + +inputs: Arbitrary number of tensors with arbitrary data types +outputs: Arbitrary number of tensors with arbitrary data types +serialized_remote_fused_graph_execute_info: Serialized protocol buffer +of RemoteFusedGraphExecuteInfo which contains graph specifications. + +)doc"); } // namespace tensorflow diff --git a/tensorflow/core/ops/resource_variable_ops.cc b/tensorflow/core/ops/resource_variable_ops.cc index 839f48eacd..bf9e673e8e 100644 --- a/tensorflow/core/ops/resource_variable_ops.cc +++ b/tensorflow/core/ops/resource_variable_ops.cc @@ -76,19 +76,51 @@ REGISTER_OP("VarHandleOp") std::vector<ShapeAndType>{{s, t}}); return Status::OK(); - }); + }) + .Doc(R"( +Creates a handle to a Variable resource. + +container: the container this variable is placed in. +shared_name: the name by which this variable is referred to. +dtype: the type of this variable. Must agree with the dtypes + of all ops using this variable. +shape: The (possibly partially specified) shape of this variable. +)"); REGISTER_OP("ReadVariableOp") .Input("resource: resource") .Output("value: dtype") .Attr("dtype: type") - .SetShapeFn(ReadVariableShapeFn); + .SetShapeFn(ReadVariableShapeFn) + .Doc(R"( +Reads the value of a variable. + +The tensor returned by this operation is immutable. + +The value returned by this operation is guaranteed to be influenced by all the +writes on which this operation depends directly or indirectly, and to not be +influenced by any of the writes which depend directly or indirectly on this +operation. + +resource: handle to the resource in which to store the variable. +dtype: the dtype of the value. +)"); REGISTER_OP("DestroyResourceOp") .Input("resource: resource") .Attr("ignore_lookup_error: bool = true") .SetIsStateful() - .SetShapeFn(shape_inference::NoOutputs); + .SetShapeFn(shape_inference::NoOutputs) + .Doc(R"( +Deletes the resource specified by the handle. + +All subsequent operations using the resource will result in a NotFound +error status. + +resource: handle to the resource to delete. +ignore_lookup_error: whether to ignore the error when the resource + doesn't exist. +)"); Status CreateAssignShapeFn(InferenceContext* c) { ShapeAndType handle_shape_and_type; @@ -105,24 +137,67 @@ REGISTER_OP("AssignVariableOp") .Input("resource: resource") .Input("value: dtype") .Attr("dtype: type") - .SetShapeFn(CreateAssignShapeFn); + .SetShapeFn(CreateAssignShapeFn) + .Doc(R"( +Assigns a new value to a variable. 
+
+Any ReadVariableOp with a control dependency on this op is guaranteed to return
+this value or a subsequent newer value of the variable.
+
+resource: handle to the resource in which to store the variable.
+value: the value to set the variable to.
+dtype: the dtype of the value.
+)");

REGISTER_OP("AssignAddVariableOp")
    .Input("resource: resource")
    .Input("value: dtype")
    .Attr("dtype: type")
-    .SetShapeFn(CreateAssignShapeFn);
+    .SetShapeFn(CreateAssignShapeFn)
+    .Doc(R"(
+Adds a value to the current value of a variable.
+
+Any ReadVariableOp which depends directly or indirectly on this assign is
+guaranteed to see the incremented value or a subsequent newer one.
+
+Outputs the incremented value, which can be used to totally order the
+increments to this variable.
+
+resource: handle to the resource in which to store the variable.
+value: the value by which the variable will be incremented.
+dtype: the dtype of the value.
+)");

REGISTER_OP("AssignSubVariableOp")
    .Input("resource: resource")
    .Input("value: dtype")
    .Attr("dtype: type")
-    .SetShapeFn(CreateAssignShapeFn);
+    .SetShapeFn(CreateAssignShapeFn)
+    .Doc(R"(
+Subtracts a value from the current value of a variable.
+
+Any ReadVariableOp which depends directly or indirectly on this assign is
+guaranteed to see the decremented value or a subsequent newer one.
+
+Outputs the decremented value, which can be used to totally order the
+decrements to this variable.
+
+resource: handle to the resource in which to store the variable.
+value: the value by which the variable will be decremented.
+dtype: the dtype of the value.
+)");

REGISTER_OP("VarIsInitializedOp")
    .Input("resource: resource")
    .Output("is_initialized: bool")
-    .SetShapeFn(tensorflow::shape_inference::ScalarShape);
+    .SetShapeFn(tensorflow::shape_inference::ScalarShape)
+    .Doc(R"doc(
+Checks whether a resource handle-based variable has been initialized.
+
+resource: the input resource handle.
+is_initialized: a scalar boolean which is true if the variable has been
+initialized.
+)doc");

Status VariableShapeShapeFn(InferenceContext* c) {
  auto* handle_data = c->input_handle_shapes_and_types(0);
@@ -140,7 +215,20 @@ REGISTER_OP("VariableShape")
    .Input("input: resource")
    .Output("output: out_type")
    .Attr("out_type: {int32, int64} = DT_INT32")
-    .SetShapeFn(VariableShapeShapeFn);
+    .SetShapeFn(VariableShapeShapeFn)
+    .Doc(R"doc(
+Returns the shape of the variable pointed to by `resource`.
+
+This operation returns a 1-D integer tensor representing the shape of `input`.
+
+For example:
+
+```
+# 't' is [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]]
+shape(t) ==> [2, 2, 3]
+```
+
+)doc");

REGISTER_OP("ResourceGather")
    .Input("resource: resource")
@@ -165,7 +253,25 @@ REGISTER_OP("ResourceGather")
      TF_RETURN_IF_ERROR(c->Concatenate(indices_shape, params_subshape, &out));
      c->set_output(0, out);
      return Status::OK();
-    });
+    })
+    .Doc(R"doc(
+Gather slices from the variable pointed to by `resource` according to `indices`.
+
+`indices` must be an integer tensor of any dimension (usually 0-D or 1-D).
+Produces an output tensor with shape `indices.shape + params.shape[1:]` where:
+
+```python
+    # Scalar indices
+    output[:, ..., :] = params[indices, :, ... :]
+
+    # Vector indices
+    output[i, :, ..., :] = params[indices[i], :, ... :]
+
+    # Higher rank indices
+    output[i, ..., j, :, ..., :] = params[indices[i, ..., j], :, ..., :]
+```
+
+)doc");

REGISTER_OP("ResourceScatterAdd")
    .Input("resource: resource")
@@ -187,7 +293,34 @@ REGISTER_OP("ResourceScatterAdd")
      TF_RETURN_IF_ERROR(c->Concatenate(indices_shape, var_subshape, &concat));
      TF_RETURN_IF_ERROR(c->Merge(c->input(2), concat, &unused_updates_shape));
      return Status::OK();
-    });
+    })
+    .Doc(R"doc(
+Adds sparse updates to the variable referenced by `resource`.
+
+This operation computes
+
+    # Scalar indices
+    ref[indices, ...] += updates[...]
+
+    # Vector indices (for each i)
+    ref[indices[i], ...] += updates[i, ...]
+
+    # High rank indices (for each i, ..., j)
+    ref[indices[i, ..., j], ...] += updates[i, ..., j, ...]
+
+Duplicate entries are handled correctly: if multiple `indices` reference
+the same location, their contributions add.
+
+Requires `updates.shape = indices.shape + ref.shape[1:]`.
+
+<div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="width:100%" src='https://www.tensorflow.org/images/ScatterAdd.png' alt>
+</div>
+
+resource: Should be from a `Variable` node.
+indices: A tensor of indices into the first dimension of `ref`.
+updates: A tensor of updated values to add to `ref`.
+)doc");

REGISTER_OP("ResourceScatterUpdate")
    .Input("resource: resource")
@@ -209,6 +342,24 @@ REGISTER_OP("ResourceScatterUpdate")
      TF_RETURN_IF_ERROR(c->Concatenate(indices_shape, var_subshape, &concat));
      TF_RETURN_IF_ERROR(c->Merge(c->input(2), concat, &unused_updates_shape));
      return Status::OK();
-    });
+    })
+    .Doc(R"doc(
+Assigns sparse updates to the variable referenced by `resource`.
+
+This operation computes
+
+    # Scalar indices
+    ref[indices, ...] = updates[...]
+
+    # Vector indices (for each i)
+    ref[indices[i], ...] = updates[i, ...]
+
+    # High rank indices (for each i, ..., j)
+    ref[indices[i, ..., j], ...] = updates[i, ..., j, ...]
+
+resource: Should be from a `Variable` node.
+indices: A tensor of indices into the first dimension of `ref`.
+updates: A tensor of updated values to store in `ref`.
+)doc");

} // namespace tensorflow
diff --git a/tensorflow/core/ops/script_ops.cc b/tensorflow/core/ops/script_ops.cc
index d8716f0389..c7c594a999 100644
--- a/tensorflow/core/ops/script_ops.cc
+++ b/tensorflow/core/ops/script_ops.cc
@@ -25,7 +25,20 @@ REGISTER_OP("PyFunc")
    .Attr("Tin: list(type) >= 0")
    .Attr("Tout: list(type) >=0")
    .SetIsStateful()
-    .SetShapeFn(shape_inference::UnknownShape);
+    .SetShapeFn(shape_inference::UnknownShape)
+    .Doc(R"doc(
+Invokes a python function to compute func(input)->output.
+
+This operation is considered stateful. For a stateless version, see
+PyFuncStateless.
+
+token: A token representing a registered python function in this address space.
+input: List of Tensors that will provide input to the Op.
+output: The outputs from the Op.
+Tin: Data types of the inputs to the op.
+Tout: Data types of the outputs from the op.
+      The length of the list specifies the number of outputs.
+)doc");

REGISTER_OP("PyFuncStateless")
    .Input("input: Tin")
@@ -33,7 +46,10 @@ REGISTER_OP("PyFuncStateless")
    .Output("output: Tout")
    .Attr("token: string")
    .Attr("Tin: list(type) >= 0")
    .Attr("Tout: list(type) >= 0")
-    .SetShapeFn(shape_inference::UnknownShape);
+    .SetShapeFn(shape_inference::UnknownShape)
+    .Doc(R"doc(
+A stateless version of PyFunc.
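+
+As a usage sketch (TF 1.x Python API; `stateful=False` is what routes the
+call to this stateless kernel rather than to PyFunc):
+
+```python
+import numpy as np
+import tensorflow as tf
+
+def square(x):
+  return np.square(x)  # runs as ordinary Python at graph execution time
+
+x = tf.placeholder(tf.float32, [None])
+y = tf.py_func(square, [x], tf.float32, stateful=False)
+```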
+)doc");

REGISTER_OP("EagerPyFunc")
    .Input("input: Tin")
@@ -42,6 +58,11 @@ REGISTER_OP("EagerPyFunc")
    .Output("output: Tout")
    .Attr("token: string")
    .Attr("Tin: list(type) >= 0")
    .Attr("Tout: list(type) >=0")
    .SetIsStateful()
-    .SetShapeFn(shape_inference::UnknownShape);
+    .SetShapeFn(shape_inference::UnknownShape)
+    .Doc(R"doc(
+Eagerly executes a python function to compute func(input)->output. The
+semantics of the input, output, and attributes are the same as those for
+PyFunc.
+)doc");

} // namespace tensorflow
diff --git a/tensorflow/core/ops/sdca_ops.cc b/tensorflow/core/ops/sdca_ops.cc
index e67d95fa8c..dea75a1af8 100644
--- a/tensorflow/core/ops/sdca_ops.cc
+++ b/tensorflow/core/ops/sdca_ops.cc
@@ -63,14 +63,78 @@ REGISTER_OP("SdcaOptimizer")
    .Output("out_example_state_data: float")
    .Output("out_delta_sparse_weights: num_sparse_features * float")
    .Output("out_delta_dense_weights: num_dense_features * float")
-    .SetShapeFn(ApplySdcaOptimizerShapeFn);
+    .SetShapeFn(ApplySdcaOptimizerShapeFn)
+    .Doc(R"doc(
+Distributed version of Stochastic Dual Coordinate Ascent (SDCA) optimizer for
+linear models with L1 + L2 regularization. As the global optimization objective
+is strongly convex, the optimizer optimizes the dual objective at each step. The
+optimizer applies each update one example at a time. Examples are sampled
+uniformly, and the optimizer is learning-rate free and enjoys a linear
+convergence rate.
+
+[Proximal Stochastic Dual Coordinate Ascent](http://arxiv.org/pdf/1211.2717v1.pdf).<br>
+Shai Shalev-Shwartz, Tong Zhang. 2012
+
+$$Loss Objective = \sum f_{i}(wx_{i}) + (l2 / 2) * \|w\|^2 + l1 * \|w\|_1$$
+
+[Adding vs. Averaging in Distributed Primal-Dual Optimization](http://arxiv.org/abs/1502.03508).<br>
+Chenxin Ma, Virginia Smith, Martin Jaggi, Michael I. Jordan,
+Peter Richtarik, Martin Takac. 2015
+
+[Stochastic Dual Coordinate Ascent with Adaptive Probabilities](https://arxiv.org/abs/1502.08053).<br>
+Dominik Csiba, Zheng Qu, Peter Richtarik. 2015
+
+loss_type: Type of the primal loss. Currently SdcaSolver supports logistic,
+  squared and hinge losses.
+adaptative: Whether to use Adaptive SDCA for the inner loop.
+num_sparse_features: Number of sparse feature groups to train on.
+num_sparse_features_with_values: Number of sparse feature groups with values
+  associated with them; otherwise, values are implicitly treated as 1.0.
+num_dense_features: Number of dense feature groups to train on.
+l1: Symmetric l1 regularization strength.
+l2: Symmetric l2 regularization strength.
+num_loss_partitions: Number of partitions of the global loss function.
+num_inner_iterations: Number of iterations per mini-batch.
+sparse_example_indices: a list of vectors which contain example indices.
+sparse_feature_indices: a list of vectors which contain feature indices.
+sparse_feature_values: a list of vectors which contain the feature values
+  associated with each feature group.
+dense_features: a list of matrices which contain the dense feature values.
+example_weights: a vector which contains the weight associated with each
+  example.
+example_labels: a vector which contains the label/target associated with each
+  example.
+sparse_indices: a list of vectors where each value is an index that has a
+  corresponding weight in sparse_weights. This field may be omitted for the
+  dense approach.
+sparse_weights: a list of vectors where each value is the weight associated with
+  a sparse feature group.
+dense_weights: a list of vectors where the values are the weights associated
+  with a dense feature group.
+example_state_data: a list of vectors containing the example state data. +out_example_state_data: a list of vectors containing the updated example state + data. +out_delta_sparse_weights: a list of vectors where each value is the delta + weights associated with a sparse feature group. +out_delta_dense_weights: a list of vectors where the values are the delta + weights associated with a dense feature group. +)doc"); REGISTER_OP("SdcaShrinkL1") .Attr("num_features: int >= 0") .Attr("l1: float") .Attr("l2: float") .Input("weights: Ref(num_features * float)") - .SetShapeFn(shape_inference::UnknownShape); + .SetShapeFn(shape_inference::UnknownShape) + .Doc(R"doc( +Applies L1 regularization shrink step on the parameters. + +num_features: Number of feature groups to apply shrinking step. +l1: Symmetric l1 regularization strength. +l2: Symmetric l2 regularization strength. Should be a positive float. +weights: a list of vectors where each value is the weight associated with a + feature group. +)doc"); REGISTER_OP("SdcaFprint") .Input("input: string") @@ -82,6 +146,13 @@ REGISTER_OP("SdcaFprint") TF_RETURN_IF_ERROR(c->Concatenate(handle, c->Vector(2), &output_shape)); c->set_output(0, output_shape); return Status::OK(); - }); + }) + .Doc(R"doc( +Computes fingerprints of the input strings. + +input: vector of strings to compute fingerprints on. +output: a (N,2) shaped matrix where N is the number of elements in the input + vector. Each row contains the low and high parts of the fingerprint. +)doc"); } // namespace tensorflow diff --git a/tensorflow/core/ops/set_ops.cc b/tensorflow/core/ops/set_ops.cc index 5eb1c4d87d..85d1335dcf 100644 --- a/tensorflow/core/ops/set_ops.cc +++ b/tensorflow/core/ops/set_ops.cc @@ -30,7 +30,24 @@ REGISTER_OP("SetSize") .Attr("validate_indices: bool = true") .Attr("T: {int8, int16, int32, int64, uint8, uint16, string}") .Output("size: int32") - .SetShapeFn(shape_inference::UnknownShape); + .SetShapeFn(shape_inference::UnknownShape) + .Doc(R"doc( +Number of unique elements along last dimension of input `set`. + +Input `set` is a `SparseTensor` represented by `set_indices`, `set_values`, +and `set_shape`. The last dimension contains values in a set, duplicates are +allowed but ignored. + +If `validate_indices` is `True`, this op validates the order and range of `set` +indices. + +set_indices: 2D `Tensor`, indices of a `SparseTensor`. +set_values: 1D `Tensor`, values of a `SparseTensor`. +set_shape: 1D `Tensor`, shape of a `SparseTensor`. +size: For `set` ranked `n`, this is a `Tensor` with rank `n-1`, and the same 1st + `n-1` dimensions as `set`. Each value is the number of unique elements in + the corresponding `[0...n-1]` dimension of `set`. +)doc"); REGISTER_OP("DenseToDenseSetOperation") .Input("set1: T") @@ -86,7 +103,28 @@ REGISTER_OP("DenseToDenseSetOperation") c->set_output(1, c->Vector(c->UnknownDim())); c->set_output(2, c->Vector(output_rank)); return Status::OK(); - }); + }) + .Doc(R"doc( +Applies set operation along last dimension of 2 `Tensor` inputs. + +See SetOperationOp::SetOperationFromContext for values of `set_operation`. + +Output `result` is a `SparseTensor` represented by `result_indices`, +`result_values`, and `result_shape`. For `set1` and `set2` ranked `n`, this +has rank `n` and the same 1st `n-1` dimensions as `set1` and `set2`. The `nth` +dimension contains the result of `set_operation` applied to the corresponding +`[0...n-1]` dimension of `set`. + +set1: `Tensor` with rank `n`. 1st `n-1` dimensions must be the same as `set2`. 
+ Dimension `n` contains values in a set, duplicates are allowed but ignored. +set2: `Tensor` with rank `n`. 1st `n-1` dimensions must be the same as `set1`. + Dimension `n` contains values in a set, duplicates are allowed but ignored. +result_indices: 2D indices of a `SparseTensor`. +result_values: 1D values of a `SparseTensor`. +result_shape: 1D `Tensor` shape of a `SparseTensor`. `result_shape[0...n-1]` is + the same as the 1st `n-1` dimensions of `set1` and `set2`, `result_shape[n]` + is the max result set size across all `0...n-1` dimensions. +)doc"); REGISTER_OP("DenseToSparseSetOperation") .Input("set1: T") @@ -130,7 +168,41 @@ REGISTER_OP("DenseToSparseSetOperation") c->set_output(1, c->Vector(c->UnknownDim())); c->set_output(2, c->Vector(output_rank_dim)); return Status::OK(); - }); + }) + .Doc(R"doc( +Applies set operation along last dimension of `Tensor` and `SparseTensor`. + +See SetOperationOp::SetOperationFromContext for values of `set_operation`. + +Input `set2` is a `SparseTensor` represented by `set2_indices`, `set2_values`, +and `set2_shape`. For `set2` ranked `n`, 1st `n-1` dimensions must be the same +as `set1`. Dimension `n` contains values in a set, duplicates are allowed but +ignored. + +If `validate_indices` is `True`, this op validates the order and range of `set2` +indices. + +Output `result` is a `SparseTensor` represented by `result_indices`, +`result_values`, and `result_shape`. For `set1` and `set2` ranked `n`, this +has rank `n` and the same 1st `n-1` dimensions as `set1` and `set2`. The `nth` +dimension contains the result of `set_operation` applied to the corresponding +`[0...n-1]` dimension of `set`. + +set1: `Tensor` with rank `n`. 1st `n-1` dimensions must be the same as `set2`. + Dimension `n` contains values in a set, duplicates are allowed but ignored. +set2_indices: 2D `Tensor`, indices of a `SparseTensor`. Must be in row-major + order. +set2_values: 1D `Tensor`, values of a `SparseTensor`. Must be in row-major + order. +set2_shape: 1D `Tensor`, shape of a `SparseTensor`. `set2_shape[0...n-1]` must + be the same as the 1st `n-1` dimensions of `set1`, `result_shape[n]` is the + max set size across `n-1` dimensions. +result_indices: 2D indices of a `SparseTensor`. +result_values: 1D values of a `SparseTensor`. +result_shape: 1D `Tensor` shape of a `SparseTensor`. `result_shape[0...n-1]` is + the same as the 1st `n-1` dimensions of `set1` and `set2`, `result_shape[n]` + is the max result set size across all `0...n-1` dimensions. +)doc"); REGISTER_OP("SparseToSparseSetOperation") .Input("set1_indices: int64") @@ -186,6 +258,53 @@ REGISTER_OP("SparseToSparseSetOperation") c->set_output(1, c->Vector(c->UnknownDim())); c->set_output(2, c->Vector(output_rank_dim)); return Status::OK(); - }); + }) + .Doc(R"doc( +Applies set operation along last dimension of 2 `SparseTensor` inputs. + +See SetOperationOp::SetOperationFromContext for values of `set_operation`. + +If `validate_indices` is `True`, `SparseToSparseSetOperation` validates the +order and range of `set1` and `set2` indices. + +Input `set1` is a `SparseTensor` represented by `set1_indices`, `set1_values`, +and `set1_shape`. For `set1` ranked `n`, 1st `n-1` dimensions must be the same +as `set2`. Dimension `n` contains values in a set, duplicates are allowed but +ignored. + +Input `set2` is a `SparseTensor` represented by `set2_indices`, `set2_values`, +and `set2_shape`. For `set2` ranked `n`, 1st `n-1` dimensions must be the same +as `set1`. 
Dimension `n` contains values in a set, duplicates are allowed but +ignored. + +If `validate_indices` is `True`, this op validates the order and range of `set1` +and `set2` indices. + +Output `result` is a `SparseTensor` represented by `result_indices`, +`result_values`, and `result_shape`. For `set1` and `set2` ranked `n`, this +has rank `n` and the same 1st `n-1` dimensions as `set1` and `set2`. The `nth` +dimension contains the result of `set_operation` applied to the corresponding +`[0...n-1]` dimension of `set`. + +set1_indices: 2D `Tensor`, indices of a `SparseTensor`. Must be in row-major + order. +set1_values: 1D `Tensor`, values of a `SparseTensor`. Must be in row-major + order. +set1_shape: 1D `Tensor`, shape of a `SparseTensor`. `set1_shape[0...n-1]` must + be the same as `set2_shape[0...n-1]`, `set1_shape[n]` is the + max set size across `0...n-1` dimensions. +set2_indices: 2D `Tensor`, indices of a `SparseTensor`. Must be in row-major + order. +set2_values: 1D `Tensor`, values of a `SparseTensor`. Must be in row-major + order. +set2_shape: 1D `Tensor`, shape of a `SparseTensor`. `set2_shape[0...n-1]` must + be the same as `set1_shape[0...n-1]`, `set2_shape[n]` is the + max set size across `0...n-1` dimensions. +result_indices: 2D indices of a `SparseTensor`. +result_values: 1D values of a `SparseTensor`. +result_shape: 1D `Tensor` shape of a `SparseTensor`. `result_shape[0...n-1]` is + the same as the 1st `n-1` dimensions of `set1` and `set2`, `result_shape[n]` + is the max result set size across all `0...n-1` dimensions. +)doc"); } // namespace tensorflow diff --git a/tensorflow/core/ops/sparse_ops.cc b/tensorflow/core/ops/sparse_ops.cc index acc8c782ef..99f61a3054 100644 --- a/tensorflow/core/ops/sparse_ops.cc +++ b/tensorflow/core/ops/sparse_ops.cc @@ -57,7 +57,26 @@ REGISTER_OP("SparseAddGrad") c->set_output(0, c->Vector(c->Dim(a_indices, 0))); c->set_output(1, c->Vector(c->Dim(b_indices, 0))); return Status::OK(); - }); + }) + .Doc(R"doc( +The gradient operator for the SparseAdd op. + +The SparseAdd op calculates A + B, where A, B, and the sum are all represented +as `SparseTensor` objects. This op takes in the upstream gradient w.r.t. +non-empty values of the sum, and outputs the gradients w.r.t. the non-empty +values of A and B. + +backprop_val_grad: 1-D with shape `[nnz(sum)]`. The gradient with respect to + the non-empty values of the sum. +a_indices: 2-D. The `indices` of the `SparseTensor` A, size `[nnz(A), ndims]`. +b_indices: 2-D. The `indices` of the `SparseTensor` B, size `[nnz(B), ndims]`. +sum_indices: 2-D. The `indices` of the sum `SparseTensor`, size + `[nnz(sum), ndims]`. +a_val_grad: 1-D with shape `[nnz(A)]`. The gradient with respect to the + non-empty values of A. +b_val_grad: 1-D with shape `[nnz(B)]`. The gradient with respect to the + non-empty values of B. +)doc"); REGISTER_OP("SparseAdd") .Input("a_indices: int64") @@ -80,7 +99,33 @@ REGISTER_OP("SparseAdd") c->set_output(1, c->Vector(InferenceContext::kUnknownDim)); c->set_output(2, a_shape); return Status::OK(); - }); + }) + .Doc(R"doc( +Adds two `SparseTensor` objects to produce another `SparseTensor`. + +The input `SparseTensor` objects' indices are assumed ordered in standard +lexicographic order. If this is not the case, before this step run +`SparseReorder` to restore index ordering. + +By default, if two values sum to zero at some index, the output `SparseTensor` +would still include that particular location in its index, storing a zero in the +corresponding value slot. 
To override this, callers can specify `thresh`, +indicating that if the sum has a magnitude strictly smaller than `thresh`, its +corresponding value and index would then not be included. In particular, +`thresh == 0` (default) means everything is kept and actual thresholding happens +only for a positive value. + +In the following shapes, `nnz` is the count after taking `thresh` into account. + +a_indices: 2-D. The `indices` of the first `SparseTensor`, size `[nnz, ndims]` Matrix. +a_values: 1-D. The `values` of the first `SparseTensor`, size `[nnz]` Vector. +a_shape: 1-D. The `shape` of the first `SparseTensor`, size `[ndims]` Vector. +b_indices: 2-D. The `indices` of the second `SparseTensor`, size `[nnz, ndims]` Matrix. +b_values: 1-D. The `values` of the second `SparseTensor`, size `[nnz]` Vector. +b_shape: 1-D. The `shape` of the second `SparseTensor`, size `[ndims]` Vector. +thresh: 0-D. The magnitude threshold that determines if an output value/index +pair takes space. +)doc"); REGISTER_OP("SparseTensorDenseMatMul") .Input("a_indices: Tindices") @@ -116,7 +161,29 @@ REGISTER_OP("SparseTensorDenseMatMul") TF_RETURN_IF_ERROR(c->Merge(inner_left, inner_right, &unused_dim)); c->set_output(0, c->Matrix(output_left, output_right)); return Status::OK(); - }); + }) + .Doc(R"doc( +Multiply SparseTensor (of rank 2) "A" by dense matrix "B". + +No validity checking is performed on the indices of A. However, the following +input format is recommended for optimal behavior: + +if adjoint_a == false: + A should be sorted in lexicographically increasing order. Use SparseReorder + if you're not sure. +if adjoint_a == true: + A should be sorted in order of increasing dimension 1 (i.e., "column major" + order instead of "row major" order). + +a_indices: 2-D. The `indices` of the `SparseTensor`, size `[nnz, 2]` Matrix. +a_values: 1-D. The `values` of the `SparseTensor`, size `[nnz]` Vector. +a_shape: 1-D. The `shape` of the `SparseTensor`, size `[2]` Vector. +b: 2-D. A dense Matrix. +adjoint_a: Use the adjoint of A in the matrix multiply. If A is complex, this + is transpose(conj(A)). Otherwise it's transpose(A). +adjoint_b: Use the adjoint of B in the matrix multiply. If B is complex, this + is transpose(conj(B)). Otherwise it's transpose(B). +)doc"); REGISTER_OP("SerializeSparse") .Input("sparse_indices: int64") @@ -132,7 +199,16 @@ REGISTER_OP("SerializeSparse") TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 1, &unused)); c->set_output(0, c->Vector(3)); return Status::OK(); - }); + }) + .Doc(R"doc( +Serialize a `SparseTensor` into a `[3]` `Tensor` object. + +sparse_indices: 2-D. The `indices` of the `SparseTensor`. +sparse_values: 1-D. The `values` of the `SparseTensor`. +sparse_shape: 1-D. The `shape` of the `SparseTensor`. +out_type: The `dtype` to use for serialization; the supported types are `string` + (default) and `variant`. +)doc"); REGISTER_OP("SerializeManySparse") .Input("sparse_indices: int64") @@ -148,7 +224,24 @@ REGISTER_OP("SerializeManySparse") TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 1, &unused)); c->set_output(0, c->Matrix(InferenceContext::kUnknownDim, 3)); return Status::OK(); - }); + }) + .Doc(R"doc( +Serialize an `N`-minibatch `SparseTensor` into an `[N, 3]` `Tensor` object. + +The `SparseTensor` must have rank `R` greater than 1, and the first dimension +is treated as the minibatch dimension. Elements of the `SparseTensor` +must be sorted in increasing order of this first dimension. 
The serialized +`SparseTensor` objects going into each row of `serialized_sparse` will have +rank `R-1`. + +The minibatch size `N` is extracted from `sparse_shape[0]`. + +sparse_indices: 2-D. The `indices` of the minibatch `SparseTensor`. +sparse_values: 1-D. The `values` of the minibatch `SparseTensor`. +sparse_shape: 1-D. The `shape` of the minibatch `SparseTensor`. +out_type: The `dtype` to use for serialization; the supported types are `string` + (default) and `variant`. +)doc"); REGISTER_OP("DeserializeSparse") .Input("serialized_sparse: Tserialized") @@ -166,7 +259,56 @@ REGISTER_OP("DeserializeSparse") c->set_output(1, c->Vector(InferenceContext::kUnknownDim)); c->set_output(2, c->Vector(InferenceContext::kUnknownDim)); return Status::OK(); - }); + }) + .Doc(R"doc( +Deserialize `SparseTensor` objects. + +The input `serialized_sparse` must have the shape `[?, ?, ..., ?, 3]` where +the last dimension stores serialized `SparseTensor` objects and the other N +dimensions (N >= 0) correspond to a batch. The ranks of the original +`SparseTensor` objects must all match. When the final `SparseTensor` is +created, its rank is the rank of the incoming `SparseTensor` objects plus N; +the sparse tensors have been concatenated along new dimensions, one for each +batch. + +The output `SparseTensor` object's shape values for the original dimensions +are the max across the input `SparseTensor` objects' shape values for the +corresponding dimensions. The new dimensions match the size of the batch. + +The input `SparseTensor` objects' indices are assumed ordered in +standard lexicographic order. If this is not the case, after this +step run `SparseReorder` to restore index ordering. + +For example, if the serialized input is a `[2 x 3]` matrix representing two +original `SparseTensor` objects: + + index = [ 0] + [10] + [20] + values = [1, 2, 3] + shape = [50] + +and + + index = [ 2] + [10] + values = [4, 5] + shape = [30] + +then the final deserialized `SparseTensor` will be: + + index = [0 0] + [0 10] + [0 20] + [1 2] + [1 10] + values = [1, 2, 3, 4, 5] + shape = [2 50] + +serialized_sparse: The serialized `SparseTensor` objects. The last dimension + must have 3 columns. +dtype: The `dtype` of the serialized `SparseTensor` objects. +)doc"); REGISTER_OP("DeserializeManySparse") .Input("serialized_sparse: string") @@ -187,7 +329,56 @@ REGISTER_OP("DeserializeManySparse") c->set_output(1, c->Vector(InferenceContext::kUnknownDim)); c->set_output(2, c->Vector(InferenceContext::kUnknownDim)); return Status::OK(); - }); + }) + .Doc(R"doc( +Deserialize and concatenate `SparseTensors` from a serialized minibatch. + +The input `serialized_sparse` must be a string matrix of shape `[N x 3]` where +`N` is the minibatch size and the rows correspond to packed outputs of +`SerializeSparse`. The ranks of the original `SparseTensor` objects +must all match. When the final `SparseTensor` is created, it has rank one +higher than the ranks of the incoming `SparseTensor` objects +(they have been concatenated along a new row dimension). + +The output `SparseTensor` object's shape values for all dimensions but the +first are the max across the input `SparseTensor` objects' shape values +for the corresponding dimensions. Its first shape value is `N`, the minibatch +size. + +The input `SparseTensor` objects' indices are assumed ordered in +standard lexicographic order. If this is not the case, after this +step run `SparseReorder` to restore index ordering. 
+ +For example, if the serialized input is a `[2 x 3]` matrix representing two +original `SparseTensor` objects: + + index = [ 0] + [10] + [20] + values = [1, 2, 3] + shape = [50] + +and + + index = [ 2] + [10] + values = [4, 5] + shape = [30] + +then the final deserialized `SparseTensor` will be: + + index = [0 0] + [0 10] + [0 20] + [1 2] + [1 10] + values = [1, 2, 3, 4, 5] + shape = [2 50] + +serialized_sparse: 2-D, The `N` serialized `SparseTensor` objects. + Must have 3 columns. +dtype: The `dtype` of the serialized `SparseTensor` objects. +)doc"); REGISTER_OP("SparseToDense") .Input("sparse_indices: Tindices") @@ -203,7 +394,41 @@ REGISTER_OP("SparseToDense") TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(1, &out)); c->set_output(0, out); return Status::OK(); - }); + }) + .Doc(R"doc( +Converts a sparse representation into a dense tensor. + +Builds an array `dense` with shape `output_shape` such that + +``` +# If sparse_indices is scalar +dense[i] = (i == sparse_indices ? sparse_values : default_value) + +# If sparse_indices is a vector, then for each i +dense[sparse_indices[i]] = sparse_values[i] + +# If sparse_indices is an n by d matrix, then for each i in [0, n) +dense[sparse_indices[i][0], ..., sparse_indices[i][d-1]] = sparse_values[i] +``` + +All other values in `dense` are set to `default_value`. If `sparse_values` is a +scalar, all sparse indices are set to this single value. + +Indices should be sorted in lexicographic order, and indices must not +contain any repeats. If `validate_indices` is true, these properties +are checked during execution. + +sparse_indices: 0-D, 1-D, or 2-D. `sparse_indices[i]` contains the complete + index where `sparse_values[i]` will be placed. +output_shape: 1-D. Shape of the dense output tensor. +sparse_values: 1-D. Values corresponding to each row of `sparse_indices`, + or a scalar value to be used for all sparse indices. +default_value: Scalar value to set for indices not specified in + `sparse_indices`. +validate_indices: If true, indices are checked to make sure they are sorted in + lexicographic order and that there are no repeats. +dense: Dense output tensor of shape `output_shape`. +)doc"); REGISTER_OP("SparseConcat") .Input("indices: N * int64") @@ -248,7 +473,61 @@ REGISTER_OP("SparseConcat") c->set_output(1, c->Vector(output_row_count)); c->set_output(2, output_shape); return Status::OK(); - }); + }) + .Doc(R"doc( +Concatenates a list of `SparseTensor` along the specified dimension. + +Concatenation is with respect to the dense versions of these sparse tensors. +It is assumed that each input is a `SparseTensor` whose elements are ordered +along increasing dimension number. + +All inputs' shapes must match, except for the concat dimension. The +`indices`, `values`, and `shapes` lists must have the same length. + +The output shape is identical to the inputs', except along the concat +dimension, where it is the sum of the inputs' sizes along that dimension. + +The output elements will be resorted to preserve the sort order along +increasing dimension number. + +This op runs in `O(M log M)` time, where `M` is the total number of non-empty +values across all inputs. This is due to the need for an internal sort in +order to concatenate efficiently across an arbitrary dimension. 
+
+For example, if `concat_dim = 1` and the inputs are
+
+    sp_inputs[0]: shape = [2, 3]
+    [0, 2]: "a"
+    [1, 0]: "b"
+    [1, 1]: "c"
+
+    sp_inputs[1]: shape = [2, 4]
+    [0, 1]: "d"
+    [0, 2]: "e"
+
+then the output will be
+
+    shape = [2, 7]
+    [0, 2]: "a"
+    [0, 4]: "d"
+    [0, 5]: "e"
+    [1, 0]: "b"
+    [1, 1]: "c"
+
+Graphically this is equivalent to doing
+
+    [    a] concat [  d e  ] = [    a   d e  ]
+    [b c  ]        [       ]   [b c          ]
+
+indices: 2-D. Indices of each input `SparseTensor`.
+values: 1-D. Non-empty values of each `SparseTensor`.
+shapes: 1-D. Shapes of each `SparseTensor`.
+output_indices: 2-D. Indices of the concatenated `SparseTensor`.
+output_values: 1-D. Non-empty values of the concatenated `SparseTensor`.
+output_shape: 1-D. Shape of the concatenated `SparseTensor`.
+concat_dim: Dimension to concatenate along. Must be in range [-rank, rank),
+  where rank is the number of dimensions in each input `SparseTensor`.
+)doc");

REGISTER_OP("SparseCross")
    .Input("indices: N * int64")
@@ -271,7 +550,62 @@ REGISTER_OP("SparseCross")
      c->set_output(1, c->Vector(c->UnknownDim()));
      c->set_output(2, c->Vector(2));
      return Status::OK();
-    });
+    })
+    .Doc(R"doc(
+Generates sparse cross from a list of sparse and dense tensors.
+
+The op takes two lists, one of 2D `SparseTensor` and one of 2D `Tensor`, each
+representing features of one feature column. It outputs a 2D `SparseTensor` with
+the batchwise crosses of these features.
+
+For example, if the inputs are
+
+    inputs[0]: SparseTensor with shape = [2, 2]
+    [0, 0]: "a"
+    [1, 0]: "b"
+    [1, 1]: "c"
+
+    inputs[1]: SparseTensor with shape = [2, 1]
+    [0, 0]: "d"
+    [1, 0]: "e"
+
+    inputs[2]: Tensor [["f"], ["g"]]
+
+then the output will be
+
+    shape = [2, 2]
+    [0, 0]: "a_X_d_X_f"
+    [1, 0]: "b_X_e_X_g"
+    [1, 1]: "c_X_e_X_g"
+
+if hashed_output=true then the output will be
+
+    shape = [2, 2]
+    [0, 0]: FingerprintCat64(
+                Fingerprint64("f"), FingerprintCat64(
+                    Fingerprint64("d"), Fingerprint64("a")))
+    [1, 0]: FingerprintCat64(
+                Fingerprint64("g"), FingerprintCat64(
+                    Fingerprint64("e"), Fingerprint64("b")))
+    [1, 1]: FingerprintCat64(
+                Fingerprint64("g"), FingerprintCat64(
+                    Fingerprint64("e"), Fingerprint64("c")))
+
+indices: 2-D. Indices of each input `SparseTensor`.
+values: 1-D. Values of each `SparseTensor`.
+shapes: 1-D. Shapes of each `SparseTensor`.
+dense_inputs: 2-D. Columns represented by dense `Tensor`.
+hashed_output: If true, returns the hash of the cross instead of the string.
+  This allows us to avoid string manipulations.
+num_buckets: Used if hashed_output is true:
+  output = hashed_value%num_buckets if num_buckets > 0 else hashed_value.
+hash_key: Specify the hash_key that will be used by the `FingerprintCat64`
+  function to combine the crosses fingerprints.
+output_indices: 2-D. Indices of the concatenated `SparseTensor`.
+output_values: 1-D. Non-empty values of the concatenated or hashed
+  `SparseTensor`.
+output_shape: 1-D. Shape of the concatenated `SparseTensor`.
+)doc");

REGISTER_OP("SparseSplit")
    .Input("split_dim: int64")
@@ -300,7 +634,41 @@ REGISTER_OP("SparseSplit")
      for (int i = 0; i < num_splits; ++i) c->set_output(out_idx++, output_shape);
      return Status::OK();
-    });
+    })
+    .Doc(R"doc(
+Split a `SparseTensor` into `num_split` tensors along one dimension.
+
+If `shape[split_dim]` is not an integer multiple of `num_split`, slices
+`[0 : shape[split_dim] % num_split]` get one extra dimension.
+For example, if `split_dim = 1` and `num_split = 2` and the input is
+
+    input_tensor = shape = [2, 7]
+    [    a   d e  ]
+    [b c          ]
+
+Graphically the output tensors are:
+
+    output_tensor[0] = shape = [2, 4]
+    [    a  ]
+    [b c    ]
+
+    output_tensor[1] = shape = [2, 3]
+    [ d e  ]
+    [      ]
+
+split_dim: 0-D. The dimension along which to split. Must be in the range
+  `[0, rank(shape))`.
+num_split: The number of ways to split.
+indices: 2-D tensor representing the indices of the sparse tensor.
+values: 1-D tensor representing the values of the sparse tensor.
+shape: 1-D tensor representing the shape of the sparse tensor.
+output_indices: A list of 1-D tensors representing the indices of the output
+  sparse tensors.
+output_values: A list of 1-D tensors representing the values of the output sparse
+  tensors.
+output_shape: A list of 1-D tensors representing the shape of the output sparse
+  tensors.
+)doc");

REGISTER_OP("SparseSlice")
    .Input("indices: int64")
@@ -323,7 +691,38 @@ REGISTER_OP("SparseSlice")
      c->set_output(1, output_values);
      c->set_output(2, output_shape);
      return Status::OK();
-    });
+    })
+    .Doc(R"doc(
+Slice a `SparseTensor` based on the `start` and `size`.
+
+For example, if the input is
+
+    input_tensor = shape = [2, 7]
+    [    a   d e  ]
+    [b c          ]
+
+Graphically the output tensors are:
+
+    sparse_slice([0, 0], [2, 4]) = shape = [2, 4]
+    [    a  ]
+    [b c    ]
+
+    sparse_slice([0, 4], [2, 3]) = shape = [2, 3]
+    [ d e  ]
+    [      ]
+
+indices: 2-D tensor representing the indices of the sparse tensor.
+values: 1-D tensor representing the values of the sparse tensor.
+shape: 1-D tensor representing the shape of the sparse tensor.
+start: 1-D tensor representing the start of the slice.
+size: 1-D tensor representing the size of the slice.
+output_indices: A list of 1-D tensors representing the indices of the output
+  sparse tensors.
+output_values: A list of 1-D tensors representing the values of the output sparse
+  tensors.
+output_shape: A list of 1-D tensors representing the shape of the output sparse
+  tensors.
+)doc");

REGISTER_OP("SparseReorder")
    .Input("input_indices: int64")
@@ -344,7 +743,27 @@ REGISTER_OP("SparseReorder")
      c->set_output(0, indices);
      c->set_output(1, values);
      return Status::OK();
-    });
+    })
+    .Doc(R"doc(
+Reorders a SparseTensor into the canonical, row-major ordering.
+
+Note that by convention, all sparse ops preserve the canonical ordering along
+increasing dimension number. The only time ordering can be violated is during
+manual manipulation of the indices and values vectors to add entries.
+
+Reordering does not affect the shape of the SparseTensor.
+
+If the tensor has rank `R` and `N` non-empty values, `input_indices` has
+shape `[N, R]`, `input_values` has length `N`, and `input_shape` has length `R`.
+
+input_indices: 2-D. `N x R` matrix with the indices of non-empty values in a
+  SparseTensor, possibly not in canonical ordering.
+input_values: 1-D. `N` non-empty values corresponding to `input_indices`.
+input_shape: 1-D. Shape of the input SparseTensor.
+output_indices: 2-D. `N x R` matrix with the same indices as input_indices, but
+  in canonical row-major ordering.
+output_values: 1-D. `N` non-empty values corresponding to `output_indices`.
+)doc");

REGISTER_OP("SparseReshape")
    .Input("input_indices: int64")
@@ -364,7 +783,36 @@ REGISTER_OP("SparseReshape")
      c->set_output(0, c->Matrix(c->Dim(indices, 0), c->Dim(new_shape, 0)));
      c->set_output(1, new_shape);
      return Status::OK();
-    });
+    })
+    .Doc(R"doc(
+Reshapes a SparseTensor to represent values in a new dense shape.
+ +This operation has the same semantics as reshape on the represented dense +tensor. The `input_indices` are recomputed based on the requested `new_shape`. + +If one component of `new_shape` is the special value -1, the size of that +dimension is computed so that the total dense size remains constant. At +most one component of `new_shape` can be -1. The number of dense elements +implied by `new_shape` must be the same as the number of dense elements +originally implied by `input_shape`. + +Reshaping does not affect the order of values in the SparseTensor. + +If the input tensor has rank `R_in` and `N` non-empty values, and `new_shape` +has length `R_out`, then `input_indices` has shape `[N, R_in]`, +`input_shape` has length `R_in`, `output_indices` has shape `[N, R_out]`, and +`output_shape` has length `R_out`. + +input_indices: 2-D. `N x R_in` matrix with the indices of non-empty values in a + SparseTensor. +input_shape: 1-D. `R_in` vector with the input SparseTensor's dense shape. +new_shape: 1-D. `R_out` vector with the requested new dense shape. +output_indices: 2-D. `N x R_out` matrix with the updated indices of non-empty + values in the output SparseTensor. +output_shape: 1-D. `R_out` vector with the full dense shape of the output + SparseTensor. This is the same as `new_shape` but with any -1 dimensions + filled in. +)doc"); REGISTER_OP("SparseTensorDenseAdd") .Input("a_indices: Tindices") @@ -377,7 +825,17 @@ REGISTER_OP("SparseTensorDenseAdd") .SetShapeFn([](InferenceContext* c) { c->set_output(0, c->input(3)); return Status::OK(); - }); + }) + .Doc(R"doc( +Adds up a `SparseTensor` and a dense `Tensor`, producing a dense `Tensor`. + +This Op does not require `a_indices` be sorted in standard lexicographic order. + +a_indices: 2-D. The `indices` of the `SparseTensor`, with shape `[nnz, ndims]`. +a_values: 1-D. The `values` of the `SparseTensor`, with shape `[nnz]`. +a_shape: 1-D. The `shape` of the `SparseTensor`, with shape `[ndims]`. +b: `ndims`-D Tensor. With shape `a_shape`. +)doc"); REGISTER_OP("SparseReduceMax") .Input("input_indices: int64") @@ -387,7 +845,31 @@ REGISTER_OP("SparseReduceMax") .Attr("keep_dims: bool = False") .Output("output: T") .Attr("T: realnumbertype") - .SetShapeFn(shape_inference::UnknownShape); + .SetShapeFn(shape_inference::UnknownShape) + .Doc(R"doc( +Computes the max of elements across dimensions of a SparseTensor. + +This Op takes a SparseTensor and is the sparse counterpart to +`tf.reduce_max()`. In particular, this Op also returns a dense `Tensor` +instead of a sparse one. + +Reduces `sp_input` along the dimensions given in `reduction_axes`. Unless +`keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in +`reduction_axes`. If `keep_dims` is true, the reduced dimensions are retained +with length 1. + +If `reduction_axes` has no entries, all dimensions are reduced, and a tensor +with a single element is returned. Additionally, the axes can be negative, +which are interpreted according to the indexing rules in Python. + +input_indices: 2-D. `N x R` matrix with the indices of non-empty values in a + SparseTensor, possibly not in canonical ordering. +input_values: 1-D. `N` non-empty values corresponding to `input_indices`. +input_shape: 1-D. Shape of the input SparseTensor. +reduction_axes: 1-D. Length-`K` vector containing the reduction axes. +keep_dims: If true, retain reduced dimensions with length 1. +output: `R-K`-D. The reduced Tensor. 
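+
+A usage sketch via the TF 1.x wrapper `tf.sparse_reduce_max` (note that only
+the stored values participate; implicit zeros do not):
+
+```python
+import tensorflow as tf
+
+# 'x' represents [[1, ?, 2], [?, 3, ?]], where ? is an implicit zero.
+x = tf.SparseTensor(indices=[[0, 0], [0, 2], [1, 1]],
+                    values=[1, 2, 3], dense_shape=[2, 3])
+
+tf.sparse_reduce_max(x)                      # ==> 3
+tf.sparse_reduce_max(x, 1)                   # ==> [2, 3]
+tf.sparse_reduce_max(x, 1, keep_dims=True)   # ==> [[2], [3]]
+```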
+)doc"); REGISTER_OP("SparseReduceMaxSparse") .Input("input_indices: int64") @@ -399,7 +881,30 @@ REGISTER_OP("SparseReduceMaxSparse") .Output("output_values: T") .Output("output_shape: int64") .Attr("T: realnumbertype") - .SetShapeFn(shape_inference::UnknownShape); + .SetShapeFn(shape_inference::UnknownShape) + .Doc(R"doc( +Computes the max of elements across dimensions of a SparseTensor. + +This Op takes a SparseTensor and is the sparse counterpart to +`tf.reduce_max()`. In contrast to SparseReduceMax, this Op returns a +SparseTensor. + +Reduces `sp_input` along the dimensions given in `reduction_axes`. Unless +`keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in +`reduction_axes`. If `keep_dims` is true, the reduced dimensions are retained +with length 1. + +If `reduction_axes` has no entries, all dimensions are reduced, and a tensor +with a single element is returned. Additionally, the axes can be negative, +which are interpreted according to the indexing rules in Python. + +input_indices: 2-D. `N x R` matrix with the indices of non-empty values in a + SparseTensor, possibly not in canonical ordering. +input_values: 1-D. `N` non-empty values corresponding to `input_indices`. +input_shape: 1-D. Shape of the input SparseTensor. +reduction_axes: 1-D. Length-`K` vector containing the reduction axes. +keep_dims: If true, retain reduced dimensions with length 1. +)doc"); REGISTER_OP("SparseReduceSum") .Input("input_indices: int64") @@ -409,7 +914,31 @@ REGISTER_OP("SparseReduceSum") .Attr("keep_dims: bool = False") .Output("output: T") .Attr("T: numbertype") - .SetShapeFn(shape_inference::UnknownShape); + .SetShapeFn(shape_inference::UnknownShape) + .Doc(R"doc( +Computes the sum of elements across dimensions of a SparseTensor. + +This Op takes a SparseTensor and is the sparse counterpart to +`tf.reduce_sum()`. In particular, this Op also returns a dense `Tensor` +instead of a sparse one. + +Reduces `sp_input` along the dimensions given in `reduction_axes`. Unless +`keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in +`reduction_axes`. If `keep_dims` is true, the reduced dimensions are retained +with length 1. + +If `reduction_axes` has no entries, all dimensions are reduced, and a tensor +with a single element is returned. Additionally, the axes can be negative, +which are interpreted according to the indexing rules in Python. + +input_indices: 2-D. `N x R` matrix with the indices of non-empty values in a + SparseTensor, possibly not in canonical ordering. +input_values: 1-D. `N` non-empty values corresponding to `input_indices`. +input_shape: 1-D. Shape of the input SparseTensor. +reduction_axes: 1-D. Length-`K` vector containing the reduction axes. +keep_dims: If true, retain reduced dimensions with length 1. +output: `R-K`-D. The reduced Tensor. +)doc"); REGISTER_OP("SparseReduceSumSparse") .Input("input_indices: int64") @@ -421,7 +950,30 @@ REGISTER_OP("SparseReduceSumSparse") .Output("output_values: T") .Output("output_shape: int64") .Attr("T: numbertype") - .SetShapeFn(shape_inference::UnknownShape); + .SetShapeFn(shape_inference::UnknownShape) + .Doc(R"doc( +Computes the sum of elements across dimensions of a SparseTensor. + +This Op takes a SparseTensor and is the sparse counterpart to +`tf.reduce_sum()`. In contrast to SparseReduceSum, this Op returns a +SparseTensor. + +Reduces `sp_input` along the dimensions given in `reduction_axes`. 
Unless +`keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in +`reduction_axes`. If `keep_dims` is true, the reduced dimensions are retained +with length 1. + +If `reduction_axes` has no entries, all dimensions are reduced, and a tensor +with a single element is returned. Additionally, the axes can be negative, +which are interpreted according to the indexing rules in Python. + +input_indices: 2-D. `N x R` matrix with the indices of non-empty values in a + SparseTensor, possibly not in canonical ordering. +input_values: 1-D. `N` non-empty values corresponding to `input_indices`. +input_shape: 1-D. Shape of the input SparseTensor. +reduction_axes: 1-D. Length-`K` vector containing the reduction axes. +keep_dims: If true, retain reduced dimensions with length 1. +)doc"); #define SPARSE_DENSE_CWISE_SIGNATURE() \ Input("sp_indices: int64") \ @@ -437,11 +989,63 @@ REGISTER_OP("SparseReduceSumSparse") return Status::OK(); \ }) -REGISTER_OP("SparseDenseCwiseMul").SPARSE_DENSE_CWISE_SIGNATURE(); +REGISTER_OP("SparseDenseCwiseMul") + .SPARSE_DENSE_CWISE_SIGNATURE() + .Doc(R"doc( +Component-wise multiplies a SparseTensor by a dense Tensor. + +The output locations corresponding to the implicitly zero elements in the sparse +tensor will be zero (i.e., will not take up storage space), regardless of the +contents of the dense tensor (even if it's +/-INF and that INF*0 == NaN). + +*Limitation*: this Op only broadcasts the dense side to the sparse side, but not +the other direction. + +sp_indices: 2-D. `N x R` matrix with the indices of non-empty values in a + SparseTensor, possibly not in canonical ordering. +sp_values: 1-D. `N` non-empty values corresponding to `sp_indices`. +sp_shape: 1-D. Shape of the input SparseTensor. +dense: `R`-D. The dense Tensor operand. +output: 1-D. The `N` values that are operated on. +)doc"); + +REGISTER_OP("SparseDenseCwiseDiv") + .SPARSE_DENSE_CWISE_SIGNATURE() + .Doc(R"doc( +Component-wise divides a SparseTensor by a dense Tensor. + +*Limitation*: this Op only broadcasts the dense side to the sparse side, but not +the other direction. + +sp_indices: 2-D. `N x R` matrix with the indices of non-empty values in a + SparseTensor, possibly not in canonical ordering. +sp_values: 1-D. `N` non-empty values corresponding to `sp_indices`. +sp_shape: 1-D. Shape of the input SparseTensor. +dense: `R`-D. The dense Tensor operand. +output: 1-D. The `N` values that are operated on. +)doc"); + +REGISTER_OP("SparseDenseCwiseAdd") + .SPARSE_DENSE_CWISE_SIGNATURE() + .Doc(R"doc( +Adds up a SparseTensor and a dense Tensor, using these special rules: -REGISTER_OP("SparseDenseCwiseDiv").SPARSE_DENSE_CWISE_SIGNATURE(); +(1) Broadcasts the dense side to have the same shape as the sparse side, if + eligible; +(2) Then, only the dense values pointed to by the indices of the SparseTensor + participate in the cwise addition. -REGISTER_OP("SparseDenseCwiseAdd").SPARSE_DENSE_CWISE_SIGNATURE(); +By these rules, the result is a logical SparseTensor with exactly the same +indices and shape, but possibly with different non-zero values. The output of +this Op is the resultant non-zero values. + +sp_indices: 2-D. `N x R` matrix with the indices of non-empty values in a + SparseTensor, possibly not in canonical ordering. +sp_values: 1-D. `N` non-empty values corresponding to `sp_indices`. +sp_shape: 1-D. Shape of the input SparseTensor. +dense: `R`-D. The dense Tensor operand. +output: 1-D. The `N` values that are operated on. 
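+
+For intuition, a NumPy sketch of rules (1)-(2) in the no-broadcasting case
+(the op's output is just the new 1-D values vector):
+
+```python
+import numpy as np
+
+sp_indices = np.array([[0, 0], [1, 2]])   # N x R indices
+sp_values = np.array([10.0, 20.0])        # N non-empty values
+dense = np.array([[1.0, 2.0, 3.0],
+                  [4.0, 5.0, 6.0]])       # R-D dense operand
+
+# Only the dense entries addressed by sp_indices take part in the add.
+output = sp_values + dense[sp_indices[:, 0], sp_indices[:, 1]]
+# ==> [11., 26.]
+```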
+)doc"); #undef SPARSE_DENSE_CWISE_SIGNATURE @@ -459,7 +1063,32 @@ REGISTER_OP("SparseSoftmax") TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 1, &unused)); c->set_output(0, values); return Status::OK(); - }); + }) + .Doc(R"doc( +Applies softmax to a batched N-D `SparseTensor`. + +The inputs represent an N-D SparseTensor with logical shape `[..., B, C]` +(where `N >= 2`), and with indices sorted in the canonical lexicographic order. + +This op is equivalent to applying the normal `tf.nn.softmax()` to each innermost +logical submatrix with shape `[B, C]`, but with the catch that *the implicitly +zero elements do not participate*. Specifically, the algorithm is equivalent +to the following: + + (1) Applies `tf.nn.softmax()` to a densified view of each innermost submatrix + with shape `[B, C]`, along the size-C dimension; + (2) Masks out the original implicitly-zero locations; + (3) Renormalizes the remaining elements. + +Hence, the `SparseTensor` result has exactly the same non-zero indices and +shape. + +sp_indices: 2-D. `NNZ x R` matrix with the indices of non-empty values in a + SparseTensor, in canonical ordering. +sp_values: 1-D. `NNZ` non-empty values corresponding to `sp_indices`. +sp_shape: 1-D. Shape of the input SparseTensor. +output: 1-D. The `NNZ` values for the result `SparseTensor`. +)doc"); REGISTER_OP("SparseSparseMaximum") .Input("a_indices: int64") @@ -471,7 +1100,23 @@ REGISTER_OP("SparseSparseMaximum") .Output("output_indices: int64") .Output("output_values: T") .Attr("T: realnumbertype") - .SetShapeFn(SparseSparseMinOrMaxShapeFn); + .SetShapeFn(SparseSparseMinOrMaxShapeFn) + .Doc(R"doc( +Returns the element-wise max of two SparseTensors. + +Assumes the two SparseTensors have the same shape, i.e., no broadcasting. + +a_indices: 2-D. `N x R` matrix with the indices of non-empty values in a + SparseTensor, in the canonical lexicographic ordering. +a_values: 1-D. `N` non-empty values corresponding to `a_indices`. +a_shape: 1-D. Shape of the input SparseTensor. +b_indices: counterpart to `a_indices` for the other operand. +b_values: counterpart to `a_values` for the other operand; must be of the same dtype. +b_shape: counterpart to `a_shape` for the other operand; the two shapes must be equal. + +output_indices: 2-D. The indices of the output SparseTensor. +output_values: 1-D. The values of the output SparseTensor. +)doc"); REGISTER_OP("SparseSparseMinimum") .Input("a_indices: int64") @@ -483,7 +1128,23 @@ REGISTER_OP("SparseSparseMinimum") .Output("output_indices: int64") .Output("output_values: T") .Attr("T: numbertype") - .SetShapeFn(SparseSparseMinOrMaxShapeFn); + .SetShapeFn(SparseSparseMinOrMaxShapeFn) + .Doc(R"doc( +Returns the element-wise min of two SparseTensors. + +Assumes the two SparseTensors have the same shape, i.e., no broadcasting. + +a_indices: 2-D. `N x R` matrix with the indices of non-empty values in a + SparseTensor, in the canonical lexicographic ordering. +a_values: 1-D. `N` non-empty values corresponding to `a_indices`. +a_shape: 1-D. Shape of the input SparseTensor. +b_indices: counterpart to `a_indices` for the other operand. +b_values: counterpart to `a_values` for the other operand; must be of the same dtype. +b_shape: counterpart to `a_shape` for the other operand; the two shapes must be equal. + +output_indices: 2-D. The indices of the output SparseTensor. +output_values: 1-D. The values of the output SparseTensor. 
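+
+A usage sketch via the TF 1.x wrapper `tf.sparse_maximum` (the output index
+set is the union of the inputs', with missing entries treated as zero):
+
+```python
+import tensorflow as tf
+
+sp_zero = tf.SparseTensor([[0]], [0], [7])
+sp_one = tf.SparseTensor([[1]], [1], [7])
+res = tf.sparse_maximum(sp_zero, sp_one)
+# res has indices [[0], [1]] and values [0, 1].
+```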
+)doc"); REGISTER_OP("AddSparseToTensorsMap") .Input("sparse_indices: int64") @@ -501,7 +1162,34 @@ REGISTER_OP("AddSparseToTensorsMap") TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 1, &unused)); c->set_output(0, c->Scalar()); return Status::OK(); - }); + }) + .Doc(R"doc( +Add a `SparseTensor` to a `SparseTensorsMap` return its handle. + +A `SparseTensor` is represented by three tensors: `sparse_indices`, +`sparse_values`, and `sparse_shape`. + +This operator takes the given `SparseTensor` and adds it to a container +object (a `SparseTensorsMap`). A unique key within this container is generated +in the form of an `int64`, and this is the value that is returned. + +The `SparseTensor` can then be read out as part of a minibatch by passing +the key as a vector element to `TakeManySparseFromTensorsMap`. To ensure +the correct `SparseTensorsMap` is accessed, ensure that the same +`container` and `shared_name` are passed to that Op. If no `shared_name` +is provided here, instead use the *name* of the Operation created by calling +`AddSparseToTensorsMap` as the `shared_name` passed to +`TakeManySparseFromTensorsMap`. Ensure the Operations are colocated. + +sparse_indices: 2-D. The `indices` of the `SparseTensor`. +sparse_values: 1-D. The `values` of the `SparseTensor`. +sparse_shape: 1-D. The `shape` of the `SparseTensor`. +sparse_handle: 0-D. The handle of the `SparseTensor` now stored in the + `SparseTensorsMap`. +container: The container name for the `SparseTensorsMap` created by this op. +shared_name: The shared name for the `SparseTensorsMap` created by this op. + If blank, the new Operation's unique name is used. +)doc"); REGISTER_OP("AddManySparseToTensorsMap") .Input("sparse_indices: int64") @@ -519,7 +1207,44 @@ REGISTER_OP("AddManySparseToTensorsMap") TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 1, &unused)); c->set_output(0, c->Vector(InferenceContext::kUnknownDim)); return Status::OK(); - }); + }) + .Doc(R"doc( +Add an `N`-minibatch `SparseTensor` to a `SparseTensorsMap`, return `N` handles. + +A `SparseTensor` of rank `R` is represented by three tensors: `sparse_indices`, +`sparse_values`, and `sparse_shape`, where + +```sparse_indices.shape[1] == sparse_shape.shape[0] == R``` + +An `N`-minibatch of `SparseTensor` objects is represented as a `SparseTensor` +having a first `sparse_indices` column taking values between `[0, N)`, where +the minibatch size `N == sparse_shape[0]`. + +The input `SparseTensor` must have rank `R` greater than 1, and the first +dimension is treated as the minibatch dimension. Elements of the `SparseTensor` +must be sorted in increasing order of this first dimension. The stored +`SparseTensor` objects pointed to by each row of the output `sparse_handles` +will have rank `R-1`. + +The `SparseTensor` values can then be read out as part of a minibatch by passing +the given keys as vector elements to `TakeManySparseFromTensorsMap`. To ensure +the correct `SparseTensorsMap` is accessed, ensure that the same +`container` and `shared_name` are passed to that Op. If no `shared_name` +is provided here, instead use the *name* of the Operation created by calling +`AddManySparseToTensorsMap` as the `shared_name` passed to +`TakeManySparseFromTensorsMap`. Ensure the Operations are colocated. + +sparse_indices: 2-D. The `indices` of the minibatch `SparseTensor`. + `sparse_indices[:, 0]` must be ordered values in `[0, N)`. +sparse_values: 1-D. The `values` of the minibatch `SparseTensor`. +sparse_shape: 1-D. The `shape` of the minibatch `SparseTensor`. 
+ The minibatch size `N == sparse_shape[0]`. +sparse_handles: 1-D. The handles of the `SparseTensor` now stored in the + `SparseTensorsMap`. Shape: `[N]`. +container: The container name for the `SparseTensorsMap` created by this op. +shared_name: The shared name for the `SparseTensorsMap` created by this op. + If blank, the new Operation's unique name is used. +)doc"); REGISTER_OP("TakeManySparseFromTensorsMap") .Input("sparse_handles: int64") @@ -540,7 +1265,71 @@ REGISTER_OP("TakeManySparseFromTensorsMap") c->set_output(1, c->Vector(InferenceContext::kUnknownDim)); c->set_output(2, c->Vector(InferenceContext::kUnknownDim)); return Status::OK(); - }); + }) + .Doc(R"doc( +Read `SparseTensors` from a `SparseTensorsMap` and concatenate them. + +The input `sparse_handles` must be an `int64` matrix of shape `[N, 1]` where +`N` is the minibatch size and the rows correspond to the output handles of +`AddSparseToTensorsMap` or `AddManySparseToTensorsMap`. The ranks of the +original `SparseTensor` objects that went into the given input ops must all +match. When the final `SparseTensor` is created, it has rank one +higher than the ranks of the incoming `SparseTensor` objects +(they have been concatenated along a new row dimension on the left). + +The output `SparseTensor` object's shape values for all dimensions but the +first are the max across the input `SparseTensor` objects' shape values +for the corresponding dimensions. Its first shape value is `N`, the minibatch +size. + +The input `SparseTensor` objects' indices are assumed ordered in +standard lexicographic order. If this is not the case, after this +step run `SparseReorder` to restore index ordering. + +For example, if the handles represent an input, which is a `[2, 3]` matrix +representing two original `SparseTensor` objects: + +``` + index = [ 0] + [10] + [20] + values = [1, 2, 3] + shape = [50] +``` + +and + +``` + index = [ 2] + [10] + values = [4, 5] + shape = [30] +``` + +then the final `SparseTensor` will be: + +``` + index = [0 0] + [0 10] + [0 20] + [1 2] + [1 10] + values = [1, 2, 3, 4, 5] + shape = [2 50] +``` + +sparse_handles: 1-D, The `N` serialized `SparseTensor` objects. + Shape: `[N]`. +sparse_indices: 2-D. The `indices` of the minibatch `SparseTensor`. +sparse_values: 1-D. The `values` of the minibatch `SparseTensor`. +sparse_shape: 1-D. The `shape` of the minibatch `SparseTensor`. +dtype: The `dtype` of the `SparseTensor` objects stored in the + `SparseTensorsMap`. +container: The container name for the `SparseTensorsMap` read by this op. +shared_name: The shared name for the `SparseTensorsMap` read by this op. + It should not be blank; rather the `shared_name` or unique Operation name + of the Op that created the original `SparseTensorsMap` should be used. +)doc"); REGISTER_OP("SparseFillEmptyRows") .Input("indices: int64") @@ -579,7 +1368,59 @@ REGISTER_OP("SparseFillEmptyRows") c->set_output(2, empty_row_indicator); c->set_output(3, reverse_index_map); return Status::OK(); - }); + }) + .Doc(R"doc( +Fills empty rows in the input 2-D `SparseTensor` with a default value. + +The input `SparseTensor` is represented via the tuple of inputs +(`indices`, `values`, `dense_shape`). The output `SparseTensor` has the +same `dense_shape` but with indices `output_indices` and values +`output_values`. + +This op inserts a single entry for every row that doesn't have any values. +The index is created as `[row, 0, ..., 0]` and the inserted value +is `default_value`. 
+
+For example, suppose `sp_input` has shape `[5, 6]` and non-empty values:
+
+    [0, 1]: a
+    [0, 3]: b
+    [2, 0]: c
+    [3, 1]: d
+
+Rows 1 and 4 are empty, so the output will be of shape `[5, 6]` with values:
+
+    [0, 1]: a
+    [0, 3]: b
+    [1, 0]: default_value
+    [2, 0]: c
+    [3, 1]: d
+    [4, 0]: default_value
+
+The output `SparseTensor` will be in row-major order and will have the
+same shape as the input.
+
+This op also returns an indicator vector shaped `[dense_shape[0]]` such that
+
+    empty_row_indicator[i] = True iff row i was an empty row.
+
+It also returns a reverse index map, shaped `[indices.shape[0]]`, that is used
+during backpropagation:
+
+    reverse_index_map[j] = out_j s.t. indices[j, :] == output_indices[out_j, :]
+
+indices: 2-D. The indices of the sparse tensor.
+values: 1-D. The values of the sparse tensor.
+dense_shape: 1-D. The shape of the sparse tensor.
+default_value: 0-D. Default value to insert into location `[row, 0, ..., 0]`
+  for rows missing from the input sparse tensor.
+output_indices: 2-D. The indices of the filled sparse tensor.
+output_values: 1-D. The values of the filled sparse tensor.
+empty_row_indicator: 1-D. Whether the dense row was missing in the
+  input sparse tensor.
+reverse_index_map: 1-D. A map from the input indices to the output indices.
+)doc");

 REGISTER_OP("SparseFillEmptyRowsGrad")
     .Input("reverse_index_map: int64")
@@ -595,6 +1436,23 @@ REGISTER_OP("SparseFillEmptyRowsGrad")
       c->set_output(0, reverse_index_map);
       c->set_output(1, c->Scalar());
       return Status::OK();
-    });
+    })
+    .Doc(R"doc(
+The gradient of SparseFillEmptyRows.
+
+Takes vectors reverse_index_map, shaped `[N]`, and grad_values,
+shaped `[N_full]`, where `N_full >= N`, and copies data into either
+`d_values` or `d_default_value`. Here `d_values` is shaped `[N]` and
+`d_default_value` is a scalar.
+
+  d_values[j] = grad_values[reverse_index_map[j]]
+  d_default_value = sum_{k : 0 .. N_full - 1} (
+     grad_values[k] * 1{k not in reverse_index_map})
+
+reverse_index_map: 1-D. The reverse index map from SparseFillEmptyRows.
+grad_values: 1-D. The gradients from backprop.
+d_values: 1-D. The backprop into values.
+d_default_value: 0-D. The backprop into default_value.
+)doc");

 }  // namespace tensorflow
diff --git a/tensorflow/core/ops/spectral_ops.cc b/tensorflow/core/ops/spectral_ops.cc
index 508cea3495..592aaa25c3 100644
--- a/tensorflow/core/ops/spectral_ops.cc
+++ b/tensorflow/core/ops/spectral_ops.cc
@@ -29,42 +29,126 @@ REGISTER_OP("FFT")
     .Output("output: complex64")
     .SetShapeFn([](InferenceContext* c) {
       return shape_inference::UnchangedShapeWithRankAtLeast(c, 1);
-    });
+    })
+    .Doc(R"doc(
+Fast Fourier transform.
+
+Computes the 1-dimensional discrete Fourier transform over the inner-most
+dimension of `input`.
+
+input: A complex64 tensor.
+output: A complex64 tensor of the same shape as `input`. The inner-most
+  dimension of `input` is replaced with its 1D Fourier transform.
+
+@compatibility(numpy)
+Equivalent to np.fft.fft
+@end_compatibility
+)doc");

 REGISTER_OP("IFFT")
     .Input("input: complex64")
     .Output("output: complex64")
     .SetShapeFn([](InferenceContext* c) {
       return shape_inference::UnchangedShapeWithRankAtLeast(c, 1);
-    });
+    })
+    .Doc(R"doc(
+Inverse fast Fourier transform.
+
+Computes the inverse 1-dimensional discrete Fourier transform over the
+inner-most dimension of `input`.
+
+input: A complex64 tensor.
+output: A complex64 tensor of the same shape as `input`. The inner-most
+  dimension of `input` is replaced with its inverse 1D Fourier transform.
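+
+A round-trip sketch (via the `tf.fft`/`tf.ifft` Python wrappers, assumed from
+the TF 1.x API):
+
+```python
+x = tf.constant([1, 2, 3, 4], dtype=tf.complex64)
+y = tf.ifft(tf.fft(x))  # y ~= x, up to float32 rounding error
+```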
+ +@compatibility(numpy) +Equivalent to np.fft.ifft +@end_compatibility +)doc"); REGISTER_OP("FFT2D") .Input("input: complex64") .Output("output: complex64") .SetShapeFn([](InferenceContext* c) { return shape_inference::UnchangedShapeWithRankAtLeast(c, 2); - }); + }) + .Doc(R"doc( +2D fast Fourier transform. + +Computes the 2-dimensional discrete Fourier transform over the inner-most +2 dimensions of `input`. + +input: A complex64 tensor. +output: A complex64 tensor of the same shape as `input`. The inner-most 2 + dimensions of `input` are replaced with their 2D Fourier transform. + +@compatibility(numpy) +Equivalent to np.fft.fft2 +@end_compatibility +)doc"); REGISTER_OP("IFFT2D") .Input("input: complex64") .Output("output: complex64") .SetShapeFn([](InferenceContext* c) { return shape_inference::UnchangedShapeWithRankAtLeast(c, 2); - }); + }) + .Doc(R"doc( +Inverse 2D fast Fourier transform. + +Computes the inverse 2-dimensional discrete Fourier transform over the +inner-most 2 dimensions of `input`. + +input: A complex64 tensor. +output: A complex64 tensor of the same shape as `input`. The inner-most 2 + dimensions of `input` are replaced with their inverse 2D Fourier transform. + +@compatibility(numpy) +Equivalent to np.fft.ifft2 +@end_compatibility +)doc"); REGISTER_OP("FFT3D") .Input("input: complex64") .Output("output: complex64") .SetShapeFn([](InferenceContext* c) { return shape_inference::UnchangedShapeWithRankAtLeast(c, 3); - }); + }) + .Doc(R"doc( +3D fast Fourier transform. + +Computes the 3-dimensional discrete Fourier transform over the inner-most 3 +dimensions of `input`. + +input: A complex64 tensor. +output: A complex64 tensor of the same shape as `input`. The inner-most 3 + dimensions of `input` are replaced with their 3D Fourier transform. + +@compatibility(numpy) +Equivalent to np.fft.fftn with 3 dimensions. +@end_compatibility +)doc"); REGISTER_OP("IFFT3D") .Input("input: complex64") .Output("output: complex64") .SetShapeFn([](InferenceContext* c) { return shape_inference::UnchangedShapeWithRankAtLeast(c, 3); - }); + }) + .Doc(R"doc( +Inverse 3D fast Fourier transform. + +Computes the inverse 3-dimensional discrete Fourier transform over the +inner-most 3 dimensions of `input`. + +input: A complex64 tensor. +output: A complex64 tensor of the same shape as `input`. The inner-most 3 + dimensions of `input` are replaced with their inverse 3D Fourier transform. + +@compatibility(numpy) +Equivalent to np.fft.ifftn with 3 dimensions. +@end_compatibility +)doc"); Status RFFTShape(InferenceContext* c, const bool forward, const int rank) { ShapeHandle out; @@ -106,37 +190,196 @@ REGISTER_OP("RFFT") .Input("input: float") .Input("fft_length: int32") .Output("output: complex64") - .SetShapeFn([](InferenceContext* c) { return RFFTShape(c, true, 1); }); + .SetShapeFn([](InferenceContext* c) { return RFFTShape(c, true, 1); }) + .Doc(R"doc( +Real-valued fast Fourier transform. + +Computes the 1-dimensional discrete Fourier transform of a real-valued signal +over the inner-most dimension of `input`. + +Since the DFT of a real signal is Hermitian-symmetric, `RFFT` only returns the +`fft_length / 2 + 1` unique components of the FFT: the zero-frequency term, +followed by the `fft_length / 2` positive-frequency terms. + +Along the axis `RFFT` is computed on, if `fft_length` is smaller than the +corresponding dimension of `input`, the dimension is cropped. If it is larger, +the dimension is padded with zeros. + +input: A float32 tensor. +fft_length: An int32 tensor of shape [1]. 
The FFT length. +output: A complex64 tensor of the same rank as `input`. The inner-most + dimension of `input` is replaced with the `fft_length / 2 + 1` unique + frequency components of its 1D Fourier transform. + +@compatibility(numpy) +Equivalent to np.fft.rfft +@end_compatibility +)doc"); REGISTER_OP("IRFFT") .Input("input: complex64") .Input("fft_length: int32") .Output("output: float") - .SetShapeFn([](InferenceContext* c) { return RFFTShape(c, false, 1); }); + .SetShapeFn([](InferenceContext* c) { return RFFTShape(c, false, 1); }) + .Doc(R"doc( +Inverse real-valued fast Fourier transform. + +Computes the inverse 1-dimensional discrete Fourier transform of a real-valued +signal over the inner-most dimension of `input`. + +The inner-most dimension of `input` is assumed to be the result of `RFFT`: the +`fft_length / 2 + 1` unique components of the DFT of a real-valued signal. If +`fft_length` is not provided, it is computed from the size of the inner-most +dimension of `input` (`fft_length = 2 * (inner - 1)`). If the FFT length used to +compute `input` is odd, it should be provided since it cannot be inferred +properly. + +Along the axis `IRFFT` is computed on, if `fft_length / 2 + 1` is smaller +than the corresponding dimension of `input`, the dimension is cropped. If it is +larger, the dimension is padded with zeros. + +input: A complex64 tensor. +fft_length: An int32 tensor of shape [1]. The FFT length. +output: A float32 tensor of the same rank as `input`. The inner-most + dimension of `input` is replaced with the `fft_length` samples of its inverse + 1D Fourier transform. + +@compatibility(numpy) +Equivalent to np.fft.irfft +@end_compatibility +)doc"); REGISTER_OP("RFFT2D") .Input("input: float") .Input("fft_length: int32") .Output("output: complex64") - .SetShapeFn([](InferenceContext* c) { return RFFTShape(c, true, 2); }); + .SetShapeFn([](InferenceContext* c) { return RFFTShape(c, true, 2); }) + .Doc(R"doc( +2D real-valued fast Fourier transform. + +Computes the 2-dimensional discrete Fourier transform of a real-valued signal +over the inner-most 2 dimensions of `input`. + +Since the DFT of a real signal is Hermitian-symmetric, `RFFT2D` only returns the +`fft_length / 2 + 1` unique components of the FFT for the inner-most dimension +of `output`: the zero-frequency term, followed by the `fft_length / 2` +positive-frequency terms. + +Along each axis `RFFT2D` is computed on, if `fft_length` is smaller than the +corresponding dimension of `input`, the dimension is cropped. If it is larger, +the dimension is padded with zeros. + +input: A float32 tensor. +fft_length: An int32 tensor of shape [2]. The FFT length for each dimension. +output: A complex64 tensor of the same rank as `input`. The inner-most 2 + dimensions of `input` are replaced with their 2D Fourier transform. The + inner-most dimension contains `fft_length / 2 + 1` unique frequency + components. + +@compatibility(numpy) +Equivalent to np.fft.rfft2 +@end_compatibility +)doc"); REGISTER_OP("IRFFT2D") .Input("input: complex64") .Input("fft_length: int32") .Output("output: float") - .SetShapeFn([](InferenceContext* c) { return RFFTShape(c, false, 2); }); + .SetShapeFn([](InferenceContext* c) { return RFFTShape(c, false, 2); }) + .Doc(R"doc( +Inverse 2D real-valued fast Fourier transform. + +Computes the inverse 2-dimensional discrete Fourier transform of a real-valued +signal over the inner-most 2 dimensions of `input`. 
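+
+For shape intuition, a round-trip sketch (via the `tf.spectral` Python
+wrappers, assumed from the TF 1.x API):
+
+```python
+x = tf.random_normal([4, 8])                   # real-valued input
+X = tf.spectral.rfft2d(x)                      # shape [4, 5] == [4, 8/2 + 1]
+y = tf.spectral.irfft2d(X, fft_length=[4, 8])  # shape [4, 8], y ~= x
+```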
+ +The inner-most 2 dimensions of `input` are assumed to be the result of `RFFT2D`: +The inner-most dimension contains the `fft_length / 2 + 1` unique components of +the DFT of a real-valued signal. If `fft_length` is not provided, it is computed +from the size of the inner-most 2 dimensions of `input`. If the FFT length used +to compute `input` is odd, it should be provided since it cannot be inferred +properly. + +Along each axis `IRFFT2D` is computed on, if `fft_length` (or +`fft_length / 2 + 1` for the inner-most dimension) is smaller than the +corresponding dimension of `input`, the dimension is cropped. If it is larger, +the dimension is padded with zeros. + +input: A complex64 tensor. +fft_length: An int32 tensor of shape [2]. The FFT length for each dimension. +output: A float32 tensor of the same rank as `input`. The inner-most 2 + dimensions of `input` are replaced with the `fft_length` samples of their + inverse 2D Fourier transform. + +@compatibility(numpy) +Equivalent to np.fft.irfft2 +@end_compatibility +)doc"); REGISTER_OP("RFFT3D") .Input("input: float") .Input("fft_length: int32") .Output("output: complex64") - .SetShapeFn([](InferenceContext* c) { return RFFTShape(c, true, 3); }); + .SetShapeFn([](InferenceContext* c) { return RFFTShape(c, true, 3); }) + .Doc(R"doc( +3D real-valued fast Fourier transform. + +Computes the 3-dimensional discrete Fourier transform of a real-valued signal +over the inner-most 3 dimensions of `input`. + +Since the DFT of a real signal is Hermitian-symmetric, `RFFT3D` only returns the +`fft_length / 2 + 1` unique components of the FFT for the inner-most dimension +of `output`: the zero-frequency term, followed by the `fft_length / 2` +positive-frequency terms. + +Along each axis `RFFT3D` is computed on, if `fft_length` is smaller than the +corresponding dimension of `input`, the dimension is cropped. If it is larger, +the dimension is padded with zeros. + +input: A float32 tensor. +fft_length: An int32 tensor of shape [3]. The FFT length for each dimension. +output: A complex64 tensor of the same rank as `input`. The inner-most 3 + dimensions of `input` are replaced with the their 3D Fourier transform. The + inner-most dimension contains `fft_length / 2 + 1` unique frequency + components. + +@compatibility(numpy) +Equivalent to np.fft.rfftn with 3 dimensions. +@end_compatibility +)doc"); REGISTER_OP("IRFFT3D") .Input("input: complex64") .Input("fft_length: int32") .Output("output: float") - .SetShapeFn([](InferenceContext* c) { return RFFTShape(c, false, 3); }); + .SetShapeFn([](InferenceContext* c) { return RFFTShape(c, false, 3); }) + .Doc(R"doc( +Inverse 3D real-valued fast Fourier transform. + +Computes the inverse 3-dimensional discrete Fourier transform of a real-valued +signal over the inner-most 3 dimensions of `input`. + +The inner-most 3 dimensions of `input` are assumed to be the result of `RFFT3D`: +The inner-most dimension contains the `fft_length / 2 + 1` unique components of +the DFT of a real-valued signal. If `fft_length` is not provided, it is computed +from the size of the inner-most 3 dimensions of `input`. If the FFT length used +to compute `input` is odd, it should be provided since it cannot be inferred +properly. + +Along each axis `IRFFT3D` is computed on, if `fft_length` (or +`fft_length / 2 + 1` for the inner-most dimension) is smaller than the +corresponding dimension of `input`, the dimension is cropped. If it is larger, +the dimension is padded with zeros. + +input: A complex64 tensor. 
+fft_length: An int32 tensor of shape [3]. The FFT length for each dimension.
+output: A float32 tensor of the same rank as `input`. The inner-most 3
+  dimensions of `input` are replaced with the `fft_length` samples of their
+  inverse 3D real Fourier transform.
+
+@compatibility(numpy)
+Equivalent to np.fft.irfftn with 3 dimensions.
+@end_compatibility
+)doc");

 // Deprecated ops:
 REGISTER_OP("BatchFFT")
diff --git a/tensorflow/core/ops/state_ops.cc b/tensorflow/core/ops/state_ops.cc
index 7a524b60c0..5b1f5d2477 100644
--- a/tensorflow/core/ops/state_ops.cc
+++ b/tensorflow/core/ops/state_ops.cc
@@ -28,7 +28,22 @@ REGISTER_OP("VariableV2")
     .Attr("container: string = ''")
     .Attr("shared_name: string = ''")
     .SetIsStateful()
-    .SetShapeFn(shape_inference::ExplicitShape);
+    .SetShapeFn(shape_inference::ExplicitShape)
+    .Doc(R"doc(
+Holds state in the form of a tensor that persists across steps.
+
+Outputs a ref to the tensor state so it may be read or modified.
+TODO(zhifengc/mrry): Add a pointer to a more detailed document
+about sharing states in tensorflow.
+
+ref: A reference to the variable tensor.
+shape: The shape of the variable tensor.
+dtype: The type of elements in the variable tensor.
+container: If non-empty, this variable is placed in the given container.
+  Otherwise, a default container is used.
+shared_name: If non-empty, this variable is named in the given bucket
+  with this shared_name. Otherwise, the node name is used instead.
+)doc");

 REGISTER_OP("Variable")
     .Output("ref: Ref(dtype)")
@@ -52,14 +67,23 @@ REGISTER_OP("Variable")
       TF_RETURN_IF_ERROR(c->MakeShapeFromPartialTensorShape(shape, &out));
       c->set_output(0, out);
       return Status::OK();
-    });
+    })
+    .Doc("Use VariableV2 instead.");

 REGISTER_OP("IsVariableInitialized")
     .Input("ref: Ref(dtype)")
     .Output("is_initialized: bool")
     .Attr("dtype: type")
     .SetAllowsUninitializedInput()
-    .SetShapeFn(shape_inference::ScalarShape);
+    .SetShapeFn(shape_inference::ScalarShape)
+    .Doc(R"doc(
+Checks whether a tensor has been initialized.
+
+Outputs a boolean scalar indicating whether the tensor has been initialized.
+
+ref: Should be from a `Variable` node. May be uninitialized.
+dtype: The type of elements in the variable tensor.
+)doc");

 REGISTER_OP("TemporaryVariable")
     .Output("ref: Ref(dtype)")
@@ -67,14 +91,53 @@ REGISTER_OP("TemporaryVariable")
     .Attr("dtype: type")
     .Attr("var_name: string = ''")
     .SetIsStateful()
-    .SetShapeFn(shape_inference::ExplicitShape);
+    .SetShapeFn(shape_inference::ExplicitShape)
+    .Doc(R"doc(
+Returns a tensor that may be mutated, but only persists within a single step.
+
+This is an experimental op for internal use only and it is possible to use this
+op in unsafe ways. DO NOT USE unless you fully understand the risks.
+
+It is the caller's responsibility to ensure that 'ref' is eventually passed to a
+matching 'DestroyTemporaryVariable' op after all other uses have completed.
+
+Outputs a ref to the tensor state so it may be read or modified.
+
+  E.g.
+      var = state_ops._temporary_variable([1, 2], types.float_)
+      var_name = var.op.name
+      var = state_ops.assign(var, [[4.0, 5.0]])
+      var = state_ops.assign_add(var, [[6.0, 7.0]])
+      final = state_ops._destroy_temporary_variable(var, var_name=var_name)
+
+ref: A reference to the variable tensor.
+shape: The shape of the variable tensor.
+dtype: The type of elements in the variable tensor.
+var_name: Overrides the name used for the temporary variable resource. Default
+  value is the name of the 'TemporaryVariable' op (which is guaranteed unique).
+)doc"); REGISTER_OP("DestroyTemporaryVariable") .Input("ref: Ref(T)") .Output("value: T") .Attr("T: type") .Attr("var_name: string") - .SetShapeFn(shape_inference::UnchangedShape); + .SetShapeFn(shape_inference::UnchangedShape) + .Doc(R"doc( +Destroys the temporary variable and returns its final value. + +Sets output to the value of the Tensor pointed to by 'ref', then destroys +the temporary variable called 'var_name'. +All other uses of 'ref' *must* have executed before this op. +This is typically achieved by chaining the ref through each assign op, or by +using control dependencies. + +Outputs the final value of the tensor pointed to by 'ref'. + +ref: A reference to the temporary variable tensor. +var_name: Name of the temporary variable, usually the name of the matching +'TemporaryVariable' op. +)doc"); REGISTER_OP("Assign") .Input("ref: Ref(T)") @@ -93,7 +156,23 @@ REGISTER_OP("Assign") c->set_output(0, c->input(1)); return Status::OK(); - }); + }) + .Doc(R"doc( +Update 'ref' by assigning 'value' to it. + +This operation outputs "ref" after the assignment is done. +This makes it easier to chain operations that need to use the reset value. + +ref: Should be from a `Variable` node. May be uninitialized. +value: The value to be assigned to the variable. +validate_shape: If true, the operation will validate that the shape + of 'value' matches the shape of the Tensor being assigned to. If false, + 'ref' will take on the shape of 'value'. +use_locking: If True, the assignment will be protected by a lock; + otherwise the behavior is undefined, but may exhibit less contention. +output_ref:= Same as "ref". Returned as a convenience for operations that want + to use the new value after the variable has been reset. +)doc"); REGISTER_OP("AssignAdd") .Input("ref: Ref(T)") @@ -101,7 +180,20 @@ REGISTER_OP("AssignAdd") .Output("output_ref: Ref(T)") .Attr("T: numbertype") .Attr("use_locking: bool = false") - .SetShapeFn(shape_inference::MergeBothInputsShapeFn); + .SetShapeFn(shape_inference::MergeBothInputsShapeFn) + .Doc(R"doc( +Update 'ref' by adding 'value' to it. + +This operation outputs "ref" after the update is done. +This makes it easier to chain operations that need to use the reset value. + +ref: Should be from a `Variable` node. +value: The value to be added to the variable. +use_locking: If True, the addition will be protected by a lock; + otherwise the behavior is undefined, but may exhibit less contention. +output_ref:= Same as "ref". Returned as a convenience for operations that want + to use the new value after the variable has been updated. +)doc"); REGISTER_OP("AssignSub") .Input("ref: Ref(T)") @@ -109,7 +201,20 @@ REGISTER_OP("AssignSub") .Output("output_ref: Ref(T)") .Attr("T: numbertype") .Attr("use_locking: bool = false") - .SetShapeFn(shape_inference::MergeBothInputsShapeFn); + .SetShapeFn(shape_inference::MergeBothInputsShapeFn) + .Doc(R"doc( +Update 'ref' by subtracting 'value' from it. + +This operation outputs "ref" after the update is done. +This makes it easier to chain operations that need to use the reset value. + +ref: Should be from a `Variable` node. +value: The value to be subtracted to the variable. +use_locking: If True, the subtraction will be protected by a lock; + otherwise the behavior is undefined, but may exhibit less contention. +output_ref:= Same as "ref". Returned as a convenience for operations that want + to use the new value after the variable has been updated. 
+)doc"); namespace { @@ -138,7 +243,44 @@ REGISTER_OP("ScatterUpdate") .Attr("T: type") .Attr("Tindices: {int32, int64}") .Attr("use_locking: bool = true") - .SetShapeFn(ScatterUpdateShape); + .SetShapeFn(ScatterUpdateShape) + .Doc(R"doc( +Applies sparse updates to a variable reference. + +This operation computes + +```python + # Scalar indices + ref[indices, ...] = updates[...] + + # Vector indices (for each i) + ref[indices[i], ...] = updates[i, ...] + + # High rank indices (for each i, ..., j) + ref[indices[i, ..., j], ...] = updates[i, ..., j, ...] +``` + +This operation outputs `ref` after the update is done. +This makes it easier to chain operations that need to use the reset value. + +If values in `ref` is to be updated more than once, because there are +duplicate entries in `indices`, the order at which the updates happen +for each value is undefined. + +Requires `updates.shape = indices.shape + ref.shape[1:]`. + +<div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;"> +<img style="width:100%" src="https://www.tensorflow.org/images/ScatterUpdate.png" alt> +</div> + +ref: Should be from a `Variable` node. +indices: A tensor of indices into the first dimension of `ref`. +updates: A tensor of updated values to store in `ref`. +output_ref:= Same as `ref`. Returned as a convenience for operations that want + to use the updated values after the update is done. +use_locking: If True, the assignment will be protected by a lock; + otherwise the behavior is undefined, but may exhibit less contention. +)doc"); REGISTER_OP("ScatterAdd") .Input("ref: Ref(T)") @@ -148,7 +290,41 @@ REGISTER_OP("ScatterAdd") .Attr("T: numbertype") .Attr("Tindices: {int32, int64}") .Attr("use_locking: bool = false") - .SetShapeFn(ScatterUpdateShape); + .SetShapeFn(ScatterUpdateShape) + .Doc(R"doc( +Adds sparse updates to a variable reference. + +This operation computes + + # Scalar indices + ref[indices, ...] += updates[...] + + # Vector indices (for each i) + ref[indices[i], ...] += updates[i, ...] + + # High rank indices (for each i, ..., j) + ref[indices[i, ..., j], ...] += updates[i, ..., j, ...] + +This operation outputs `ref` after the update is done. +This makes it easier to chain operations that need to use the reset value. + +Duplicate entries are handled correctly: if multiple `indices` reference +the same location, their contributions add. + +Requires `updates.shape = indices.shape + ref.shape[1:]`. + +<div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;"> +<img style="width:100%" src="https://www.tensorflow.org/images/ScatterAdd.png" alt> +</div> + +ref: Should be from a `Variable` node. +indices: A tensor of indices into the first dimension of `ref`. +updates: A tensor of updated values to add to `ref`. +output_ref:= Same as `ref`. Returned as a convenience for operations that want + to use the updated values after the update is done. +use_locking: If True, the addition will be protected by a lock; + otherwise the behavior is undefined, but may exhibit less contention. +)doc"); REGISTER_OP("ScatterSub") .Input("ref: Ref(T)") @@ -158,7 +334,41 @@ REGISTER_OP("ScatterSub") .Attr("T: numbertype") .Attr("Tindices: {int32, int64}") .Attr("use_locking: bool = false") - .SetShapeFn(ScatterUpdateShape); + .SetShapeFn(ScatterUpdateShape) + .Doc(R"doc( +Subtracts sparse updates to a variable reference. + +```python + # Scalar indices + ref[indices, ...] -= updates[...] + + # Vector indices (for each i) + ref[indices[i], ...] -= updates[i, ...] 
+ + # High rank indices (for each i, ..., j) + ref[indices[i, ..., j], ...] -= updates[i, ..., j, ...] +``` + +This operation outputs `ref` after the update is done. +This makes it easier to chain operations that need to use the reset value. + +Duplicate entries are handled correctly: if multiple `indices` reference +the same location, their (negated) contributions add. + +Requires `updates.shape = indices.shape + ref.shape[1:]`. + +<div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;"> +<img style="width:100%" src="https://www.tensorflow.org/images/ScatterSub.png" alt> +</div> + +ref: Should be from a `Variable` node. +indices: A tensor of indices into the first dimension of `ref`. +updates: A tensor of updated values to subtract from `ref`. +output_ref:= Same as `ref`. Returned as a convenience for operations that want + to use the updated values after the update is done. +use_locking: If True, the subtraction will be protected by a lock; + otherwise the behavior is undefined, but may exhibit less contention. +)doc"); REGISTER_OP("ScatterMul") .Input("ref: Ref(T)") @@ -168,7 +378,39 @@ REGISTER_OP("ScatterMul") .Attr("T: numbertype") .Attr("Tindices: {int32, int64}") .Attr("use_locking: bool = false") - .SetShapeFn(ScatterUpdateShape); + .SetShapeFn(ScatterUpdateShape) + .Doc(R"doc( +Multiplies sparse updates into a variable reference. + +This operation computes + +```python + # Scalar indices + ref[indices, ...] *= updates[...] + + # Vector indices (for each i) + ref[indices[i], ...] *= updates[i, ...] + + # High rank indices (for each i, ..., j) + ref[indices[i, ..., j], ...] *= updates[i, ..., j, ...] +``` + +This operation outputs `ref` after the update is done. +This makes it easier to chain operations that need to use the reset value. + +Duplicate entries are handled correctly: if multiple `indices` reference +the same location, their contributions multiply. + +Requires `updates.shape = indices.shape + ref.shape[1:]`. + +ref: Should be from a `Variable` node. +indices: A tensor of indices into the first dimension of `ref`. +updates: A tensor of updated values to multiply to `ref`. +output_ref:= Same as `ref`. Returned as a convenience for operations that want + to use the updated values after the update is done. +use_locking: If True, the operation will be protected by a lock; + otherwise the behavior is undefined, but may exhibit less contention. +)doc"); REGISTER_OP("ScatterDiv") .Input("ref: Ref(T)") @@ -178,7 +420,39 @@ REGISTER_OP("ScatterDiv") .Attr("T: numbertype") .Attr("Tindices: {int32, int64}") .Attr("use_locking: bool = false") - .SetShapeFn(ScatterUpdateShape); + .SetShapeFn(ScatterUpdateShape) + .Doc(R"doc( +Divides a variable reference by sparse updates. + +This operation computes + +```python + # Scalar indices + ref[indices, ...] /= updates[...] + + # Vector indices (for each i) + ref[indices[i], ...] /= updates[i, ...] + + # High rank indices (for each i, ..., j) + ref[indices[i, ..., j], ...] /= updates[i, ..., j, ...] +``` + +This operation outputs `ref` after the update is done. +This makes it easier to chain operations that need to use the reset value. + +Duplicate entries are handled correctly: if multiple `indices` reference +the same location, their contributions divide. + +Requires `updates.shape = indices.shape + ref.shape[1:]`. + +ref: Should be from a `Variable` node. +indices: A tensor of indices into the first dimension of `ref`. +updates: A tensor of values that `ref` is divided by. +output_ref:= Same as `ref`. 
Returned as a convenience for operations that want + to use the updated values after the update is done. +use_locking: If True, the operation will be protected by a lock; + otherwise the behavior is undefined, but may exhibit less contention. +)doc"); REGISTER_OP("ScatterNdUpdate") .Input("ref: Ref(T)") @@ -188,7 +462,56 @@ REGISTER_OP("ScatterNdUpdate") .Attr("T: type") .Attr("Tindices: {int32, int64}") .Attr("use_locking: bool = true") - .SetShapeFn(shape_inference::ScatterNdUpdateShape); + .SetShapeFn(shape_inference::ScatterNdUpdateShape) + .Doc(R"doc( +Applies sparse `updates` to individual values or slices within a given +variable according to `indices`. + +`ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`. + +`indices` must be integer tensor, containing indices into `ref`. +It must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`. + +The innermost dimension of `indices` (with length `K`) corresponds to +indices into elements (if `K = P`) or slices (if `K < P`) along the `K`th +dimension of `ref`. + +`updates` is `Tensor` of rank `Q-1+P-K` with shape: + +``` +[d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]]. +``` + +For example, say we want to update 4 scattered elements to a rank-1 tensor to +8 elements. In Python, that update would look like this: + +```python + ref = tf.Variable([1, 2, 3, 4, 5, 6, 7, 8]) + indices = tf.constant([[4], [3], [1] ,[7]]) + updates = tf.constant([9, 10, 11, 12]) + update = tf.scatter_nd_update(ref, indices, updates) + with tf.Session() as sess: + print sess.run(update) +``` + +The resulting update to ref would look like this: + + [1, 11, 3, 10, 9, 6, 7, 12] + +See @{tf.scatter_nd} for more details about how to make updates to +slices. + +ref: A mutable Tensor. Should be from a Variable node. +indices: A Tensor. Must be one of the following types: int32, int64. + A tensor of indices into ref. +updates: A Tensor. Must have the same type as ref. A tensor of updated + values to add to ref. +use_locking: An optional bool. Defaults to True. If True, the assignment will + be protected by a lock; otherwise the behavior is undefined, + but may exhibit less contention. +output_ref: Same as ref. Returned as a convenience for operations that want to + use the updated values after the update is done. +)doc"); REGISTER_OP("ResourceScatterNdUpdate") .Input("ref: resource") @@ -197,7 +520,54 @@ REGISTER_OP("ResourceScatterNdUpdate") .Attr("T: type") .Attr("Tindices: {int32, int64}") .Attr("use_locking: bool = true") - .SetShapeFn(shape_inference::ScatterNdUpdateShape); + .SetShapeFn(shape_inference::ScatterNdUpdateShape) + .Doc(R"doc( +Applies sparse `updates` to individual values or slices within a given +variable according to `indices`. + +`ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`. + +`indices` must be integer tensor, containing indices into `ref`. +It must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`. + +The innermost dimension of `indices` (with length `K`) corresponds to +indices into elements (if `K = P`) or slices (if `K < P`) along the `K`th +dimension of `ref`. + +`updates` is `Tensor` of rank `Q-1+P-K` with shape: + +``` +[d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]]. +``` + +For example, say we want to update 4 scattered elements to a rank-1 tensor to +8 elements. 
In Python, that update would look like this:
+
+```python
+    ref = tfe.Variable([1, 2, 3, 4, 5, 6, 7, 8])
+    indices = tf.constant([[4], [3], [1], [7]])
+    updates = tf.constant([9, 10, 11, 12])
+    update = tf.scatter_nd_update(ref, indices, updates)
+    with tf.Session() as sess:
+      print sess.run(update)
+```
+
+The resulting update to ref would look like this:
+
+    [1, 11, 3, 10, 9, 6, 7, 12]
+
+See @{tf.scatter_nd} for more details about how to make updates to
+slices.
+
+ref: A resource handle. Must be from a VarHandleOp.
+indices: A Tensor. Must be one of the following types: int32, int64.
+  A tensor of indices into ref.
+updates: A Tensor. Must have the same type as ref. A tensor of updated
+  values to add to ref.
+use_locking: An optional bool. Defaults to True. If True, the assignment will
+  be protected by a lock; otherwise the behavior is undefined,
+  but may exhibit less contention.
+)doc");

 REGISTER_OP("ScatterNdAdd")
     .Input("ref: Ref(T)")
@@ -207,7 +577,54 @@ REGISTER_OP("ScatterNdAdd")
     .Attr("T: numbertype")
     .Attr("Tindices: {int32, int64}")
     .Attr("use_locking: bool = false")
-    .SetShapeFn(shape_inference::ScatterNdUpdateShape);
+    .SetShapeFn(shape_inference::ScatterNdUpdateShape)
+    .Doc(R"doc(
+Applies sparse addition between `updates` and individual values or slices
+within a given variable according to `indices`.
+
+`ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`.
+
+`indices` must be an integer tensor, containing indices into `ref`.
+It must be of shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`.
+
+The innermost dimension of `indices` (with length `K`) corresponds to
+indices into elements (if `K = P`) or slices (if `K < P`) along the `K`th
+dimension of `ref`.
+
+`updates` is a `Tensor` of rank `Q-1+P-K` with shape:
+
+```
+[d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]].
+```
+
+For example, say we want to add 4 scattered elements to a rank-1 tensor with 8
+elements. In Python, that addition would look like this:
+
+    ref = tf.Variable([1, 2, 3, 4, 5, 6, 7, 8])
+    indices = tf.constant([[4], [3], [1], [7]])
+    updates = tf.constant([9, 10, 11, 12])
+    add = tf.scatter_nd_add(ref, indices, updates)
+    with tf.Session() as sess:
+      print sess.run(add)
+
+The resulting update to ref would look like this:
+
+    [1, 13, 3, 14, 14, 6, 7, 20]
+
+See @{tf.scatter_nd} for more details about how to make updates to
+slices.
+
+ref: A mutable Tensor. Should be from a Variable node.
+indices: A Tensor. Must be one of the following types: int32, int64.
+  A tensor of indices into ref.
+updates: A Tensor. Must have the same type as ref. A tensor of updated values
+  to add to ref.
+use_locking: An optional bool. Defaults to False. If True, the assignment will
+  be protected by a lock; otherwise the behavior is undefined,
+  but may exhibit less contention.
+output_ref: Same as ref. Returned as a convenience for operations that want
+  to use the updated values after the update is done.
+)doc");

 REGISTER_OP("ScatterNdSub")
     .Input("ref: Ref(T)")
@@ -217,7 +634,54 @@ REGISTER_OP("ScatterNdSub")
     .Attr("T: numbertype")
     .Attr("Tindices: {int32, int64}")
     .Attr("use_locking: bool = false")
-    .SetShapeFn(shape_inference::ScatterNdUpdateShape);
+    .SetShapeFn(shape_inference::ScatterNdUpdateShape)
+    .Doc(R"doc(
+Applies sparse subtraction between `updates` and individual values or slices
+within a given variable according to `indices`.
+
+`ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`.
+
+`indices` must be an integer tensor, containing indices into `ref`.
+It must be of shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`.
+
+The innermost dimension of `indices` (with length `K`) corresponds to
+indices into elements (if `K = P`) or slices (if `K < P`) along the `K`th
+dimension of `ref`.
+
+`updates` is a `Tensor` of rank `Q-1+P-K` with shape:
+
+```
+[d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]].
+```
+
+For example, say we want to subtract 4 scattered elements from a rank-1 tensor
+with 8 elements. In Python, that subtraction would look like this:
+
+    ref = tf.Variable([1, 2, 3, 4, 5, 6, 7, 8])
+    indices = tf.constant([[4], [3], [1], [7]])
+    updates = tf.constant([9, 10, 11, 12])
+    sub = tf.scatter_nd_sub(ref, indices, updates)
+    with tf.Session() as sess:
+      print sess.run(sub)
+
+The resulting update to ref would look like this:
+
+    [1, -9, 3, -6, -4, 6, 7, -4]
+
+See @{tf.scatter_nd} for more details about how to make updates to
+slices.
+
+ref: A mutable Tensor. Should be from a Variable node.
+indices: A Tensor. Must be one of the following types: int32, int64.
+  A tensor of indices into ref.
+updates: A Tensor. Must have the same type as ref. A tensor of updated values
+  to subtract from ref.
+use_locking: An optional bool. Defaults to False. If True, the assignment will
+  be protected by a lock; otherwise the behavior is undefined,
+  but may exhibit less contention.
+output_ref: Same as ref. Returned as a convenience for operations that want
+  to use the updated values after the update is done.
+)doc");

 REGISTER_OP("CountUpTo")
     .Input("ref: Ref(T)")
@@ -229,7 +693,16 @@ REGISTER_OP("CountUpTo")
       TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &output));
       c->set_output(0, output);
       return Status::OK();
-    });
+    })
+    .Doc(R"doc(
+Increments 'ref' until it reaches 'limit'.
+
+ref: Should be from a scalar `Variable` node.
+limit: If incrementing ref would bring it above limit, instead generates an
+  'OutOfRange' error.
+output: A copy of the input before increment. If nothing else modifies the
+  input, the values produced will all be distinct.
+)doc");

 REGISTER_OP("ResourceCountUpTo")
     .Input("resource: resource")
@@ -253,6 +726,15 @@ REGISTER_OP("ResourceCountUpTo")
       TF_RETURN_IF_ERROR(c->WithRank(shape_and_type.shape, 0, &output));
       c->set_output(0, output);
       return Status::OK();
-    });
+    })
+    .Doc(R"doc(
+Increments the variable pointed to by 'resource' until it reaches 'limit'.
+
+resource: Should be from a scalar `Variable` node.
+limit: If incrementing the variable would bring it above limit, instead
+  generates an 'OutOfRange' error.
+output: A copy of the input before increment. If nothing else modifies the
+  input, the values produced will all be distinct.
+)doc");

 }  // namespace tensorflow
diff --git a/tensorflow/core/ops/stateless_random_ops.cc b/tensorflow/core/ops/stateless_random_ops.cc
index 553850610a..3e1f8781fc 100644
--- a/tensorflow/core/ops/stateless_random_ops.cc
+++ b/tensorflow/core/ops/stateless_random_ops.cc
@@ -46,13 +46,52 @@ static Status StatelessShape(shape_inference::InferenceContext* context) {
       .SetShapeFn(StatelessShape)

 // This op is exposed through contrib/stateless only. The interface may change.
-REGISTER_STATELESS_OP("StatelessRandomUniform");
+REGISTER_STATELESS_OP("StatelessRandomUniform")
+    .Doc(R"doc(
+Outputs deterministic pseudorandom values from a uniform distribution.
+
+The generated values follow a uniform distribution in the range `[0, 1)`. The
+lower bound 0 is included in the range, while the upper bound 1 is excluded.
+
+The outputs are a deterministic function of `shape` and `seed`.
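+
+For example (via the contrib/stateless Python wrapper mentioned above; its
+exact location may change):
+
+```python
+seed = tf.constant([87, 42], dtype=tf.int64)
+a = tf.contrib.stateless.stateless_random_uniform([2, 3], seed)
+b = tf.contrib.stateless.stateless_random_uniform([2, 3], seed)
+# a and b evaluate to identical values across runs and sessions.
+```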
+ +shape: The shape of the output tensor. +dtype: The type of the output. +seed: 2 seeds (shape [2]). +output: Random values with specified shape. +)doc"); // This op is exposed through contrib/stateless only. The interface may change. -REGISTER_STATELESS_OP("StatelessRandomNormal"); +REGISTER_STATELESS_OP("StatelessRandomNormal") + .Doc(R"doc( +Outputs deterministic pseudorandom values from a normal distribution. + +The generated values will have mean 0 and standard deviation 1. + +The outputs are a deterministic function of `shape` and `seed`. + +shape: The shape of the output tensor. +dtype: The type of the output. +seed: 2 seeds (shape [2]). +output: Random values with specified shape. +)doc"); // This op is exposed through contrib/stateless only. The interface may change. -REGISTER_STATELESS_OP("StatelessTruncatedNormal"); +REGISTER_STATELESS_OP("StatelessTruncatedNormal") + .Doc(R"doc( +Outputs deterministic pseudorandom values from a truncated normal distribution. + +The generated values follow a normal distribution with mean 0 and standard +deviation 1, except that values whose magnitude is more than 2 standard +deviations from the mean are dropped and re-picked. + +The outputs are a deterministic function of `shape` and `seed`. + +shape: The shape of the output tensor. +dtype: The type of the output. +seed: 2 seeds (shape [2]). +output: Random values with specified shape. +)doc"); #undef REGISTER_STATELESS_OP diff --git a/tensorflow/core/ops/string_ops.cc b/tensorflow/core/ops/string_ops.cc index 8beb28de0a..aebd14c7e5 100644 --- a/tensorflow/core/ops/string_ops.cc +++ b/tensorflow/core/ops/string_ops.cc @@ -27,20 +27,67 @@ REGISTER_OP("StringToHashBucketFast") .Input("input: string") .Output("output: int64") .Attr("num_buckets: int >= 1") - .SetShapeFn(shape_inference::UnchangedShape); + .SetShapeFn(shape_inference::UnchangedShape) + .Doc(R"doc( +Converts each string in the input Tensor to its hash mod by a number of buckets. + +The hash function is deterministic on the content of the string within the +process and will never change. However, it is not suitable for cryptography. +This function may be used when CPU time is scarce and inputs are trusted or +unimportant. There is a risk of adversaries constructing inputs that all hash +to the same bucket. To prevent this problem, use a strong hash function with +`tf.string_to_hash_bucket_strong`. + +input: The strings to assign a hash bucket. +num_buckets: The number of buckets. +output: A Tensor of the same shape as the input `string_tensor`. +)doc"); REGISTER_OP("StringToHashBucketStrong") .Input("input: string") .Output("output: int64") .Attr("num_buckets: int >= 1") .Attr("key: list(int)") - .SetShapeFn(shape_inference::UnchangedShape); + .SetShapeFn(shape_inference::UnchangedShape) + .Doc(R"doc( +Converts each string in the input Tensor to its hash mod by a number of buckets. + +The hash function is deterministic on the content of the string within the +process. The hash function is a keyed hash function, where attribute `key` +defines the key of the hash function. `key` is an array of 2 elements. + +A strong hash is important when inputs may be malicious, e.g. URLs with +additional components. Adversaries could try to make their inputs hash to the +same bucket for a denial-of-service attack or to skew the results. A strong +hash prevents this by making it difficult, if not infeasible, to compute inputs +that hash to the same bucket. This comes at a cost of roughly 4x higher compute +time than `tf.string_to_hash_bucket_fast`. 
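+
+A usage sketch (via the `tf.string_to_hash_bucket_strong` wrapper named
+above; the key values are arbitrary examples):
+
+```python
+ids = tf.string_to_hash_bucket_strong(
+    ["hello", "world"], num_buckets=1024, key=[11, 42])
+# => two int64 bucket ids in [0, 1024), stable for a fixed key.
+```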
+ +input: The strings to assign a hash bucket. +num_buckets: The number of buckets. +key: The key for the keyed hash function passed as a list of two uint64 + elements. +output: A Tensor of the same shape as the input `string_tensor`. +)doc"); REGISTER_OP("StringToHashBucket") .Input("string_tensor: string") .Output("output: int64") .Attr("num_buckets: int >= 1") - .SetShapeFn(shape_inference::UnchangedShape); + .SetShapeFn(shape_inference::UnchangedShape) + .Doc(R"doc( +Converts each string in the input Tensor to its hash mod by a number of buckets. + +The hash function is deterministic on the content of the string within the +process. + +Note that the hash function may change from time to time. +This functionality will be deprecated and it's recommended to use +`tf.string_to_hash_bucket_fast()` or `tf.string_to_hash_bucket_strong()`. + +num_buckets: The number of buckets. +output: A Tensor of the same shape as the input `string_tensor`. +)doc"); REGISTER_OP("ReduceJoin") .Input("inputs: string") @@ -48,7 +95,41 @@ REGISTER_OP("ReduceJoin") .Attr("keep_dims: bool = false") .Attr("separator: string = ''") .Output("output: string") - .SetShapeFn(shape_inference::ReductionShape); + .SetShapeFn(shape_inference::ReductionShape) + .Doc(R"doc( +Joins a string Tensor across the given dimensions. + +Computes the string join across dimensions in the given string Tensor of shape +`[d_0, d_1, ..., d_n-1]`. Returns a new Tensor created by joining the input +strings with the given separator (default: empty string). Negative indices are +counted backwards from the end, with `-1` being equivalent to `n - 1`. + +For example: + +```python +# tensor `a` is [["a", "b"], ["c", "d"]] +tf.reduce_join(a, 0) ==> ["ac", "bd"] +tf.reduce_join(a, 1) ==> ["ab", "cd"] +tf.reduce_join(a, -2) = tf.reduce_join(a, 0) ==> ["ac", "bd"] +tf.reduce_join(a, -1) = tf.reduce_join(a, 1) ==> ["ab", "cd"] +tf.reduce_join(a, 0, keep_dims=True) ==> [["ac", "bd"]] +tf.reduce_join(a, 1, keep_dims=True) ==> [["ab"], ["cd"]] +tf.reduce_join(a, 0, separator=".") ==> ["a.c", "b.d"] +tf.reduce_join(a, [0, 1]) ==> ["acbd"] +tf.reduce_join(a, [1, 0]) ==> ["abcd"] +tf.reduce_join(a, []) ==> ["abcd"] +``` + +inputs: The input to be joined. All reduced indices must have non-zero size. +reduction_indices: The dimensions to reduce over. Dimensions are reduced in the + order specified. Omitting `reduction_indices` is equivalent to passing + `[n-1, n-2, ..., 0]`. Negative indices from `-n` to `-1` are supported. +keep_dims: If `True`, retain reduced dimensions with length `1`. +separator: The separator to use when joining. + +output: Has shape equal to that of the input with reduced dimensions removed or + set to `1` depending on `keep_dims`. +)doc"); REGISTER_OP("AsString") .Input("input: T") @@ -59,7 +140,22 @@ REGISTER_OP("AsString") .Attr("shortest: bool = false") .Attr("width: int = -1") .Attr("fill: string = ''") - .SetShapeFn(shape_inference::UnchangedShape); + .SetShapeFn(shape_inference::UnchangedShape) + .Doc(R"doc( +Converts each entry in the given tensor to strings. Supports many numeric +types and boolean. + +precision: The post-decimal precision to use for floating point numbers. + Only used if precision > -1. +scientific: Use scientific notation for floating point numbers. +shortest: Use shortest representation (either scientific or standard) for + floating point numbers. +width: Pad pre-decimal numbers to this width. + Applies to both floating point and integer numbers. + Only used if width > -1. 
+fill: The value to pad if width > -1. If empty, pads with spaces.
+  Another typical value is '0'. String cannot be longer than 1 character.
+)doc");

 REGISTER_OP("StringJoin")
     .Input("inputs: N * string")
@@ -89,7 +185,16 @@ REGISTER_OP("StringJoin")
       }
       c->set_output(0, out);
       return Status::OK();
-    });
+    })
+    .Doc(R"doc(
+Joins the strings in the given list of string tensors into one tensor,
+using the given separator (default is an empty separator).
+
+inputs: A list of string tensors. The tensors must all have the same shape,
+  or be scalars. Scalars may be mixed in; these will be broadcast to the shape
+  of non-scalar inputs.
+separator: string, an optional join separator.
+)doc");

 REGISTER_OP("StringSplit")
     .Input("input: string")
@@ -107,18 +212,74 @@ REGISTER_OP("StringSplit")
       c->set_output(1, c->Vector(InferenceContext::kUnknownDim));
       c->set_output(2, c->Vector(2));
       return Status::OK();
-    });
+    })
+    .Doc(R"doc(
+Split elements of `input` based on `delimiter` into a `SparseTensor`.
+
+Let N be the size of the source (typically N will be the batch size). Split
+each element of `input` based on `delimiter` and return a `SparseTensor`
+containing the split tokens. Empty tokens are ignored.
+
+`delimiter` can be empty, or a string of split characters. If `delimiter` is an
+  empty string, each element of `input` is split into individual single-byte
+  character strings, including splitting of UTF-8 multibyte sequences. Otherwise
+  every character of `delimiter` is a potential split point.
+
+For example:
+  N = 2, input[0] is 'hello world' and input[1] is 'a b c', then the output
+  will be
+
+  indices = [0, 0;
+             0, 1;
+             1, 0;
+             1, 1;
+             1, 2]
+  shape = [2, 3]
+  values = ['hello', 'world', 'a', 'b', 'c']
+
+input: 1-D. Strings to split.
+delimiter: 0-D. Delimiter characters (bytes), or empty string.
+skip_empty: A `bool`. If `True`, skip the empty strings from the result.
+indices: A dense matrix of int64 representing the indices of the sparse tensor.
+values: A vector of strings corresponding to the split values.
+shape: A length-2 vector of int64 representing the shape of the sparse
+  tensor, where the first value is N and the second value is the maximum number
+  of tokens in a single input entry.
+)doc");

 REGISTER_OP("EncodeBase64")
     .Input("input: string")
     .Output("output: string")
     .Attr("pad: bool = false")
-    .SetShapeFn(shape_inference::UnchangedShape);
+    .SetShapeFn(shape_inference::UnchangedShape)
+    .Doc(R"doc(
+Encode strings into web-safe base64 format.
+
+Refer to the following article for more information on base64 format:
+en.wikipedia.org/wiki/Base64. Base64 strings may have padding with '=' at the
+end so that the encoded output has a length that is a multiple of 4. See the
+Padding section of the link above.
+
+Web-safe means that the encoder uses - and _ instead of + and /.
+
+input: Strings to be encoded.
+output: Input strings encoded in base64.
+pad: Whether padding is applied at the ends.
+)doc");

 REGISTER_OP("DecodeBase64")
     .Input("input: string")
     .Output("output: string")
-    .SetShapeFn(shape_inference::UnchangedShape);
+    .SetShapeFn(shape_inference::UnchangedShape)
+    .Doc(R"doc(
+Decode web-safe base64-encoded strings.
+
+Input may or may not have padding at the end. See EncodeBase64 for padding.
+Web-safe means that input must use - and _ instead of + and /.
+
+input: Base64 strings to decode.
+output: Decoded strings.
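+
+A round-trip sketch (via the `tf.encode_base64`/`tf.decode_base64` wrappers;
+input values are hypothetical):
+
+```python
+s = tf.constant(["hello", "tensorflow"])
+decoded = tf.decode_base64(tf.encode_base64(s, pad=True))  # equals s
+```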
+)doc"); REGISTER_OP("Substr") .Input("input: string") @@ -145,6 +306,88 @@ REGISTER_OP("Substr") // c->input(0) is the ShapeHandle to input strings // BroadcastBinaryOpShapeFn infers shape from c->input(0) and c->input(1). return shape_inference::BroadcastBinaryOpShapeFn(c); - }); + }) + .Doc(R"doc( +Return substrings from `Tensor` of strings. + +For each string in the input `Tensor`, creates a substring starting at index +`pos` with a total length of `len`. + +If `len` defines a substring that would extend beyond the length of the input +string, then as many characters as possible are used. + +If `pos` is negative or specifies a character index larger than any of the input +strings, then an `InvalidArgumentError` is thrown. + +`pos` and `len` must have the same shape, otherwise a `ValueError` is thrown on +Op creation. + +*NOTE*: `Substr` supports broadcasting up to two dimensions. More about +broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) + +--- + +Examples + +Using scalar `pos` and `len`: + +```python +input = [b'Hello', b'World'] +position = 1 +length = 3 + +output = [b'ell', b'orl'] +``` + +Using `pos` and `len` with same shape as `input`: + +```python +input = [[b'ten', b'eleven', b'twelve'], + [b'thirteen', b'fourteen', b'fifteen'], + [b'sixteen', b'seventeen', b'eighteen']] +position = [[1, 2, 3], + [1, 2, 3], + [1, 2, 3]] +length = [[2, 3, 4], + [4, 3, 2], + [5, 5, 5]] + +output = [[b'en', b'eve', b'lve'], + [b'hirt', b'urt', b'te'], + [b'ixtee', b'vente', b'hteen']] +``` + +Broadcasting `pos` and `len` onto `input`: + +``` +input = [[b'ten', b'eleven', b'twelve'], + [b'thirteen', b'fourteen', b'fifteen'], + [b'sixteen', b'seventeen', b'eighteen'], + [b'nineteen', b'twenty', b'twentyone']] +position = [1, 2, 3] +length = [1, 2, 3] + +output = [[b'e', b'ev', b'lve'], + [b'h', b'ur', b'tee'], + [b'i', b've', b'hte'], + [b'i', b'en', b'nty']] +``` + +Broadcasting `input` onto `pos` and `len`: + +``` +input = b'thirteen' +position = [1, 5, 7] +length = [3, 2, 1] + +output = [b'hir', b'ee', b'n'] +``` + +input: Tensor of strings +pos: Scalar defining the position of first character in each substring +len: Scalar defining the number of characters to include in each substring +output: Tensor of substrings +)doc"); } // namespace tensorflow diff --git a/tensorflow/core/ops/training_ops.cc b/tensorflow/core/ops/training_ops.cc index e8d03877c9..405318caf2 100644 --- a/tensorflow/core/ops/training_ops.cc +++ b/tensorflow/core/ops/training_ops.cc @@ -116,7 +116,17 @@ REGISTER_OP("ApplyGradientDescent") .Output("out: Ref(T)") .Attr("T: numbertype") .Attr("use_locking: bool = false") - .SetShapeFn(ApplyGradientDescentShapeFn); + .SetShapeFn(ApplyGradientDescentShapeFn) + .Doc(R"doc( +Update '*var' by subtracting 'alpha' * 'delta' from it. + +var: Should be from a Variable(). +alpha: Scaling factor. Must be a scalar. +delta: The change. +out: Same as "var". +use_locking: If `True`, the subtraction will be protected by a lock; + otherwise the behavior is undefined, but may exhibit less contention. +)doc"); REGISTER_OP("ResourceApplyGradientDescent") .Input("var: resource") @@ -124,7 +134,16 @@ REGISTER_OP("ResourceApplyGradientDescent") .Input("delta: T") .Attr("T: numbertype") .Attr("use_locking: bool = false") - .SetShapeFn(ApplyGradientDescentShapeFn); + .SetShapeFn(ApplyGradientDescentShapeFn) + .Doc(R"doc( +Update '*var' by subtracting 'alpha' * 'delta' from it. + +var: Should be from a Variable(). +alpha: Scaling factor. Must be a scalar. 
+delta: The change. +use_locking: If `True`, the subtraction will be protected by a lock; + otherwise the behavior is undefined, but may exhibit less contention. +)doc"); static Status ApplyProximalGradientDescentShapeFn(InferenceContext* c, bool sparse) { @@ -152,7 +171,21 @@ REGISTER_OP("ApplyProximalGradientDescent") .Attr("use_locking: bool = false") .SetShapeFn([](InferenceContext* c) { return ApplyProximalGradientDescentShapeFn(c, false /* sparse */); - }); + }) + .Doc(R"doc( +Update '*var' as FOBOS algorithm with fixed learning rate. +prox_v = var - alpha * delta +var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0} + +var: Should be from a Variable(). +alpha: Scaling factor. Must be a scalar. +l1: L1 regularization. Must be a scalar. +l2: L2 regularization. Must be a scalar. +delta: The change. +out: Same as "var". +use_locking: If True, the subtraction will be protected by a lock; + otherwise the behavior is undefined, but may exhibit less contention. +)doc"); REGISTER_OP("SparseApplyProximalGradientDescent") .Input("var: Ref(T)") @@ -167,7 +200,24 @@ REGISTER_OP("SparseApplyProximalGradientDescent") .Attr("use_locking: bool = false") .SetShapeFn([](InferenceContext* c) { return ApplyProximalGradientDescentShapeFn(c, true /* sparse */); - }); + }) + .Doc(R"doc( +Sparse update '*var' as FOBOS algorithm with fixed learning rate. + +That is for rows we have grad for, we update var as follows: +prox_v = var - alpha * grad +var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0} + +var: Should be from a Variable(). +alpha: Scaling factor. Must be a scalar. +l1: L1 regularization. Must be a scalar. +l2: L2 regularization. Must be a scalar. +grad: The gradient. +indices: A vector of indices into the first dimension of var and accum. +out: Same as "var". +use_locking: If True, the subtraction will be protected by a lock; + otherwise the behavior is undefined, but may exhibit less contention. +)doc"); REGISTER_OP("ResourceApplyProximalGradientDescent") .Input("var: resource") @@ -179,7 +229,20 @@ REGISTER_OP("ResourceApplyProximalGradientDescent") .Attr("use_locking: bool = false") .SetShapeFn([](InferenceContext* c) { return ApplyProximalGradientDescentShapeFn(c, false /* sparse */); - }); + }) + .Doc(R"doc( +Update '*var' as FOBOS algorithm with fixed learning rate. +prox_v = var - alpha * delta +var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0} + +var: Should be from a Variable(). +alpha: Scaling factor. Must be a scalar. +l1: L1 regularization. Must be a scalar. +l2: L2 regularization. Must be a scalar. +delta: The change. +use_locking: If True, the subtraction will be protected by a lock; + otherwise the behavior is undefined, but may exhibit less contention. +)doc"); REGISTER_OP("ResourceSparseApplyProximalGradientDescent") .Input("var: resource") @@ -193,7 +256,23 @@ REGISTER_OP("ResourceSparseApplyProximalGradientDescent") .Attr("use_locking: bool = false") .SetShapeFn([](InferenceContext* c) { return ApplyProximalGradientDescentShapeFn(c, true /* sparse */); - }); + }) + .Doc(R"doc( +Sparse update '*var' as FOBOS algorithm with fixed learning rate. + +That is for rows we have grad for, we update var as follows: +prox_v = var - alpha * grad +var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0} + +var: Should be from a Variable(). +alpha: Scaling factor. Must be a scalar. +l1: L1 regularization. Must be a scalar. +l2: L2 regularization. Must be a scalar. +grad: The gradient. +indices: A vector of indices into the first dimension of var and accum. 
+use_locking: If True, the subtraction will be protected by a lock;
+  otherwise the behavior is undefined, but may exhibit less contention.
+)doc");

 static Status ApplyAdadeltaShapeFn(InferenceContext* c, bool sparse) {
   ShapeHandle unused;
@@ -225,7 +304,26 @@ REGISTER_OP("ApplyAdadelta")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyAdadeltaShapeFn(c, false /* sparse */);
-    });
+    })
+    .Doc(R"doc(
+Update '*var' according to the adadelta scheme.
+
+accum = rho * accum + (1 - rho) * grad.square();
+update = (accum_update + epsilon).sqrt() * (accum + epsilon).rsqrt() * grad;
+accum_update = rho * accum_update + (1 - rho) * update.square();
+var -= update;
+
+var: Should be from a Variable().
+accum: Should be from a Variable().
+accum_update: Should be from a Variable().
+lr: Scaling factor. Must be a scalar.
+rho: Decay factor. Must be a scalar.
+epsilon: Constant factor. Must be a scalar.
+grad: The gradient.
+out: Same as "var".
+use_locking: If True, updating of the var, accum and accum_update tensors will
+  be protected by a lock; otherwise the behavior is undefined, but may exhibit
+  less contention.
+)doc");

 REGISTER_OP("SparseApplyAdadelta")
     .Input("var: Ref(T)")
@@ -242,7 +340,20 @@ REGISTER_OP("SparseApplyAdadelta")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyAdadeltaShapeFn(c, true /* sparse */);
-    });
+    })
+    .Doc(R"doc(
+var: Should be from a Variable().
+accum: Should be from a Variable().
+accum_update: Should be from a Variable().
+lr: Learning rate. Must be a scalar.
+rho: Decay factor. Must be a scalar.
+epsilon: Constant factor. Must be a scalar.
+grad: The gradient.
+indices: A vector of indices into the first dimension of var and accum.
+out: Same as "var".
+use_locking: If True, updating of the var and accum tensors will be protected
+  by a lock; otherwise the behavior is undefined, but may exhibit less
+  contention.
+)doc");

 REGISTER_OP("ResourceApplyAdadelta")
     .Input("var: resource")
@@ -256,7 +367,25 @@ REGISTER_OP("ResourceApplyAdadelta")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyAdadeltaShapeFn(c, false /* sparse */);
-    });
+    })
+    .Doc(R"doc(
+Update '*var' according to the adadelta scheme.
+
+accum = rho * accum + (1 - rho) * grad.square();
+update = (accum_update + epsilon).sqrt() * (accum + epsilon).rsqrt() * grad;
+accum_update = rho * accum_update + (1 - rho) * update.square();
+var -= update;
+
+var: Should be from a Variable().
+accum: Should be from a Variable().
+accum_update: Should be from a Variable().
+lr: Scaling factor. Must be a scalar.
+rho: Decay factor. Must be a scalar.
+epsilon: Constant factor. Must be a scalar.
+grad: The gradient.
+use_locking: If True, updating of the var, accum and accum_update tensors will
+  be protected by a lock; otherwise the behavior is undefined, but may exhibit
+  less contention.
+)doc");

 REGISTER_OP("ResourceSparseApplyAdadelta")
     .Input("var: resource")
@@ -272,7 +401,19 @@ REGISTER_OP("ResourceSparseApplyAdadelta")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyAdadeltaShapeFn(c, true /* sparse */);
-    });
+    })
+    .Doc(R"doc(
+var: Should be from a Variable().
+accum: Should be from a Variable().
+accum_update: Should be from a Variable().
+lr: Learning rate. Must be a scalar.
+rho: Decay factor. Must be a scalar.
+epsilon: Constant factor. Must be a scalar.
+grad: The gradient.
 
 static Status ApplyAdagradShapeFn(InferenceContext* c, bool sparse) {
   ShapeHandle unused;
@@ -297,7 +438,22 @@ REGISTER_OP("ApplyAdagrad")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyAdagradShapeFn(c, false /* sparse */);
-    });
+    })
+    .Doc(R"doc(
+Update '*var' according to the adagrad scheme.
+
+accum += grad * grad
+var -= lr * grad * (1 / sqrt(accum))
+
+var: Should be from a Variable().
+accum: Should be from a Variable().
+lr: Scaling factor. Must be a scalar.
+grad: The gradient.
+out: Same as "var".
+use_locking: If `True`, updating of the var and accum tensors will be protected
+  by a lock; otherwise the behavior is undefined, but may exhibit less
+  contention.
+)doc");
 
 REGISTER_OP("ResourceApplyAdagrad")
     .Input("var: resource")
@@ -308,7 +464,21 @@ REGISTER_OP("ResourceApplyAdagrad")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyAdagradShapeFn(c, false /* sparse */);
-    });
+    })
+    .Doc(R"doc(
+Update '*var' according to the adagrad scheme.
+
+accum += grad * grad
+var -= lr * grad * (1 / sqrt(accum))
+
+var: Should be from a Variable().
+accum: Should be from a Variable().
+lr: Scaling factor. Must be a scalar.
+grad: The gradient.
+use_locking: If `True`, updating of the var and accum tensors will be protected
+  by a lock; otherwise the behavior is undefined, but may exhibit less
+  contention.
+)doc");
 
 static Status ApplyProximalAdagradShapeFn(InferenceContext* c, bool sparse) {
   ShapeHandle unused;
@@ -337,7 +507,23 @@ REGISTER_OP("ApplyProximalAdagrad")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyProximalAdagradShapeFn(c, false /* sparse */);
-    });
+    })
+    .Doc(R"doc(
+Update '*var' and '*accum' according to FOBOS with the Adagrad learning rate.
+accum += grad * grad
+prox_v = var - lr * grad * (1 / sqrt(accum))
+var = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0}
+
+var: Should be from a Variable().
+accum: Should be from a Variable().
+grad: The gradient.
+lr: Scaling factor. Must be a scalar.
+l1: L1 regularization. Must be a scalar.
+l2: L2 regularization. Must be a scalar.
+out: Same as "var".
+use_locking: If `True`, updating of the var and accum tensors will be protected by
+a lock; otherwise the behavior is undefined, but may exhibit less contention.
+)doc");
 
 REGISTER_OP("ResourceApplyProximalAdagrad")
     .Input("var: resource")
@@ -350,7 +536,22 @@ REGISTER_OP("ResourceApplyProximalAdagrad")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyProximalAdagradShapeFn(c, false /* sparse */);
-    });
+    })
+    .Doc(R"doc(
+Update '*var' and '*accum' according to FOBOS with the Adagrad learning rate.
+accum += grad * grad
+prox_v = var - lr * grad * (1 / sqrt(accum))
+var = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0}
+
+var: Should be from a Variable().
+accum: Should be from a Variable().
+grad: The gradient.
+lr: Scaling factor. Must be a scalar.
+l1: L1 regularization. Must be a scalar.
+l2: L2 regularization. Must be a scalar.
+use_locking: If `True`, updating of the var and accum tensors will be protected by
+a lock; otherwise the behavior is undefined, but may exhibit less contention.
+)doc");
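The adagrad update above in standalone C++ (illustrative sketch, function name ours; the proximal variants append the same soft-threshold step shown in the FOBOS sketch earlier):

#include <cmath>
#include <cstddef>
#include <vector>

// accum += grad^2; var -= lr * grad / sqrt(accum).
void ApplyAdagradSketch(std::vector<float>& var, std::vector<float>& accum,
                        float lr, const std::vector<float>& grad) {
  for (std::size_t i = 0; i < var.size(); ++i) {
    accum[i] += grad[i] * grad[i];
    var[i] -= lr * grad[i] / std::sqrt(accum[i]);
  }
}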
+)doc"); REGISTER_OP("SparseApplyAdagrad") .Input("var: Ref(T)") @@ -364,7 +565,24 @@ REGISTER_OP("SparseApplyAdagrad") .Attr("use_locking: bool = false") .SetShapeFn([](InferenceContext* c) { return ApplyAdagradShapeFn(c, true /* sparse */); - }); + }) + .Doc(R"doc( +Update relevant entries in '*var' and '*accum' according to the adagrad scheme. + +That is for rows we have grad for, we update var and accum as follows: +accum += grad * grad +var -= lr * grad * (1 / sqrt(accum)) + +var: Should be from a Variable(). +accum: Should be from a Variable(). +lr: Learning rate. Must be a scalar. +grad: The gradient. +indices: A vector of indices into the first dimension of var and accum. +out: Same as "var". +use_locking: If `True`, updating of the var and accum tensors will be protected + by a lock; otherwise the behavior is undefined, but may exhibit less + contention. +)doc"); REGISTER_OP("ResourceSparseApplyAdagrad") .Input("var: resource") @@ -377,7 +595,23 @@ REGISTER_OP("ResourceSparseApplyAdagrad") .Attr("use_locking: bool = false") .SetShapeFn([](InferenceContext* c) { return ApplyAdagradShapeFn(c, true /* sparse */); - }); + }) + .Doc(R"doc( +Update relevant entries in '*var' and '*accum' according to the adagrad scheme. + +That is for rows we have grad for, we update var and accum as follows: +accum += grad * grad +var -= lr * grad * (1 / sqrt(accum)) + +var: Should be from a Variable(). +accum: Should be from a Variable(). +lr: Learning rate. Must be a scalar. +grad: The gradient. +indices: A vector of indices into the first dimension of var and accum. +use_locking: If `True`, updating of the var and accum tensors will be protected + by a lock; otherwise the behavior is undefined, but may exhibit less + contention. +)doc"); static Status ApplyAdagradDAShapeFn(InferenceContext* c, bool sparse) { ShapeHandle unused; @@ -413,7 +647,22 @@ REGISTER_OP("ApplyAdagradDA") .Attr("use_locking: bool = false") .SetShapeFn([](InferenceContext* c) { return ApplyAdagradDAShapeFn(c, false /* sparse */); - }); + }) + .Doc(R"doc( +Update '*var' according to the proximal adagrad scheme. + +var: Should be from a Variable(). +gradient_accumulator: Should be from a Variable(). +gradient_squared_accumulator: Should be from a Variable(). +grad: The gradient. +lr: Scaling factor. Must be a scalar. +l1: L1 regularization. Must be a scalar. +l2: L2 regularization. Must be a scalar. +global_step: Training step number. Must be a scalar. +out: Same as "var". +use_locking: If True, updating of the var and accum tensors will be protected by +a lock; otherwise the behavior is undefined, but may exhibit less contention. +)doc"); REGISTER_OP("SparseApplyAdagradDA") .Input("var: Ref(T)") @@ -431,7 +680,23 @@ REGISTER_OP("SparseApplyAdagradDA") .Attr("use_locking: bool = false") .SetShapeFn([](InferenceContext* c) { return ApplyAdagradDAShapeFn(c, true /* sparse */); - }); + }) + .Doc(R"doc( +Update entries in '*var' and '*accum' according to the proximal adagrad scheme. + +var: Should be from a Variable(). +gradient_accumulator: Should be from a Variable(). +gradient_squared_accumulator: Should be from a Variable(). +grad: The gradient. +indices: A vector of indices into the first dimension of var and accum. +lr: Learning rate. Must be a scalar. +l1: L1 regularization. Must be a scalar. +l2: L2 regularization. Must be a scalar. +global_step: Training step number. Must be a scalar. +out: Same as "var". 
+use_locking: If `True`, updating of the var and accum tensors will be protected by
+a lock; otherwise the behavior is undefined, but may exhibit less contention.
+)doc");
 
 REGISTER_OP("SparseApplyProximalAdagrad")
     .Input("var: Ref(T)")
@@ -447,7 +712,27 @@ REGISTER_OP("SparseApplyProximalAdagrad")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyProximalAdagradShapeFn(c, true /* sparse */);
-    });
+    })
+    .Doc(R"doc(
+Sparse update entries in '*var' and '*accum' according to the FOBOS algorithm.
+
+That is, for rows we have grad for, we update var and accum as follows:
+accum += grad * grad
+prox_v = var
+prox_v -= lr * grad * (1 / sqrt(accum))
+var = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0}
+
+var: Should be from a Variable().
+accum: Should be from a Variable().
+lr: Learning rate. Must be a scalar.
+l1: L1 regularization. Must be a scalar.
+l2: L2 regularization. Must be a scalar.
+grad: The gradient.
+indices: A vector of indices into the first dimension of var and accum.
+out: Same as "var".
+use_locking: If `True`, updating of the var and accum tensors will be protected by
+a lock; otherwise the behavior is undefined, but may exhibit less contention.
+)doc");
 
 REGISTER_OP("ResourceApplyAdagradDA")
     .Input("var: resource")
@@ -462,7 +747,21 @@ REGISTER_OP("ResourceApplyAdagradDA")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyAdagradDAShapeFn(c, false /* sparse */);
-    });
+    })
+    .Doc(R"doc(
+Update '*var' according to the proximal adagrad scheme.
+
+var: Should be from a Variable().
+gradient_accumulator: Should be from a Variable().
+gradient_squared_accumulator: Should be from a Variable().
+grad: The gradient.
+lr: Scaling factor. Must be a scalar.
+l1: L1 regularization. Must be a scalar.
+l2: L2 regularization. Must be a scalar.
+global_step: Training step number. Must be a scalar.
+use_locking: If `True`, updating of the var and accum tensors will be protected by
+a lock; otherwise the behavior is undefined, but may exhibit less contention.
+)doc");
 
 REGISTER_OP("ResourceSparseApplyAdagradDA")
     .Input("var: resource")
@@ -479,7 +778,22 @@ REGISTER_OP("ResourceSparseApplyAdagradDA")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyAdagradDAShapeFn(c, true /* sparse */);
-    });
+    })
+    .Doc(R"doc(
+Update entries in '*var' and '*accum' according to the proximal adagrad scheme.
+
+var: Should be from a Variable().
+gradient_accumulator: Should be from a Variable().
+gradient_squared_accumulator: Should be from a Variable().
+grad: The gradient.
+indices: A vector of indices into the first dimension of var.
+lr: Learning rate. Must be a scalar.
+l1: L1 regularization. Must be a scalar.
+l2: L2 regularization. Must be a scalar.
+global_step: Training step number. Must be a scalar.
+use_locking: If `True`, updating of the var and accum tensors will be protected by
+a lock; otherwise the behavior is undefined, but may exhibit less contention.
+)doc");
 
 REGISTER_OP("ResourceSparseApplyProximalAdagrad")
    .Input("var: resource")
@@ -494,7 +808,26 @@ REGISTER_OP("ResourceSparseApplyProximalAdagrad")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyProximalAdagradShapeFn(c, true /* sparse */);
-    });
+    })
+    .Doc(R"doc(
+Sparse update entries in '*var' and '*accum' according to the FOBOS algorithm.
+
+That is, for rows we have grad for, we update var and accum as follows:
+accum += grad * grad
+prox_v = var
+prox_v -= lr * grad * (1 / sqrt(accum))
+var = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0}
+
+var: Should be from a Variable().
+accum: Should be from a Variable().
+lr: Learning rate. Must be a scalar.
+l1: L1 regularization. Must be a scalar.
+l2: L2 regularization. Must be a scalar.
+grad: The gradient.
+indices: A vector of indices into the first dimension of var and accum.
+use_locking: If `True`, updating of the var and accum tensors will be protected by
+a lock; otherwise the behavior is undefined, but may exhibit less contention.
+)doc");
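All of the Sparse*/ResourceSparse* registrations share one access pattern: row r of grad is applied to row indices[r] of var and its accumulators, and no other rows are touched. A sketch of that pattern using the proximal-adagrad inner update (the function name and the row-major layout are our assumptions):

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

// var/accum are dense [num_rows, dim] buffers stored row-major; grad holds one
// row per entry of indices. Only the indexed rows are read or written.
void SparseApplyProximalAdagradSketch(std::vector<float>& var,
                                      std::vector<float>& accum, float lr,
                                      float l1, float l2,
                                      const std::vector<float>& grad,
                                      const std::vector<int>& indices,
                                      std::size_t dim) {
  for (std::size_t r = 0; r < indices.size(); ++r) {
    for (std::size_t j = 0; j < dim; ++j) {
      const std::size_t vi = static_cast<std::size_t>(indices[r]) * dim + j;
      const float g = grad[r * dim + j];
      accum[vi] += g * g;
      const float prox_v = var[vi] - lr * g / std::sqrt(accum[vi]);
      const float shrunk = std::max(std::abs(prox_v) - lr * l1, 0.0f);
      var[vi] = std::copysign(shrunk, prox_v) / (1.0f + lr * l2);
    }
  }
}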
 
 static Status ApplyFtrlShapeFn(InferenceContext* c, bool sparse) {
   ShapeHandle unused;
@@ -528,7 +861,29 @@ REGISTER_OP("ApplyFtrl")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyFtrlShapeFn(c, false /* sparse */);
-    });
+    })
+    .Doc(R"doc(
+Update '*var' according to the Ftrl-proximal scheme.
+
+accum_new = accum + grad * grad
+linear += grad - (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
+quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
+var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
+accum = accum_new
+
+var: Should be from a Variable().
+accum: Should be from a Variable().
+linear: Should be from a Variable().
+grad: The gradient.
+lr: Scaling factor. Must be a scalar.
+l1: L1 regularization. Must be a scalar.
+l2: L2 regularization. Must be a scalar.
+lr_power: Scaling factor. Must be a scalar.
+out: Same as "var".
+use_locking: If `True`, updating of the var and accum tensors will be protected
+  by a lock; otherwise the behavior is undefined, but may exhibit less
+  contention.
+)doc");
 
 REGISTER_OP("SparseApplyFtrl")
     .Input("var: Ref(T)")
@@ -546,7 +901,31 @@ REGISTER_OP("SparseApplyFtrl")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyFtrlShapeFn(c, true /* sparse */);
-    });
+    })
+    .Doc(R"doc(
+Update relevant entries in '*var' according to the Ftrl-proximal scheme.
+
+That is, for rows we have grad for, we update var, accum and linear as follows:
+accum_new = accum + grad * grad
+linear += grad - (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
+quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
+var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
+accum = accum_new
+
+var: Should be from a Variable().
+accum: Should be from a Variable().
+linear: Should be from a Variable().
+grad: The gradient.
+indices: A vector of indices into the first dimension of var and accum.
+lr: Scaling factor. Must be a scalar.
+l1: L1 regularization. Must be a scalar.
+l2: L2 regularization. Must be a scalar.
+lr_power: Scaling factor. Must be a scalar.
+out: Same as "var".
+use_locking: If `True`, updating of the var and accum tensors will be protected
+  by a lock; otherwise the behavior is undefined, but may exhibit less
+  contention.
+)doc");
 
 REGISTER_OP("ResourceApplyFtrl")
     .Input("var: resource")
@@ -561,7 +940,28 @@ REGISTER_OP("ResourceApplyFtrl")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyFtrlShapeFn(c, false /* sparse */);
-    });
+    })
+    .Doc(R"doc(
+Update '*var' according to the Ftrl-proximal scheme.
+
+accum_new = accum + grad * grad
+linear += grad - (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
+quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
+var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
+accum = accum_new
+
+var: Should be from a Variable().
+accum: Should be from a Variable().
+linear: Should be from a Variable().
+grad: The gradient.
+lr: Scaling factor. Must be a scalar.
+l1: L1 regularization. Must be a scalar.
+l2: L2 regularization. Must be a scalar.
+lr_power: Scaling factor. Must be a scalar.
+use_locking: If `True`, updating of the var and accum tensors will be protected
+  by a lock; otherwise the behavior is undefined, but may exhibit less
+  contention.
+)doc");
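One FTRL-proximal step per element, following the pseudo-code above (using the `linear += grad - ...` sign, which the dense and resource variants now state consistently; the function name is ours):

#include <cmath>
#include <cstddef>
#include <vector>

// accum_new^(-lr_power) is computed via std::pow; with the default
// lr_power = -0.5 this is sqrt(accum_new).
void ApplyFtrlSketch(std::vector<float>& var, std::vector<float>& accum,
                     std::vector<float>& linear, const std::vector<float>& grad,
                     float lr, float l1, float l2, float lr_power) {
  for (std::size_t i = 0; i < var.size(); ++i) {
    const float accum_new = accum[i] + grad[i] * grad[i];
    linear[i] += grad[i] - (std::pow(accum_new, -lr_power) -
                            std::pow(accum[i], -lr_power)) /
                               lr * var[i];
    const float quadratic = std::pow(accum_new, -lr_power) / lr + 2 * l2;
    var[i] = std::abs(linear[i]) > l1
                 ? (std::copysign(l1, linear[i]) - linear[i]) / quadratic
                 : 0.0f;
    accum[i] = accum_new;
  }
}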
 
 REGISTER_OP("ResourceSparseApplyFtrl")
     .Input("var: resource")
@@ -578,7 +978,30 @@ REGISTER_OP("ResourceSparseApplyFtrl")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyFtrlShapeFn(c, true /* sparse */);
-    });
+    })
+    .Doc(R"doc(
+Update relevant entries in '*var' according to the Ftrl-proximal scheme.
+
+That is, for rows we have grad for, we update var, accum and linear as follows:
+accum_new = accum + grad * grad
+linear += grad - (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
+quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
+var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
+accum = accum_new
+
+var: Should be from a Variable().
+accum: Should be from a Variable().
+linear: Should be from a Variable().
+grad: The gradient.
+indices: A vector of indices into the first dimension of var and accum.
+lr: Scaling factor. Must be a scalar.
+l1: L1 regularization. Must be a scalar.
+l2: L2 regularization. Must be a scalar.
+lr_power: Scaling factor. Must be a scalar.
+use_locking: If `True`, updating of the var and accum tensors will be protected
+  by a lock; otherwise the behavior is undefined, but may exhibit less
+  contention.
+)doc");
 
 REGISTER_OP("ApplyFtrlV2")
     .Input("var: Ref(T)")
@@ -595,7 +1018,32 @@ REGISTER_OP("ApplyFtrlV2")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyFtrlShapeFn(c, false /* sparse */);
-    });
+    })
+    .Doc(R"doc(
+Update '*var' according to the Ftrl-proximal scheme.
+
+grad_with_shrinkage = grad + 2 * l2_shrinkage * var
+accum_new = accum + grad_with_shrinkage * grad_with_shrinkage
+linear += grad_with_shrinkage -
+    (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
+quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
+var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
+accum = accum_new
+
+var: Should be from a Variable().
+accum: Should be from a Variable().
+linear: Should be from a Variable().
+grad: The gradient.
+lr: Scaling factor. Must be a scalar.
+l1: L1 regularization. Must be a scalar.
+l2: Online L2 regularization. Must be a scalar.
+l2_shrinkage: L2 shrinkage regularization. Must be a scalar.
+lr_power: Scaling factor. Must be a scalar.
+out: Same as "var".
+use_locking: If `True`, updating of the var and accum tensors will be protected
+  by a lock; otherwise the behavior is undefined, but may exhibit less
+  contention.
+)doc");
 
 REGISTER_OP("SparseApplyFtrlV2")
     .Input("var: Ref(T)")
@@ -614,7 +1062,34 @@ REGISTER_OP("SparseApplyFtrlV2")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyFtrlShapeFn(c, true /* sparse */);
-    });
+    })
+    .Doc(R"doc(
+Update relevant entries in '*var' according to the Ftrl-proximal scheme.
+
+That is, for rows we have grad for, we update var, accum and linear as follows:
+grad_with_shrinkage = grad + 2 * l2_shrinkage * var
+accum_new = accum + grad_with_shrinkage * grad_with_shrinkage
+linear += grad_with_shrinkage -
+    (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
+quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
+var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
+accum = accum_new
+
+var: Should be from a Variable().
+accum: Should be from a Variable().
+linear: Should be from a Variable().
+grad: The gradient.
+indices: A vector of indices into the first dimension of var and accum.
+lr: Scaling factor. Must be a scalar.
+l1: L1 regularization. Must be a scalar.
+l2: Online L2 regularization. Must be a scalar.
+l2_shrinkage: L2 shrinkage regularization. Must be a scalar.
+lr_power: Scaling factor. Must be a scalar.
+out: Same as "var".
+use_locking: If `True`, updating of the var and accum tensors will be protected
+  by a lock; otherwise the behavior is undefined, but may exhibit less
+  contention.
+)doc");
 
 REGISTER_OP("ResourceApplyFtrlV2")
     .Input("var: resource")
@@ -630,7 +1105,31 @@ REGISTER_OP("ResourceApplyFtrlV2")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyFtrlShapeFn(c, false /* sparse */);
-    });
+    })
+    .Doc(R"doc(
+Update '*var' according to the Ftrl-proximal scheme.
+
+grad_with_shrinkage = grad + 2 * l2_shrinkage * var
+accum_new = accum + grad_with_shrinkage * grad_with_shrinkage
+linear += grad_with_shrinkage -
+    (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
+quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
+var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
+accum = accum_new
+
+var: Should be from a Variable().
+accum: Should be from a Variable().
+linear: Should be from a Variable().
+grad: The gradient.
+lr: Scaling factor. Must be a scalar.
+l1: L1 regularization. Must be a scalar.
+l2: Online L2 regularization. Must be a scalar.
+l2_shrinkage: L2 shrinkage regularization. Must be a scalar.
+lr_power: Scaling factor. Must be a scalar.
+use_locking: If `True`, updating of the var and accum tensors will be protected
+  by a lock; otherwise the behavior is undefined, but may exhibit less
+  contention.
+)doc");
 
 REGISTER_OP("ResourceSparseApplyFtrlV2")
     .Input("var: resource")
@@ -648,7 +1147,33 @@ REGISTER_OP("ResourceSparseApplyFtrlV2")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyFtrlShapeFn(c, true /* sparse */);
-    });
+    })
+    .Doc(R"doc(
+Update relevant entries in '*var' according to the Ftrl-proximal scheme.
+
+That is, for rows we have grad for, we update var, accum and linear as follows:
+grad_with_shrinkage = grad + 2 * l2_shrinkage * var
+accum_new = accum + grad_with_shrinkage * grad_with_shrinkage
+linear += grad_with_shrinkage -
+    (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
+quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
+var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
+accum = accum_new
+
+var: Should be from a Variable().
+accum: Should be from a Variable().
+linear: Should be from a Variable().
+grad: The gradient.
+indices: A vector of indices into the first dimension of var and accum.
+lr: Scaling factor. Must be a scalar.
+l1: L1 regularization. Must be a scalar.
+l2: Online L2 regularization. Must be a scalar.
+l2_shrinkage: L2 shrinkage regularization. Must be a scalar.
+lr_power: Scaling factor. Must be a scalar.
+use_locking: If `True`, updating of the var and accum tensors will be protected
+  by a lock; otherwise the behavior is undefined, but may exhibit less
+  contention.
+)doc");
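The V2 variants differ only in the gradient fed to the accumulator and linear terms: a shrinkage term 2 * l2_shrinkage * var is folded in first, while the quadratic term still uses the plain l2. A sketch following the pseudo-code above (function name ours):

#include <cmath>
#include <cstddef>
#include <vector>

// Identical to the FTRL sketch shown earlier, except that grad_with_shrinkage
// replaces grad in the accumulator and linear updates.
void ApplyFtrlV2Sketch(std::vector<float>& var, std::vector<float>& accum,
                       std::vector<float>& linear,
                       const std::vector<float>& grad, float lr, float l1,
                       float l2, float l2_shrinkage, float lr_power) {
  for (std::size_t i = 0; i < var.size(); ++i) {
    const float gws = grad[i] + 2 * l2_shrinkage * var[i];  // grad_with_shrinkage
    const float accum_new = accum[i] + gws * gws;
    linear[i] += gws - (std::pow(accum_new, -lr_power) -
                        std::pow(accum[i], -lr_power)) /
                           lr * var[i];
    const float quadratic = std::pow(accum_new, -lr_power) / lr + 2 * l2;
    var[i] = std::abs(linear[i]) > l1
                 ? (std::copysign(l1, linear[i]) - linear[i]) / quadratic
                 : 0.0f;
    accum[i] = accum_new;
  }
}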
 
 static Status ApplyMomentumShapeFn(InferenceContext* c, bool sparse) {
   ShapeHandle unused;
@@ -677,7 +1202,27 @@ REGISTER_OP("ApplyMomentum")
     .Attr("use_nesterov: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyMomentumShapeFn(c, false /* sparse */);
-    });
+    })
+    .Doc(R"doc(
+Update '*var' according to the momentum scheme. Set use_nesterov = True if you
+want to use Nesterov momentum.
+
+accum = accum * momentum + grad
+var -= lr * accum
+
+var: Should be from a Variable().
+accum: Should be from a Variable().
+lr: Scaling factor. Must be a scalar.
+grad: The gradient.
+momentum: Momentum. Must be a scalar.
+out: Same as "var".
+use_locking: If `True`, updating of the var and accum tensors will be protected
+  by a lock; otherwise the behavior is undefined, but may exhibit less
+  contention.
+use_nesterov: If `True`, Nesterov momentum is used: the gradient is effectively
+evaluated at var - lr * momentum * accum rather than at var.
+)doc");
 
 REGISTER_OP("SparseApplyMomentum")
     .Input("var: Ref(T)")
@@ -693,7 +1238,30 @@ REGISTER_OP("SparseApplyMomentum")
     .Attr("use_nesterov: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyMomentumShapeFn(c, true /* sparse */);
-    });
+    })
+    .Doc(R"doc(
+Update relevant entries in '*var' and '*accum' according to the momentum scheme.
+Set use_nesterov = True if you want to use Nesterov momentum.
+
+That is, for rows we have grad for, we update var and accum as follows:
+
+accum = accum * momentum + grad
+var -= lr * accum
+
+var: Should be from a Variable().
+accum: Should be from a Variable().
+lr: Learning rate. Must be a scalar.
+grad: The gradient.
+indices: A vector of indices into the first dimension of var and accum.
+momentum: Momentum. Must be a scalar.
+out: Same as "var".
+use_locking: If `True`, updating of the var and accum tensors will be protected
+  by a lock; otherwise the behavior is undefined, but may exhibit less
+  contention.
+use_nesterov: If `True`, Nesterov momentum is used: the gradient is effectively
+evaluated at var - lr * momentum * accum rather than at var.
+)doc");
 
 REGISTER_OP("ResourceApplyMomentum")
     .Input("var: resource")
@@ -706,7 +1274,26 @@ REGISTER_OP("ResourceApplyMomentum")
     .Attr("use_nesterov: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyMomentumShapeFn(c, false /* sparse */);
-    });
+    })
+    .Doc(R"doc(
+Update '*var' according to the momentum scheme. Set use_nesterov = True if you
+want to use Nesterov momentum.
+
+accum = accum * momentum + grad
+var -= lr * accum
+
+var: Should be from a Variable().
+accum: Should be from a Variable().
+lr: Scaling factor. Must be a scalar.
+grad: The gradient.
+momentum: Momentum. Must be a scalar.
+use_locking: If `True`, updating of the var and accum tensors will be protected
+  by a lock; otherwise the behavior is undefined, but may exhibit less
+  contention.
+use_nesterov: If `True`, Nesterov momentum is used: the gradient is effectively
+evaluated at var - lr * momentum * accum rather than at var.
+)doc");
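A sketch of the momentum update; the Nesterov branch reflects a common formulation (the raw gradient applied directly plus a momentum-scaled accumulator term), which we believe matches the kernel, but the registered kernel is authoritative:

#include <cstddef>
#include <vector>

void ApplyMomentumSketch(std::vector<float>& var, std::vector<float>& accum,
                         float lr, const std::vector<float>& grad,
                         float momentum, bool use_nesterov) {
  for (std::size_t i = 0; i < var.size(); ++i) {
    accum[i] = accum[i] * momentum + grad[i];
    if (use_nesterov) {
      // Lookahead step: apply the gradient plus a momentum-scaled accumulator.
      var[i] -= grad[i] * lr + accum[i] * momentum * lr;
    } else {
      var[i] -= lr * accum[i];
    }
  }
}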
+)doc"); REGISTER_OP("ResourceSparseApplyMomentum") .Input("var: resource") @@ -721,7 +1308,29 @@ REGISTER_OP("ResourceSparseApplyMomentum") .Attr("use_nesterov: bool = false") .SetShapeFn([](InferenceContext* c) { return ApplyMomentumShapeFn(c, true /* sparse */); - }); + }) + .Doc(R"doc( +Update relevant entries in '*var' and '*accum' according to the momentum scheme. +Set use_nesterov = True if you want to use Nesterov momentum. + +That is for rows we have grad for, we update var and accum as follows: + +accum = accum * momentum + grad +var -= lr * accum + +var: Should be from a Variable(). +accum: Should be from a Variable(). +lr: Learning rate. Must be a scalar. +grad: The gradient. +indices: A vector of indices into the first dimension of var and accum. +momentum: Momentum. Must be a scalar. +use_locking: If `True`, updating of the var and accum tensors will be protected + by a lock; otherwise the behavior is undefined, but may exhibit less + contention. +use_nesterov: If `True`, the tensor passed to compute grad will be +var - lr * momentum * accum, so in the end, the var you get is actually +var - lr * momentum * accum. +)doc"); static Status ApplyAdamShapeFn(InferenceContext* c, bool sparse) { ShapeHandle unused; @@ -759,7 +1368,31 @@ REGISTER_OP("ApplyAdam") .Attr("use_nesterov: bool = false") .SetShapeFn([](InferenceContext* c) { return ApplyAdamShapeFn(c, false /* sparse */); - }); + }) + .Doc(R"doc( +Update '*var' according to the Adam algorithm. + +lr_t <- learning_rate * sqrt(1 - beta2^t) / (1 - beta1^t) +m_t <- beta1 * m_{t-1} + (1 - beta1) * g_t +v_t <- beta2 * v_{t-1} + (1 - beta2) * g_t * g_t +variable <- variable - lr_t * m_t / (sqrt(v_t) + epsilon) + +var: Should be from a Variable(). +m: Should be from a Variable(). +v: Should be from a Variable(). +beta1_power: Must be a scalar. +beta2_power: Must be a scalar. +lr: Scaling factor. Must be a scalar. +beta1: Momentum factor. Must be a scalar. +beta2: Momentum factor. Must be a scalar. +epsilon: Ridge term. Must be a scalar. +grad: The gradient. +out: Same as "var". +use_locking: If `True`, updating of the var, m, and v tensors will be protected + by a lock; otherwise the behavior is undefined, but may exhibit less + contention. +use_nesterov: If `True`, uses the nesterov update. +)doc"); REGISTER_OP("ResourceApplyAdam") .Input("var: resource") @@ -777,7 +1410,30 @@ REGISTER_OP("ResourceApplyAdam") .Attr("use_nesterov: bool = false") .SetShapeFn([](InferenceContext* c) { return ApplyAdamShapeFn(c, false /* sparse */); - }); + }) + .Doc(R"doc( +Update '*var' according to the Adam algorithm. + +lr_t <- learning_rate * sqrt(1 - beta2^t) / (1 - beta1^t) +m_t <- beta1 * m_{t-1} + (1 - beta1) * g_t +v_t <- beta2 * v_{t-1} + (1 - beta2) * g_t * g_t +variable <- variable - lr_t * m_t / (sqrt(v_t) + epsilon) + +var: Should be from a Variable(). +m: Should be from a Variable(). +v: Should be from a Variable(). +beta1_power: Must be a scalar. +beta2_power: Must be a scalar. +lr: Scaling factor. Must be a scalar. +beta1: Momentum factor. Must be a scalar. +beta2: Momentum factor. Must be a scalar. +epsilon: Ridge term. Must be a scalar. +grad: The gradient. +use_locking: If `True`, updating of the var, m, and v tensors will be protected + by a lock; otherwise the behavior is undefined, but may exhibit less + contention. +use_nesterov: If `True`, uses the nesterov update. 
+)doc"); static Status ApplyRMSPropShapeFn(InferenceContext* c, bool sparse) { ShapeHandle unused; @@ -828,7 +1484,32 @@ REGISTER_OP("ApplyRMSProp") .Attr("use_locking: bool = false") .SetShapeFn([](InferenceContext* c) { return ApplyRMSPropShapeFn(c, false /* sparse */); - }); + }) + .Doc(R"doc( +Update '*var' according to the RMSProp algorithm. +Note that in dense implementation of this algorithm, ms and mom will +update even if the grad is zero, but in this sparse implementation, ms +and mom will not update in iterations during which the grad is zero. + +mean_square = decay * mean_square + (1-decay) * gradient ** 2 +Delta = learning_rate * gradient / sqrt(mean_square + epsilon) + +ms <- rho * ms_{t-1} + (1-rho) * grad * grad +mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon) +var <- var - mom + +var: Should be from a Variable(). +ms: Should be from a Variable(). +mom: Should be from a Variable(). +lr: Scaling factor. Must be a scalar. +epsilon: Ridge term. Must be a scalar. +rho: Decay rate. Must be a scalar. +grad: The gradient. +out: Same as "var". +use_locking: If `True`, updating of the var, ms, and mom tensors is protected + by a lock; otherwise the behavior is undefined, but may exhibit less + contention. +)doc"); REGISTER_OP("ApplyCenteredRMSProp") .Input("var: Ref(T)") @@ -845,7 +1526,41 @@ REGISTER_OP("ApplyCenteredRMSProp") .Attr("use_locking: bool = false") .SetShapeFn([](InferenceContext* c) { return ApplyCenteredRMSPropShapeFn(c, false /* sparse */); - }); + }) + .Doc(R"doc( +Update '*var' according to the centered RMSProp algorithm. +The centered RMSProp algorithm uses an estimate of the centered second moment +(i.e., the variance) for normalization, as opposed to regular RMSProp, which +uses the (uncentered) second moment. This often helps with training, but is +slightly more expensive in terms of computation and memory. + +Note that in dense implementation of this algorithm, mg, ms, and mom will +update even if the grad is zero, but in this sparse implementation, mg, ms, +and mom will not update in iterations during which the grad is zero. + +mean_square = decay * mean_square + (1-decay) * gradient ** 2 +mean_grad = decay * mean_grad + (1-decay) * gradient + +Delta = learning_rate * gradient / sqrt(mean_square + epsilon - mean_grad ** 2) + +mg <- rho * mg_{t-1} + (1-rho) * grad +ms <- rho * ms_{t-1} + (1-rho) * grad * grad +mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms - mg * mg + epsilon) +var <- var - mom + +var: Should be from a Variable(). +mg: Should be from a Variable(). +ms: Should be from a Variable(). +mom: Should be from a Variable(). +lr: Scaling factor. Must be a scalar. +epsilon: Ridge term. Must be a scalar. +rho: Decay rate. Must be a scalar. +grad: The gradient. +out: Same as "var". +use_locking: If `True`, updating of the var, mg, ms, and mom tensors is + protected by a lock; otherwise the behavior is undefined, but may exhibit less + contention. +)doc"); REGISTER_OP("SparseApplyRMSProp") .Input("var: Ref(T)") @@ -863,7 +1578,33 @@ REGISTER_OP("SparseApplyRMSProp") .Attr("use_locking: bool = false") .SetShapeFn([](InferenceContext* c) { return ApplyRMSPropShapeFn(c, true /* sparse */); - }); + }) + .Doc(R"doc( +Update '*var' according to the RMSProp algorithm. +Note that in dense implementation of this algorithm, ms and mom will +update even if the grad is zero, but in this sparse implementation, ms +and mom will not update in iterations during which the grad is zero. 
 
 REGISTER_OP("SparseApplyCenteredRMSProp")
     .Input("var: Ref(T)")
@@ -882,7 +1623,40 @@ REGISTER_OP("SparseApplyCenteredRMSProp")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyCenteredRMSPropShapeFn(c, true /* sparse */);
-    });
+    })
+    .Doc(R"doc(
+Update '*var' according to the centered RMSProp algorithm.
+The centered RMSProp algorithm uses an estimate of the centered second moment
+(i.e., the variance) for normalization, as opposed to regular RMSProp, which
+uses the (uncentered) second moment. This often helps with training, but is
+slightly more expensive in terms of computation and memory.
+
+Note that in a dense implementation of this algorithm, mg, ms, and mom will
+update even if the grad is zero, but in this sparse implementation, mg, ms,
+and mom will not update in iterations during which the grad is zero.
+
+mean_square = decay * mean_square + (1-decay) * gradient ** 2
+mean_grad = decay * mean_grad + (1-decay) * gradient
+Delta = learning_rate * gradient / sqrt(mean_square + epsilon - mean_grad ** 2)
+
+mg <- rho * mg_{t-1} + (1-rho) * grad
+ms <- rho * ms_{t-1} + (1-rho) * grad * grad
+mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms - mg * mg + epsilon)
+var <- var - mom
+
+var: Should be from a Variable().
+mg: Should be from a Variable().
+ms: Should be from a Variable().
+mom: Should be from a Variable().
+lr: Scaling factor. Must be a scalar.
+epsilon: Ridge term. Must be a scalar.
+rho: Decay rate. Must be a scalar.
+grad: The gradient.
+indices: A vector of indices into the first dimension of var, ms and mom.
+out: Same as "var".
+use_locking: If `True`, updating of the var, mg, ms, and mom tensors is
+  protected by a lock; otherwise the behavior is undefined, but may exhibit less
+  contention.
+)doc");
 
 REGISTER_OP("ResourceApplyRMSProp")
     .Input("var: resource")
@@ -897,7 +1671,31 @@ REGISTER_OP("ResourceApplyRMSProp")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyRMSPropShapeFn(c, false /* sparse */);
-    });
+    })
+    .Doc(R"doc(
+Update '*var' according to the RMSProp algorithm.
+Note that in this dense implementation, ms and mom will update even if the
+grad is zero; in the sparse variants, ms and mom are not updated in
+iterations during which the grad is zero.
+
+mean_square = decay * mean_square + (1-decay) * gradient ** 2
+Delta = learning_rate * gradient / sqrt(mean_square + epsilon)
+
+ms <- rho * ms_{t-1} + (1-rho) * grad * grad
+mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)
+var <- var - mom
+
+var: Should be from a Variable().
+ms: Should be from a Variable().
+mom: Should be from a Variable().
+lr: Scaling factor. Must be a scalar.
+epsilon: Ridge term. Must be a scalar.
+rho: Decay rate. Must be a scalar.
+grad: The gradient.
+use_locking: If `True`, updating of the var, ms, and mom tensors is protected
+  by a lock; otherwise the behavior is undefined, but may exhibit less
+  contention.
+)doc");
 
 REGISTER_OP("ResourceApplyCenteredRMSProp")
     .Input("var: resource")
@@ -913,7 +1711,40 @@ REGISTER_OP("ResourceApplyCenteredRMSProp")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyCenteredRMSPropShapeFn(c, false /* sparse */);
-    });
+    })
+    .Doc(R"doc(
+Update '*var' according to the centered RMSProp algorithm.
+The centered RMSProp algorithm uses an estimate of the centered second moment
+(i.e., the variance) for normalization, as opposed to regular RMSProp, which
+uses the (uncentered) second moment. This often helps with training, but is
+slightly more expensive in terms of computation and memory.
+
+Note that in this dense implementation, mg, ms, and mom will update even if
+the grad is zero; in the sparse variants, mg, ms, and mom are not updated in
+iterations during which the grad is zero.
+
+mean_square = decay * mean_square + (1-decay) * gradient ** 2
+mean_grad = decay * mean_grad + (1-decay) * gradient
+
+Delta = learning_rate * gradient / sqrt(mean_square + epsilon - mean_grad ** 2)
+
+mg <- rho * mg_{t-1} + (1-rho) * grad
+ms <- rho * ms_{t-1} + (1-rho) * grad * grad
+mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms - mg * mg + epsilon)
+var <- var - mom
+
+var: Should be from a Variable().
+mg: Should be from a Variable().
+ms: Should be from a Variable().
+mom: Should be from a Variable().
+lr: Scaling factor. Must be a scalar.
+epsilon: Ridge term. Must be a scalar.
+rho: Decay rate. Must be a scalar.
+grad: The gradient.
+use_locking: If `True`, updating of the var, mg, ms, and mom tensors is
+  protected by a lock; otherwise the behavior is undefined, but may exhibit less
+  contention.
+)doc");
 
 REGISTER_OP("ResourceSparseApplyRMSProp")
     .Input("var: resource")
@@ -930,7 +1761,32 @@ REGISTER_OP("ResourceSparseApplyRMSProp")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyRMSPropShapeFn(c, true /* sparse */);
-    });
+    })
+    .Doc(R"doc(
+Update '*var' according to the RMSProp algorithm.
+Note that in a dense implementation of this algorithm, ms and mom will
+update even if the grad is zero, but in this sparse implementation, ms
+and mom will not update in iterations during which the grad is zero.
+
+mean_square = decay * mean_square + (1-decay) * gradient ** 2
+Delta = learning_rate * gradient / sqrt(mean_square + epsilon)
+
+ms <- rho * ms_{t-1} + (1-rho) * grad * grad
+mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)
+var <- var - mom
+
+var: Should be from a Variable().
+ms: Should be from a Variable().
+mom: Should be from a Variable().
+lr: Scaling factor. Must be a scalar.
+epsilon: Ridge term. Must be a scalar.
+rho: Decay rate. Must be a scalar.
+grad: The gradient.
+indices: A vector of indices into the first dimension of var, ms and mom.
+use_locking: If `True`, updating of the var, ms, and mom tensors is protected
+  by a lock; otherwise the behavior is undefined, but may exhibit less
+  contention.
+)doc");
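The centered variant additionally tracks the first moment mg and normalizes by the variance estimate ms - mg*mg. As a sketch (function name ours):

#include <cmath>
#include <cstddef>
#include <vector>

void ApplyCenteredRMSPropSketch(std::vector<float>& var, std::vector<float>& mg,
                                std::vector<float>& ms, std::vector<float>& mom,
                                float lr, float rho, float momentum,
                                float epsilon, const std::vector<float>& grad) {
  for (std::size_t i = 0; i < var.size(); ++i) {
    mg[i] = rho * mg[i] + (1 - rho) * grad[i];                // first moment
    ms[i] = rho * ms[i] + (1 - rho) * grad[i] * grad[i];      // second moment
    mom[i] = momentum * mom[i] +
             lr * grad[i] / std::sqrt(ms[i] - mg[i] * mg[i] + epsilon);
    var[i] -= mom[i];
  }
}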
 
 REGISTER_OP("ResourceSparseApplyCenteredRMSProp")
     .Input("var: resource")
@@ -948,7 +1804,39 @@ REGISTER_OP("ResourceSparseApplyCenteredRMSProp")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyCenteredRMSPropShapeFn(c, true /* sparse */);
-    });
+    })
+    .Doc(R"doc(
+Update '*var' according to the centered RMSProp algorithm.
+The centered RMSProp algorithm uses an estimate of the centered second moment
+(i.e., the variance) for normalization, as opposed to regular RMSProp, which
+uses the (uncentered) second moment. This often helps with training, but is
+slightly more expensive in terms of computation and memory.
+
+Note that in a dense implementation of this algorithm, mg, ms, and mom will
+update even if the grad is zero, but in this sparse implementation, mg, ms,
+and mom will not update in iterations during which the grad is zero.
+
+mean_square = decay * mean_square + (1-decay) * gradient ** 2
+mean_grad = decay * mean_grad + (1-decay) * gradient
+Delta = learning_rate * gradient / sqrt(mean_square + epsilon - mean_grad ** 2)
+
+mg <- rho * mg_{t-1} + (1-rho) * grad
+ms <- rho * ms_{t-1} + (1-rho) * grad * grad
+mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms - mg * mg + epsilon)
+var <- var - mom
+
+var: Should be from a Variable().
+mg: Should be from a Variable().
+ms: Should be from a Variable().
+mom: Should be from a Variable().
+lr: Scaling factor. Must be a scalar.
+epsilon: Ridge term. Must be a scalar.
+rho: Decay rate. Must be a scalar.
+grad: The gradient.
+indices: A vector of indices into the first dimension of var, ms and mom.
+use_locking: If `True`, updating of the var, mg, ms, and mom tensors is
+  protected by a lock; otherwise the behavior is undefined, but may exhibit less
+  contention.
+)doc");
 
 static Status ApplyAddSignShapeFn(InferenceContext* c, bool sparse) {
   ShapeHandle unused;
@@ -979,7 +1867,8 @@ REGISTER_OP("ApplyAddSign")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyAddSignShapeFn(c, /*sparse=*/false);
-    });
+    })
+    .Doc(strings::StrCat(kAddSignCommonDocStr, kOutDocStr, kLockDocStr));
 
 REGISTER_OP("ResourceApplyAddSign")
     .Input("var: resource")
@@ -993,7 +1882,8 @@ REGISTER_OP("ResourceApplyAddSign")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyAddSignShapeFn(c, /*sparse=*/false);
-    });
+    })
+    .Doc(strings::StrCat(kAddSignCommonDocStr, kLockDocStr));
 
 static Status ApplyPowerSignShapeFn(InferenceContext* c, bool sparse) {
   ShapeHandle unused;
@@ -1024,7 +1914,8 @@ REGISTER_OP("ApplyPowerSign")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyPowerSignShapeFn(c, /*sparse=*/false);
-    });
+    })
+    .Doc(strings::StrCat(kPowerSignCommonDocStr, kOutDocStr, kLockDocStr));
 
 REGISTER_OP("ResourceApplyPowerSign")
     .Input("var: resource")
@@ -1038,6 +1929,8 @@ REGISTER_OP("ResourceApplyPowerSign")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyPowerSignShapeFn(c, /*sparse=*/false);
-    });
+    })
+    .Doc(strings::StrCat(kPowerSignCommonDocStr, kLockDocStr));
+
 }  // namespace tensorflow
diff --git a/tensorflow/core/ops/word2vec_ops.cc b/tensorflow/core/ops/word2vec_ops.cc
index ed685dcf0a..b6acc2213c 100644
--- a/tensorflow/core/ops/word2vec_ops.cc
+++ b/tensorflow/core/ops/word2vec_ops.cc
@@ -33,7 +33,25 @@ REGISTER_OP("Skipgram")
     .Attr("batch_size: int")
     .Attr("window_size: int = 5")
     .Attr("min_count: int = 5")
-    .Attr("subsample: float = 1e-3");
+    .Attr("subsample: float = 1e-3")
+    .Doc(R"doc(
+Parses a text file and creates a batch of examples.
+
+vocab_word: A vector of words in the corpus.
+vocab_freq: Frequencies of words. Sorted in non-ascending order.
+words_per_epoch: Number of words per epoch in the data file.
+current_epoch: The current epoch number.
+total_words_processed: The total number of words processed so far.
+examples: A vector of word ids.
+labels: A vector of word ids.
+filename: The corpus's text file name.
+batch_size: The size of the produced batch.
+window_size: The number of words to predict to the left and right of the target.
+min_count: The minimum number of word occurrences for it to be included in the
+  vocabulary.
+subsample: Threshold for word occurrence. Words that appear with higher
+  frequency will be randomly down-sampled. Set to 0 to disable.
+)doc");
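For reference, a hedged sketch of the standard word2vec down-sampling rule that the `subsample` attr refers to (Mikolov et al., 2013); whether the kernel uses exactly this form is an assumption here:

#include <cmath>
#include <cstdint>
#include <random>

// freq is the word's occurrence count in the corpus; total is the corpus size
// in words. Frequent words are kept with probability keep_prob < 1.
bool KeepWordSketch(int64_t freq, int64_t total, float subsample,
                    std::mt19937& rng) {
  if (subsample <= 0) return true;  // 0 disables down-sampling
  const double f = static_cast<double>(freq) / static_cast<double>(total);
  const double keep_prob = (std::sqrt(f / subsample) + 1.0) * (subsample / f);
  return std::uniform_real_distribution<double>(0.0, 1.0)(rng) < keep_prob;
}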
 
 REGISTER_OP("NegTrain")
     .Deprecated(19,
@@ -46,6 +64,16 @@ REGISTER_OP("NegTrain")
     .Input("lr: float")
     .SetIsStateful()
     .Attr("vocab_count: list(int)")
-    .Attr("num_negative_samples: int");
+    .Attr("num_negative_samples: int")
+    .Doc(R"doc(
+Training via negative sampling.
+
+w_in: The input word embedding.
+w_out: The output word embedding.
+examples: A vector of word ids.
+labels: A vector of word ids.
+vocab_count: Count of words in the vocabulary.
+num_negative_samples: Number of negative samples per example.
+)doc");
 
 }  // end namespace tensorflow
diff --git a/tensorflow/core/user_ops/fact.cc b/tensorflow/core/user_ops/fact.cc
index 3a4fc8115a..800008e0b8 100644
--- a/tensorflow/core/user_ops/fact.cc
+++ b/tensorflow/core/user_ops/fact.cc
@@ -18,7 +18,11 @@ limitations under the License.
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 
-REGISTER_OP("Fact").Output("fact: string");
+REGISTER_OP("Fact")
+    .Output("fact: string")
+    .Doc(R"doc(
+Output a fact about factorials.
+)doc");
 
 class FactOp : public tensorflow::OpKernel {
  public: