From 2db9a1edc1695877b3f90181e74cfbd8b1a8cdc7 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower"
Date: Wed, 7 Sep 2016 06:35:58 -0800
Subject: C++ Gradients: Adds gradient functions and tests for Pack/Unpack, and
 takes care of multiple TODOs:

*) Adds support and unit test for returning dependent gradient outputs.
*) Adds support and unit test for stopping backprop at frontier of requested inputs.
*) Adds support and unit test for returning gradients for nodes with multiple outputs.
*) Moves common unit test code out into a testlib.
*) Moves common gradient-specific unit test code out into a separate testlib.

Change: 132434513
---
 tensorflow/cc/BUILD                        |  77 +++++++++++--
 tensorflow/cc/framework/cc_ops_test.cc     |  43 +++----
 tensorflow/cc/framework/gradients.cc       |  46 ++++----
 tensorflow/cc/framework/gradients_test.cc  | 173 +++++++++++++++++++++++++----
 tensorflow/cc/framework/testutil.cc        |  40 +++++++
 tensorflow/cc/framework/testutil.h         |  35 ++++++
 tensorflow/cc/gradients/array_grad.cc      |  55 +++++++++
 tensorflow/cc/gradients/array_grad_test.cc | 111 ++++++++++++++++++
 tensorflow/cc/gradients/grad_testutil.cc   |  37 ++++++
 tensorflow/cc/gradients/grad_testutil.h    |  35 ++++++
 tensorflow/cc/gradients/math_grad_test.cc  |  55 ++-------
 11 files changed, 584 insertions(+), 123 deletions(-)
 create mode 100644 tensorflow/cc/framework/testutil.cc
 create mode 100644 tensorflow/cc/framework/testutil.h
 create mode 100644 tensorflow/cc/gradients/array_grad.cc
 create mode 100644 tensorflow/cc/gradients/array_grad_test.cc
 create mode 100644 tensorflow/cc/gradients/grad_testutil.cc
 create mode 100644 tensorflow/cc/gradients/grad_testutil.h

diff --git a/tensorflow/cc/BUILD b/tensorflow/cc/BUILD
index 8f9ac46676..d48494698b 100644
--- a/tensorflow/cc/BUILD
+++ b/tensorflow/cc/BUILD
@@ -21,7 +21,6 @@ cc_library(
     deps = [
         ":cc_ops",
         ":grad_op_registry",
-        ":math_grad",
        ":ops",
         ":scope",
         "//tensorflow/core:core_cpu",
@@ -36,7 +35,9 @@ tf_cc_test(
     deps = [
         ":cc_ops",
         ":grad_op_registry",
+        ":grad_ops",
         ":gradients",
+        ":testutil",
         "//tensorflow/core:all_kernels",
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:framework",
@@ -46,6 +47,26 @@ tf_cc_test(
     ],
 )
 
+cc_library(
+    name = "grad_ops",
+    deps = [
+        ":array_grad",
+        ":math_grad",
+    ],
+)
+
+cc_library(
+    name = "grad_testutil",
+    testonly = 1,
+    srcs = ["gradients/grad_testutil.cc"],
+    hdrs = ["gradients/grad_testutil.h"],
+    deps = [
+        ":grad_op_registry",
+        ":ops",
+        ":scope",
+    ],
+)
+
 cc_library(
     name = "ops",
     srcs = ["framework/ops.cc"],
@@ -146,6 +167,33 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "array_grad",
+    srcs = ["gradients/array_grad.cc"],
+    deps = [
+        ":cc_ops",
+        ":grad_op_registry",
+        ":ops",
+        ":scope",
+        "//tensorflow/core:core_cpu",
+        "//tensorflow/core:framework",
+    ],
+)
+
+tf_cc_test(
+    name = "gradients/array_grad_test",
+    deps = [
+        ":array_grad",
+        ":cc_ops",
+        ":grad_op_registry",
+        ":grad_testutil",
+        ":testutil",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+        "//tensorflow/core:testlib",
+    ],
+)
+
 cc_library(
     name = "math_grad",
     srcs = ["gradients/math_grad.cc"],
@@ -164,12 +212,10 @@ tf_cc_test(
     deps = [
         ":cc_ops",
         ":grad_op_registry",
+        ":grad_testutil",
         ":math_grad",
-        "//tensorflow/core:all_kernels",
-        "//tensorflow/core:core_cpu",
-        "//tensorflow/core:framework",
+        ":testutil",
         "//tensorflow/core:lib_internal",
-        "//tensorflow/core:tensorflow",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
         "//tensorflow/core:testlib",
@@ -212,9 +258,7 @@ tf_cc_test(
         ":client_session",
         ":test_op",
         ":test_op_op_lib",
-        "//tensorflow/core:all_kernels",
-        "//tensorflow/core:core_cpu",
-        "//tensorflow/core:tensorflow",
+        ":testutil",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
         "//tensorflow/core:testlib",
@@ -254,6 +298,23 @@ cc_library(
     alwayslink = 1,
 )
 
+cc_library(
+    name = "testutil",
+    testonly = 1,
+    srcs = ["framework/testutil.cc"],
+    hdrs = ["framework/testutil.h"],
+    deps = [
+        ":client_session",
+        ":ops",
+        ":scope",
+        "//tensorflow/core:all_kernels",
+        "//tensorflow/core:core_cpu",
+        "//tensorflow/core:lib_internal",
+        "//tensorflow/core:tensorflow",
+        "//tensorflow/core:testlib",
+    ],
+)
+
 tf_gen_op_wrappers_cc(
     name = "test_op",
     op_lib_names = [
diff --git a/tensorflow/cc/framework/cc_ops_test.cc b/tensorflow/cc/framework/cc_ops_test.cc
index ded02e5297..8a304e9e9f 100644
--- a/tensorflow/cc/framework/cc_ops_test.cc
+++ b/tensorflow/cc/framework/cc_ops_test.cc
@@ -14,12 +14,11 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/cc/client/client_session.h"
+#include "tensorflow/cc/framework/testutil.h"
 #include "tensorflow/cc/ops/standard_ops.h"
 #include "tensorflow/cc/ops/test_op.h"
 #include "tensorflow/core/framework/tensor_testutil.h"
-#include "tensorflow/core/graph/default_device.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
-#include "tensorflow/core/platform/test.h"
 
 namespace tensorflow {
 using namespace ops;  // NOLINT(build/namespaces)
@@ -32,18 +31,6 @@ Output Linear(const Scope& scope, Input x, Input w, Input b) {
   return BiasAdd(cop_scopes.last, m, b);
 }
 
-void GetTensors(const Scope& scope, OutputList tensors,
-                std::vector<Tensor>* out) {
-  ClientSession session(scope);
-  TF_CHECK_OK(session.Run(tensors, out));
-}
-
-void GetTensor(const Scope& scope, Output tensor, Tensor* out) {
-  std::vector<Tensor> outputs;
-  GetTensors(scope, {tensor}, &outputs);
-  *out = outputs[0];
-}
-
 void GetColocationConstraints(Output tensor, std::vector<string>* constraints) {
   constraints->clear();
   const auto& attrs = tensor.op().node()->def().attr();
@@ -73,7 +60,7 @@ TEST(CCOpTest, Basic) {
   MatMul m(root, c, {{41}, {1}});
   TF_EXPECT_OK(root.status());
   Tensor out;
-  GetTensor(root, m, &out);
+  test::GetTensor(root, m, &out);
   test::ExpectTensorEqual<int>(out, test::AsTensor<int>({42}, {1, 1}));
 }
 
@@ -82,7 +69,7 @@ TEST(CCOpTest, Attrs) {
   auto m = MatMul(root, {{1}, {1}}, {{41}, {1}}, MatMul::TransposeA(true));
   TF_EXPECT_OK(root.status());
   Tensor out;
-  GetTensor(root, m, &out);
+  test::GetTensor(root, m, &out);
   test::ExpectTensorEqual<int>(out, test::AsTensor<int>({42}, {1, 1}));
 }
 
@@ -92,7 +79,7 @@ TEST(CCOpTest, SplitConcat) {
   auto c = Concat(root, 0, {p[0], p[1]});
   TF_EXPECT_OK(root.status());
   Tensor out;
-  GetTensor(root, c, &out);
+  test::GetTensor(root, c, &out);
   test::ExpectTensorEqual<int>(out, test::AsTensor<int>({1, 2}, {2, 1}));
 }
 
@@ -103,7 +90,7 @@ TEST(CCOpTest, CompositeOp) {
   TF_EXPECT_OK(root.status());
   EXPECT_EQ(l.node()->name(), "layer0");
   Tensor out;
-  GetTensor(root, l, &out);
+  test::GetTensor(root, l, &out);
   test::ExpectClose(out, test::AsTensor<double>({-0.3, 34.2}, {1, 2}));
 }
 
@@ -111,7 +98,7 @@ TEST(CCOpTest, MultiOutput) {
   Scope root = Scope::NewRootScope();
   auto u = Unique(root, {1, 2, 2, 4, 3, 2});
   std::vector<Tensor> outputs;
-  GetTensors(root, {u.y, u.idx}, &outputs);
+  test::GetTensors(root, {u.y, u.idx}, &outputs);
   test::ExpectTensorEqual<int>(outputs[0], test::AsTensor<int>({1, 2, 4, 3}));
   test::ExpectTensorEqual<int>(outputs[1],
                                test::AsTensor<int>({0, 1, 1, 2, 3, 1}));
@@ -134,7 +121,7 @@ TEST(CCOpTest, ExampleTrainer) {
   // y_normalized = y ./ y_norm
   auto y_normalized = Div(root.WithOpName("y_normalized"), y, y_norm);
 
   Tensor out;
-  GetTensor(root, y_normalized, &out);
+  test::GetTensor(root, y_normalized, &out);
   test::ExpectTensorNear<float>(
       out, test::AsTensor<float>({0.98058069, -0.19611613}, {2, 1}), 1e-5);
 }
 
@@ -161,7 +148,7 @@ TEST(CCOpTest, ControlDeps) {
 
   std::vector<Tensor> out;
 
-  GetTensors(root, {add}, &out);
+  test::GetTensors(root, {add}, &out);
   test::ExpectTensorNear<float>(out[0], test::AsTensor<float>({42.0f}, {}),
                                 1e-5);
 
@@ -169,7 +156,7 @@ TEST(CCOpTest, ControlDeps) {
   // Note: GetTensors creates a new session, so 'v' is uninitialized.
   // sub should have no control deps, so it should not cause the assign to run.
   // Hence is_inited should be false.
-  GetTensors(root, {sub, is_inited}, &out);
+  test::GetTensors(root, {sub, is_inited}, &out);
   test::ExpectTensorNear<float>(out[0], test::AsTensor<float>({1.0f}, {}),
                                 1e-5);
   test::ExpectTensorEqual<bool>(out[1], test::AsTensor<bool>({false}, {}));
 
@@ -220,12 +207,12 @@ TEST(CCOpTest, TemplatedConst) {
   TF_EXPECT_OK(root.status());
 
   Tensor out;
-  GetTensor(root, c1, &out);
+  test::GetTensor(root, c1, &out);
   test::ExpectTensorEqual<float>(
       out, test::AsTensor<float>({3.f, 2.f, -1.f, 0.f}, {2, 2}));
 
   auto c2 = ops::Const<string>(root, {{"this"}, {"is"}, {"a"}, {"constant"}});
-  GetTensor(root, c2, &out);
+  test::GetTensor(root, c2, &out);
   test::ExpectTensorEqual<string>(
       out, test::AsTensor<string>({"this", "is", "a", "constant"}, {4, 1}));
 }
 
@@ -237,22 +224,22 @@ TEST(CCOpTest, EmptyConst) {
   TF_CHECK_OK(root.status());
 
   Tensor out;
-  GetTensor(root, c1, &out);
+  test::GetTensor(root, c1, &out);
   test::ExpectTensorEqual<float>(out, Tensor(DT_FLOAT, {0}));
 
   auto c2 = ops::Const(root, {{}});
   TF_CHECK_OK(root.status());
-  GetTensor(root, c2, &out);
+  test::GetTensor(root, c2, &out);
   test::ExpectTensorEqual<float>(out, Tensor(DT_FLOAT, {1, 0}));
 
   auto c3 = ops::Const(root, {{{}, {}}});
   TF_CHECK_OK(root.status());
-  GetTensor(root, c3, &out);
+  test::GetTensor(root, c3, &out);
   test::ExpectTensorEqual<float>(out, Tensor(DT_FLOAT, {1, 2, 0}));
 
   auto c4 = ops::Const<int>(root, {{{}}});
   TF_CHECK_OK(root.status());
-  GetTensor(root, c4, &out);
+  test::GetTensor(root, c4, &out);
   test::ExpectTensorEqual<int>(out, Tensor(DT_INT32, {1, 1, 0}));
 
   ops::Const(root, {{}, {{}}});
diff --git a/tensorflow/cc/framework/gradients.cc b/tensorflow/cc/framework/gradients.cc
index ad944a95f4..22e64a7cfb 100644
--- a/tensorflow/cc/framework/gradients.cc
+++ b/tensorflow/cc/framework/gradients.cc
@@ -111,8 +111,8 @@ class SymbolicGradientBuilder {
   std::unordered_set<int> output_nodes_;
 
   // The set of node ids in `inputs_`. Used to identify nodes at backprop
-  // frontier.
-  std::unordered_set<int> input_nodes_;
+  // frontier. Maps from Output -> index into `grad_outputs_`.
+  std::unordered_map<Output, int, OutputHash> input_nodes_;
 
   TF_DISALLOW_COPY_AND_ASSIGN(SymbolicGradientBuilder);
 };
@@ -173,10 +173,10 @@ Status SymbolicGradientBuilder::Initialize() {
   for (int i = 0; i < outputs_.size(); ++i) {
     output_nodes_.insert(outputs_[i].node()->id());
   }
-  // Populate `input_nodes_` from node ids in `inputs_`.
+  // Populate `input_nodes_` from Outputs in `inputs_`.
   input_nodes_.reserve(inputs_.size());
   for (int i = 0; i < inputs_.size(); ++i) {
-    input_nodes_.insert(inputs_[i].node()->id());
+    input_nodes_.insert({inputs_[i], i});
   }
 
   // TODO(andydavis) Consider a more efficient data structure for `pending_` to
@@ -187,8 +187,10 @@ Status SymbolicGradientBuilder::Initialize() {
   std::unordered_set<Node*> visited;
   std::deque<Node*> queue;
   for (const Output& nout : inputs_) {
-    queue.push_back(nout.node());
-    visited.insert(nout.node());
+    if (visited.find(nout.node()) == visited.end()) {
+      queue.push_back(nout.node());
+      visited.insert(nout.node());
+    }
   }
 
   // Going forward to figure out which endpoints need to be backpropped.
@@ -286,19 +288,31 @@ Status SymbolicGradientBuilder::AddGradients() {
     Node* n = ready_.front();
     ready_.pop_front();
 
-    // Check if `n` is a member of `input_nodes_` where we terminate backprop.
-    auto iter = input_nodes_.find(n->id());
-    if (iter != input_nodes_.end()) {
-      // Stop backprop.
-      continue;
-    }
-
     // dy[i] is the sum of i-th output's backpropped gradients.
     const int num_y = n->num_outputs();
     dy.clear();
     dy.resize(num_y, {nullptr, 0});
     for (int i = 0; i < num_y; ++i) {
       TF_RETURN_IF_ERROR(SumGradients({n, i}, &dy[i]));
+      auto iter = input_nodes_.find({n, i});
+      if (iter != input_nodes_.end()) {
+        // Return gradients for Output in 'grad_outputs_'.
+        (*grad_outputs_)[iter->second] = dy[i];
+      }
+    }
+
+    // Stop backprop if none of the inputs to `n` are in `backprops_`.
+    bool stop_node = true;
+    for (const Edge* e : n->in_edges()) {
+      if (e->IsControlEdge()) continue;
+      if (backprops_.find({e->src(), e->src_output()}) != backprops_.end()) {
+        stop_node = false;
+        break;
+      }
+    }
+
+    if (stop_node) {
+      continue;
     }
 
     if (IsPrimitiveOpWithNoGrad(n->type_string())) {
@@ -331,12 +345,6 @@ Status SymbolicGradientBuilder::AddGradients() {
           BackpropAlongEdge(dx[dx_index++], {e->src(), e->src_output()}));
     }
   }
-
-  // Return gradients for `inputs_` in `grad_outputs_`.
-  for (int i = 0; i < inputs_.size(); ++i) {
-    TF_RETURN_IF_ERROR(SumGradients(inputs_[i], &(*grad_outputs_)[i]));
-  }
-
   return Status::OK();
 }
diff --git a/tensorflow/cc/framework/gradients_test.cc b/tensorflow/cc/framework/gradients_test.cc
index 5ea47bac7f..1e688dee1f 100644
--- a/tensorflow/cc/framework/gradients_test.cc
+++ b/tensorflow/cc/framework/gradients_test.cc
@@ -15,6 +15,7 @@ limitations under the License.
 #include "tensorflow/cc/framework/gradients.h"
 
 #include "tensorflow/cc/framework/grad_op_registry.h"
+#include "tensorflow/cc/framework/testutil.h"
 #include "tensorflow/cc/ops/standard_ops.h"
 #include "tensorflow/core/framework/node_def_util.h"
 #include "tensorflow/core/framework/tensor_testutil.h"
@@ -28,8 +29,6 @@ using namespace ops;  // NOLINT(build/namespaces)
 namespace {
 
 // TODO(andydavis) Add more unit tests once more gradient functions are ported.
-// TODO(andydavis) Add unit test that adds gradients to compute two Outputs,
-// where the gradient w.r.t. one Output depends on the other.
 class GradientsTest : public ::testing::Test {
  protected:
   GradientsTest()
@@ -38,9 +37,9 @@ class GradientsTest : public ::testing::Test {
 
   void CompareTestAndExpectedGraphs() {
     GraphDef gdef_test;
-    TF_EXPECT_OK(scope_test_.ToGraphDef(&gdef_test));
+    TF_ASSERT_OK(scope_test_.ToGraphDef(&gdef_test));
     GraphDef gdef_exp;
-    TF_EXPECT_OK(scope_expected_.ToGraphDef(&gdef_exp));
+    TF_ASSERT_OK(scope_expected_.ToGraphDef(&gdef_exp));
     TF_EXPECT_GRAPH_EQ(gdef_test, gdef_exp);
   }
 
@@ -74,13 +73,13 @@ class GradientsTest : public ::testing::Test {
 //
 
 TEST_F(GradientsTest, OneMatMul) {
-  bool expected = false;
-  for (Scope scope : {scope_test_, scope_expected_}) {
+  for (const bool expected : {false, true}) {
+    const Scope& scope = expected ? scope_expected_ : scope_test_;
     // Construct forward graph.
     auto x = Const(scope, {{1.0, 2.0}, {3.0, 4.0}});
     auto y = Const(scope, {{1.0, 0.0}, {0.0, 1.0}});
     auto z = MatMul(scope, x, y);
-    TF_EXPECT_OK(scope.status());
+    TF_ASSERT_OK(scope.status());
     CHECK_NOTNULL(z.node());
 
     if (expected) {
@@ -92,17 +91,16 @@ TEST_F(GradientsTest, OneMatMul) {
       // Call AddSymbolicGradients.
       auto dz = Const(scope, {{1.0, 1.0}, {1.0, 1.0}});
       std::vector<Output> grad_outputs;
-      TF_EXPECT_OK(
+      TF_ASSERT_OK(
           AddSymbolicGradients(scope, {z}, {x, y}, {dz}, &grad_outputs));
     }
-    expected = true;
   }
   CompareTestAndExpectedGraphs();
 }
 
 TEST_F(GradientsTest, TwoMatMuls_Chained) {
-  bool expected = false;
-  for (Scope scope : {scope_test_, scope_expected_}) {
+  for (const bool expected : {false, true}) {
+    const Scope& scope = expected ? scope_expected_ : scope_test_;
     // Construct forward graph.
     auto u = Const(scope, {{1.0, 2.0}, {3.0, 4.0}});
     auto v = Const(scope, {{1.0, 0.0}, {0.0, 1.0}});
@@ -111,7 +109,7 @@ TEST_F(GradientsTest, TwoMatMuls_Chained) {
     auto y = Const(scope, {{1.0, 0.0}, {0.0, 1.0}});
     auto z = MatMul(scope, x, y);
-    TF_EXPECT_OK(scope.status());
+    TF_ASSERT_OK(scope.status());
     CHECK_NOTNULL(z.node());
 
     if (expected) {
@@ -126,28 +124,27 @@ TEST_F(GradientsTest, TwoMatMuls_Chained) {
       // Call AddSymbolicGradients.
       auto dz = Const(scope, {{1.0, 1.0}, {1.0, 1.0}});
       std::vector<Output> grad_outputs;
-      TF_EXPECT_OK(
+      TF_ASSERT_OK(
          AddSymbolicGradients(scope, {z}, {u, v}, {dz}, &grad_outputs));
     }
-    expected = true;
   }
   CompareTestAndExpectedGraphs();
 }
 
 TEST_F(GradientsTest, TwoMatMuls_Independent) {
-  bool expected = false;
-  for (Scope scope : {scope_test_, scope_expected_}) {
+  for (const bool expected : {false, true}) {
+    const Scope& scope = expected ? scope_expected_ : scope_test_;
     // Construct forward graph.
     auto t = Const(scope, {{1.0, 2.0}, {3.0, 4.0}});
     auto u = Const(scope, {{1.0, 0.0}, {0.0, 1.0}});
     auto v = MatMul(scope, t, u);
-    TF_EXPECT_OK(scope.status());
+    TF_ASSERT_OK(scope.status());
     CHECK_NOTNULL(v.node());
 
     auto x = Const(scope, {{5.0, 6.0}, {7.0, 8.0}});
     auto y = Const(scope, {{1.0, 0.0}, {0.0, 1.0}});
     auto z = MatMul(scope, x, y);
-    TF_EXPECT_OK(scope.status());
+    TF_ASSERT_OK(scope.status());
     CHECK_NOTNULL(z.node());
 
     if (expected) {
@@ -161,16 +158,148 @@ TEST_F(GradientsTest, TwoMatMuls_Independent) {
       auto dy = MatMul(scope, x, dz, MatMul::TransposeA(true));
     } else {
       // Call AddSymbolicGradients.
-      auto dv = Const(scope_test_, {{1.0, 1.0}, {1.0, 1.0}});
-      auto dz = Const(scope_test_, {{1.0, 1.0}, {1.0, 1.0}});
+      auto dv = Const(scope, {{1.0, 1.0}, {1.0, 1.0}});
+      auto dz = Const(scope, {{1.0, 1.0}, {1.0, 1.0}});
       std::vector<Output> grad_outputs;
-      TF_EXPECT_OK(AddSymbolicGradients(scope, {v, z}, {t, u, x, y}, {dv, dz},
+      TF_ASSERT_OK(AddSymbolicGradients(scope, {v, z}, {t, u, x, y}, {dv, dz},
                                         &grad_outputs));
     }
-    expected = true;
   }
   CompareTestAndExpectedGraphs();
 }
 
+TEST_F(GradientsTest, PackUnpack_Chained) {
+  for (const bool expected : {false, true}) {
+    const Scope& scope = expected ? scope_expected_ : scope_test_;
+    // Construct forward graph.
+    auto a = Const(scope, 1, {4, 2});
+    auto b = Const(scope, 2, {4, 2});
+    auto c = Const(scope, 3, {4, 2});
+
+    auto pack = Pack(scope, {a, b, c});
+    auto unpack = Unpack(scope, pack.output, 3);
+    TF_ASSERT_OK(scope.status());
+
+    // Construct grad inputs.
+    auto dx = Const(scope, 4, {4, 2});
+    auto dy = Const(scope, 5, {4, 2});
+    auto dz = Const(scope, 6, {4, 2});
+
+    if (expected) {
+      // Construct backward graph.
+      auto unpack_grad = Pack(scope, {dx, dy, dz});
+      auto pack_grad = Unpack(scope, unpack_grad.output, 3);
+    } else {
+      // Call AddSymbolicGradients.
+      std::vector<Output> grad_outputs;
+      TF_ASSERT_OK(AddSymbolicGradients(scope, unpack.output, {a, b, c},
+                                        {dx, dy, dz}, &grad_outputs));
+    }
+  }
+  CompareTestAndExpectedGraphs();
+}
+
+TEST_F(GradientsTest, PackUnpack_StopBackprop) {
+  // Tests that backprop stops before calculating gradients for Pack (because
+  // only gradients w.r.t. the output of Pack are requested).
+  for (const bool expected : {false, true}) {
+    const Scope& scope = expected ? scope_expected_ : scope_test_;
+    // Construct forward graph.
+    auto a = Const(scope, 1, {4, 2});
+    auto b = Const(scope, 2, {4, 2});
+    auto c = Const(scope, 3, {4, 2});
+
+    auto pack = Pack(scope, {a, b, c});
+    auto unpack = Unpack(scope, pack.output, 3);
+    TF_ASSERT_OK(scope.status());
+
+    // Construct grad inputs.
+    auto dx = Const(scope, 4, {4, 2});
+    auto dy = Const(scope, 5, {4, 2});
+    auto dz = Const(scope, 6, {4, 2});
+
+    if (expected) {
+      // Construct backward graph.
+      // NOTE: We should only expect the grad function for unpack in the
+      // gradients graph, based on the requested grad outputs.
+      auto unpack_grad = Pack(scope, {dx, dy, dz});
+    } else {
+      // Call AddSymbolicGradients.
+      std::vector<Output> grad_outputs;
+      TF_ASSERT_OK(AddSymbolicGradients(scope, unpack.output, {pack},
+                                        {dx, dy, dz}, &grad_outputs));
+    }
+  }
+  CompareTestAndExpectedGraphs();
+}
+
+TEST_F(GradientsTest, DependentGradOutputs) {
+  // Tests that dependent gradients (in this case the gradients w.r.t. the
+  // output and one input of MatMul) are computed properly.
+
+  // Create two chained MatMul ops.
+  auto u = Const(scope_test_, {{2}});
+  auto v = Const(scope_test_, {{3}});
+  auto x = MatMul(scope_test_, u, v);
+
+  auto y = Const(scope_test_, {{4}});
+  auto z = MatMul(scope_test_, x, y);
+
+  TF_ASSERT_OK(scope_test_.status());
+  CHECK_NOTNULL(z.node());
+
+  // Call AddSymbolicGradients with '5' as initial gradients for 'dz'.
+  // The gradient w.r.t. 'v' (returned in grad_outputs[0]) is dependent on
+  // the gradient w.r.t. 'x' (returned in grad_outputs[1]).
+  auto dz = Const(scope_test_, {{5}});
+  std::vector<Output> grad_outputs;
+  TF_ASSERT_OK(
+      AddSymbolicGradients(scope_test_, {z}, {v, x}, {dz}, &grad_outputs));
+
+  std::vector<Tensor> outputs;
+  test::GetTensors(scope_test_, {grad_outputs[0], grad_outputs[1]}, &outputs);
+
+  // The initial gradient w.r.t. 'z', 'dz', is passed in as '5'.
+  // Since z = MatMul(x, y), the gradients w.r.t. 'x' are computed as:
+  //   'dx' = 5 * 'y' = 5 * 4 = 20.
+  // Since x = MatMul(u, v), the gradients w.r.t. 'v' are computed as:
+  //   'dv' = 'dx' * 'u' = 20 * 2 = 40.
+  test::ExpectTensorEqual<int>(outputs[0], test::AsTensor<int>({40}, {1, 1}));
+  test::ExpectTensorEqual<int>(outputs[1], test::AsTensor<int>({20}, {1, 1}));
+}
+
+TEST_F(GradientsTest, MultipleNodeOutputGrads) {
+  // Tests that gradients for multiple outputs of the same node are returned.
+  auto x = Const(scope_test_, 1, {3, 4, 2});
+  auto unpack = Unpack(scope_test_, x, 3);
+  auto pack = Pack(scope_test_, unpack.output);
+
+  // clang-format off
+  auto dx = Const(scope_test_, {40, 41, 42, 43, 44, 45, 46, 47,
+                                50, 51, 52, 53, 55, 55, 56, 57,
+                                60, 61, 62, 63, 66, 66, 66, 67},
+                  {3, 4, 2});
+  // clang-format on
+
+  std::vector<Output> grad_outputs;
+  TF_ASSERT_OK(AddSymbolicGradients(scope_test_, {pack}, unpack.output, {dx},
+                                    &grad_outputs));
+
+  std::vector<Tensor> outputs;
+  test::GetTensors(scope_test_,
+                   {grad_outputs[0], grad_outputs[1], grad_outputs[2]},
+                   &outputs);
+
+  test::ExpectTensorEqual<int>(
+      outputs[0],
+      test::AsTensor<int>({40, 41, 42, 43, 44, 45, 46, 47}, {4, 2}));
+  test::ExpectTensorEqual<int>(
+      outputs[1],
+      test::AsTensor<int>({50, 51, 52, 53, 55, 55, 56, 57}, {4, 2}));
+  test::ExpectTensorEqual<int>(
+      outputs[2],
+      test::AsTensor<int>({60, 61, 62, 63, 66, 66, 66, 67}, {4, 2}));
+}
+
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/cc/framework/testutil.cc b/tensorflow/cc/framework/testutil.cc
new file mode 100644
index 0000000000..58afc6b979
--- /dev/null
+++ b/tensorflow/cc/framework/testutil.cc
@@ -0,0 +1,40 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/cc/framework/testutil.h"
+
+#include "tensorflow/cc/client/client_session.h"
+#include "tensorflow/core/framework/tensor_testutil.h"
+#include "tensorflow/core/graph/default_device.h"
+
+namespace tensorflow {
+using namespace ops;  // NOLINT(build/namespaces)
+
+namespace test {
+
+void GetTensors(const Scope& scope, OutputList tensors,
+                std::vector<Tensor>* out) {
+  ClientSession session(scope);
+  TF_CHECK_OK(session.Run(tensors, out));
+}
+
+void GetTensor(const Scope& scope, Output tensor, Tensor* out) {
+  std::vector<Tensor> outputs;
+  GetTensors(scope, {tensor}, &outputs);
+  *out = outputs[0];
+}
+
+}  // end namespace test
+}  // end namespace tensorflow
diff --git a/tensorflow/cc/framework/testutil.h b/tensorflow/cc/framework/testutil.h
new file mode 100644
index 0000000000..5e67ede6ab
--- /dev/null
+++ b/tensorflow/cc/framework/testutil.h
@@ -0,0 +1,35 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TENSORFLOW_CC_FRAMEWORK_TESTUTIL_H_
+#define THIRD_PARTY_TENSORFLOW_CC_FRAMEWORK_TESTUTIL_H_
+
+#include "tensorflow/cc/framework/ops.h"
+#include "tensorflow/cc/framework/scope.h"
+
+namespace tensorflow {
+namespace test {
+
+// Computes the outputs listed in 'tensors', returns the tensors in 'out'.
+void GetTensors(const Scope& scope, ops::OutputList tensors,
+                std::vector<Tensor>* out);
+
+// Computes the output 'tensor', returning the resulting tensor in 'out'.
+void GetTensor(const Scope& scope, ops::Output tensor, Tensor* out);
+
+}  // namespace test
+}  // namespace tensorflow
+
+#endif  // THIRD_PARTY_TENSORFLOW_CC_FRAMEWORK_TESTUTIL_H_
diff --git a/tensorflow/cc/gradients/array_grad.cc b/tensorflow/cc/gradients/array_grad.cc
new file mode 100644
index 0000000000..262f8d6843
--- /dev/null
+++ b/tensorflow/cc/gradients/array_grad.cc
@@ -0,0 +1,55 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <vector>
+
+#include "tensorflow/cc/ops/standard_ops.h"
+
+#include "tensorflow/cc/framework/grad_op_registry.h"
+
+namespace tensorflow {
+namespace ops {
+namespace {
+
+Status PackGrad(const Scope& scope, const Operation& op,
+                const std::vector<Output>& grad_inputs,
+                std::vector<Output>* grad_outputs) {
+  int N;
+  TF_RETURN_IF_ERROR(GetNodeAttr(op.node()->def(), "N", &N));
+  int axis;
+  TF_RETURN_IF_ERROR(GetNodeAttr(op.node()->def(), "axis", &axis));
+
+  grad_outputs->reserve(N);
+  auto grad_op = Unpack(scope, grad_inputs[0], N, Unpack::Axis(axis));
+  for (const Output& o : grad_op.output) {
+    grad_outputs->emplace_back(o);
+  }
+  return Status::OK();
+}
+REGISTER_GRADIENT_OP("Pack", PackGrad);
+
+Status UnpackGrad(const Scope& scope, const Operation& op,
+                  const std::vector<Output>& grad_inputs,
+                  std::vector<Output>* grad_outputs) {
+  int axis;
+  TF_RETURN_IF_ERROR(GetNodeAttr(op.node()->def(), "axis", &axis));
+  grad_outputs->push_back(Pack(scope, grad_inputs, Pack::Axis(axis)));
+  return Status::OK();
+}
+REGISTER_GRADIENT_OP("Unpack", UnpackGrad);
+
+}  // anonymous namespace
+}  // namespace ops
+}  // namespace tensorflow
diff --git a/tensorflow/cc/gradients/array_grad_test.cc b/tensorflow/cc/gradients/array_grad_test.cc
new file mode 100644
index 0000000000..8166f64fed
--- /dev/null
+++ b/tensorflow/cc/gradients/array_grad_test.cc
@@ -0,0 +1,111 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/cc/framework/grad_op_registry.h"
+#include "tensorflow/cc/framework/testutil.h"
+#include "tensorflow/cc/gradients/grad_testutil.h"
+#include "tensorflow/cc/ops/standard_ops.h"
+#include "tensorflow/core/framework/tensor_testutil.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+
+namespace tensorflow {
+using namespace ops;  // NOLINT(build/namespaces)
+
+namespace {
+
+class PackGradTest : public ::testing::Test {
+ protected:
+  PackGradTest() : scope_(Scope::NewRootScope()) {}
+
+  void CheckGrad(const Output& grad_input, const int axis) {
+    auto a = ops::Const(scope_, 1, {2, 3});
+    auto b = ops::Const(scope_, 2, {2, 3});
+
+    auto pack = Pack(scope_, {a, b}, Pack::Axis(axis));
+    TF_ASSERT_OK(scope_.status());
+
+    std::vector<Output> grad_outputs;
+    TF_ASSERT_OK(test::CallGradFunction(scope_, Operation(pack.node()),
+                                        {grad_input}, &grad_outputs));
+
+    std::vector<Tensor> outputs;
+    test::GetTensors(scope_, {grad_outputs[0], grad_outputs[1]}, &outputs);
+
+    test::ExpectTensorEqual<int>(
+        outputs[0], test::AsTensor<int>({1, 2, 3, 4, 5, 6}, {2, 3}));
+    test::ExpectTensorEqual<int>(
+        outputs[1], test::AsTensor<int>({7, 8, 9, 10, 11, 12}, {2, 3}));
+  }
+
+  Scope scope_;
+};
+
+TEST_F(PackGradTest, Axis0) {
+  CheckGrad(
+      ops::Const(scope_, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, {2, 2, 3}),
+      0);
+}
+
+TEST_F(PackGradTest, Axis1) {
+  CheckGrad(
+      ops::Const(scope_, {1, 2, 3, 7, 8, 9, 4, 5, 6, 10, 11, 12}, {2, 2, 3}),
+      1);
+}
+
+class UnpackGradTest : public ::testing::Test {
+ protected:
+  UnpackGradTest() : scope_(Scope::NewRootScope()) {}
+
+  void CheckGrad(const std::vector<Output>& grad_inputs, const int num,
+                 const int axis) {
+    auto a = ops::Const(scope_, 1, {4, 2, 3});
+
+    auto unpack = Unpack(scope_, a, num, Unpack::Axis(axis));
+    TF_ASSERT_OK(scope_.status());
+
+    std::vector<Output> grad_outputs;
+    TF_ASSERT_OK(test::CallGradFunction(scope_, Operation(unpack[0].node()),
+                                        grad_inputs, &grad_outputs));
+
+    Tensor expected_output(DT_INT32, {4, 2, 3});
+    test::FillIota<int32>(&expected_output, 1);
+
+    Tensor output;
+    test::GetTensor(scope_, grad_outputs[0], &output);
+
+    test::ExpectTensorEqual<int>(output, expected_output);
+  }
+
+  Scope scope_;
+};
+
+TEST_F(UnpackGradTest, Axis0) {
+  auto g0 = ops::Const(scope_, {1, 2, 3, 4, 5, 6}, {2, 3});
+  auto g1 = ops::Const(scope_, {7, 8, 9, 10, 11, 12}, {2, 3});
+  auto g2 = ops::Const(scope_, {13, 14, 15, 16, 17, 18}, {2, 3});
+  auto g3 = ops::Const(scope_, {19, 20, 21, 22, 23, 24}, {2, 3});
+  CheckGrad({g0, g1, g2, g3}, 4, 0);
+}
+
+TEST_F(UnpackGradTest, Axis1) {
+  auto g0 =
+      ops::Const(scope_, {{1, 2, 3}, {7, 8, 9}, {13, 14, 15}, {19, 20, 21}});
+  auto g1 =
+      ops::Const(scope_, {{4, 5, 6}, {10, 11, 12}, {16, 17, 18}, {22, 23, 24}});
+  CheckGrad({g0, g1}, 2, 1);
+}
+
+}  // namespace
+}  // namespace tensorflow
diff --git a/tensorflow/cc/gradients/grad_testutil.cc b/tensorflow/cc/gradients/grad_testutil.cc
new file mode 100644
index 0000000000..04b29d4e8b
--- /dev/null
+++ b/tensorflow/cc/gradients/grad_testutil.cc
@@ -0,0 +1,37 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/cc/gradients/grad_testutil.h"
+
+#include "tensorflow/cc/framework/grad_op_registry.h"
+
+namespace tensorflow {
+using namespace ops;  // NOLINT(build/namespaces)
+
+namespace test {
+
+Status CallGradFunction(const Scope& scope, const Operation& op,
+                        const std::vector<Output>& grad_inputs,
+                        std::vector<Output>* grad_outputs) {
+  GradFunc grad_fn;
+  TF_RETURN_IF_ERROR(
+      GradOpRegistry::Global()->Lookup(op.node()->type_string(), &grad_fn));
+  TF_RETURN_IF_ERROR(grad_fn(scope, op, grad_inputs, grad_outputs));
+  TF_RETURN_IF_ERROR(scope.status());
+  return Status::OK();
+}
+
+}  // end namespace test
+}  // end namespace tensorflow
diff --git a/tensorflow/cc/gradients/grad_testutil.h b/tensorflow/cc/gradients/grad_testutil.h
new file mode 100644
index 0000000000..7a925f9b0e
--- /dev/null
+++ b/tensorflow/cc/gradients/grad_testutil.h
@@ -0,0 +1,35 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TENSORFLOW_CC_GRADIENTS_GRAD_TESTUTIL_H_
+#define THIRD_PARTY_TENSORFLOW_CC_GRADIENTS_GRAD_TESTUTIL_H_
+
+#include "tensorflow/cc/framework/ops.h"
+#include "tensorflow/cc/framework/scope.h"
+
+namespace tensorflow {
+namespace test {
+
+// Calls the gradient function registered for 'op', adding gradient operations
+// to the graph associated with 'scope'. Gradient outputs for each 'op' input
+// are returned in 'grad_outputs'.
+Status CallGradFunction(const Scope& scope, const ops::Operation& op,
+                        const std::vector<ops::Output>& grad_inputs,
+                        std::vector<ops::Output>* grad_outputs);
+
+}  // namespace test
+}  // namespace tensorflow
+
+#endif  // THIRD_PARTY_TENSORFLOW_CC_GRADIENTS_GRAD_TESTUTIL_H_
diff --git a/tensorflow/cc/gradients/math_grad_test.cc b/tensorflow/cc/gradients/math_grad_test.cc
index 6961c584a5..d10a96a4ab 100644
--- a/tensorflow/cc/gradients/math_grad_test.cc
+++ b/tensorflow/cc/gradients/math_grad_test.cc
@@ -14,14 +14,12 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/cc/framework/grad_op_registry.h"
+#include "tensorflow/cc/framework/testutil.h"
+#include "tensorflow/cc/gradients/grad_testutil.h"
 #include "tensorflow/cc/ops/standard_ops.h"
-#include "tensorflow/core/framework/node_def_util.h"
 #include "tensorflow/core/framework/tensor_testutil.h"
-#include "tensorflow/core/graph/default_device.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/lib/random/random.h"
-#include "tensorflow/core/platform/test.h"
-#include "tensorflow/core/public/session.h"
 
 namespace tensorflow {
 using namespace ops;  // NOLINT(build/namespaces)
 
@@ -33,31 +31,22 @@ namespace {
 // to a testutil library.
 
 class MathGradTest : public ::testing::Test {
  protected:
-  MathGradTest() : root_(Scope::NewRootScope()) {}
+  MathGradTest() : root_(Scope::NewRootScope().WithDevice("/cpu:0")) {}
 
   void ComputeMatMulGrad(const Output& x, const bool t_x, const Output& y,
                          const bool t_y, const Output& dz,
                          std::vector<Tensor>* out) {
     // Compute forward MatMul: z = MatMul(x, y).
     auto z = MatMul(root_, x, y, MatMul::TransposeA(t_x).TransposeB(t_y));
-    TF_EXPECT_OK(root_.status());
+    TF_ASSERT_OK(root_.status());
     CHECK_NOTNULL(z.node());
 
     std::vector<Output> grad_outputs;
     // Call MatMulGrad which populates 'grad_outputs'.
-    CallGradFunction(Operation(z.node()), {dz}, &grad_outputs);
-    EXPECT_EQ(2, grad_outputs.size());
+    TF_ASSERT_OK(test::CallGradFunction(root_, Operation(z.node()), {dz},
+                                        &grad_outputs));
+    ASSERT_EQ(2, grad_outputs.size());
 
     // Run graph and return MatMul gradient tensors for 'dx' and 'dy' in 'out'.
-    GetTensors(root_, {grad_outputs[0], grad_outputs[1]}, out);
-  }
-
-  void CallGradFunction(const Operation& op,
-                        const std::vector<Output>& grad_inputs,
-                        std::vector<Output>* grad_outputs) {
-    GradFunc grad_fn;
-    TF_EXPECT_OK(
-        GradOpRegistry::Global()->Lookup(op.node()->type_string(), &grad_fn));
-    TF_EXPECT_OK(grad_fn(root_, op, grad_inputs, grad_outputs));
-    TF_EXPECT_OK(root_.status());
+    test::GetTensors(root_, {grad_outputs[0], grad_outputs[1]}, out);
   }
 
   Tensor ComputeMatMul(const Output& x, const bool t_x, const Output& y,
@@ -65,7 +54,7 @@ class MathGradTest : public ::testing::Test {
     auto z = MatMul(root_, x, y, MatMul::TransposeA(t_x).TransposeB(t_y));
     TF_EXPECT_OK(root_.status());
     Tensor out;
-    GetTensor(root_, z, &out);
+    test::GetTensor(root_, z, &out);
     return out;
   }
 
@@ -95,32 +84,6 @@ class MathGradTest : public ::testing::Test {
 
   int Rand() { return 1 + (random::New64() % 10); }
 
-  // TODO(andydavis) Move 'GetTensors/GetTensor' to some testutil class.
-  // Note: they should be moved to a general/non-grad specific testutil class.
-  void GetTensors(const Scope& scope, OutputList tensors,
-                  std::vector<Tensor>* out) {
-    SessionOptions options;
-    std::unique_ptr<Session> session(NewSession(options));
-    GraphDef def;
-    scope.graph()->ToGraphDef(&def);
-
-    graph::SetDefaultDevice("/cpu:0", &def);
-
-    TF_CHECK_OK(session->Create(def));
-    std::vector<string> names;
-    for (const auto& t : tensors) {
-      names.push_back(strings::StrCat(t.node()->name(), ":", t.index()));
-    }
-    TF_CHECK_OK(session->Run({}, names, {}, out));
-    TF_CHECK_OK(session->Close());
-  }
-
-  void GetTensor(const Scope& scope, Output tensor, Tensor* out) {
-    std::vector<Tensor> outputs;
-    GetTensors(scope, {tensor}, &outputs);
-    *out = outputs[0];
-  }
-
   Scope root_;
 };
-- 
cgit v1.2.3
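
[Editor's note: the sketch below is an appendix added for context; it is not
part of the patch above.] The tests in gradients_test.cc exercise
AddSymbolicGradients() piece by piece; this fragment shows the intended
end-to-end flow in one place: build a forward graph, request gradients for a
subset of inputs, then evaluate the returned gradient Outputs with a
ClientSession. It assumes a binary linked against the ":cc_ops", ":grad_ops",
":gradients" and ":client_session" targets from tensorflow/cc/BUILD as of
this change; names, values and shapes are illustrative only.

#include <vector>

#include "tensorflow/cc/client/client_session.h"
#include "tensorflow/cc/framework/gradients.h"
#include "tensorflow/cc/ops/standard_ops.h"

using namespace tensorflow;       // NOLINT(build/namespaces)
using namespace tensorflow::ops;  // NOLINT(build/namespaces)

int main() {
  Scope root = Scope::NewRootScope();

  // Forward graph: pack two {4, 2} constants into one {2, 4, 2} tensor.
  auto a = Const(root, 1, {4, 2});
  auto b = Const(root, 2, {4, 2});
  auto pack = Pack(root, {a, b});

  // Initial gradient flowing into Pack's output (all ones, shape {2, 4, 2}).
  auto dy = Const(root, 1, {2, 4, 2});

  // Adds symbolic gradient nodes to the graph (here an Unpack, supplied by
  // the PackGrad function registered in array_grad.cc) and returns the
  // gradients of 'pack' w.r.t. 'a' and 'b'.
  std::vector<Output> grad_outputs;
  TF_CHECK_OK(AddSymbolicGradients(root, {pack}, {a, b}, {dy}, &grad_outputs));

  // Evaluate the gradient Outputs; each is a {4, 2} tensor of ones.
  ClientSession session(root);
  std::vector<Tensor> grads;
  TF_CHECK_OK(session.Run({grad_outputs[0], grad_outputs[1]}, &grads));
  return 0;
}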