author    karl@kubx.ca <karl@kubx.ca>  2018-07-16 00:49:06 -0400
committer karl@kubx.ca <karl@kubx.ca>  2018-07-25 21:10:30 -0400
commit    a278365e8848f5fcbccb42f95a3c523367c1602f (patch)
tree      368044bbb27312ea44e0856548b84d260fc304d0
parent    7ebdc9834bbc583bcc42551b660c8ed256ea7416 (diff)
Enforce uniqueness of custom prefixes for gradients
-rw-r--r--  tensorflow/c/c_api.cc                                                | 17
-rw-r--r--  tensorflow/c/c_api.h                                                 | 31
-rw-r--r--  tensorflow/c/c_api_test.cc                                           | 40
-rw-r--r--  tensorflow/java/src/main/java/org/tensorflow/Graph.java              |  8
-rw-r--r--  tensorflow/java/src/main/java/org/tensorflow/op/NameScope.java       |  4
-rw-r--r--  tensorflow/java/src/main/java/org/tensorflow/op/Scope.java           | 26
-rw-r--r--  tensorflow/java/src/main/java/org/tensorflow/op/core/Gradients.java  |  6
-rw-r--r--  tensorflow/java/src/test/java/org/tensorflow/GraphTest.java          | 16
-rw-r--r--  tensorflow/java/src/test/java/org/tensorflow/op/ScopeTest.java       | 15
-rw-r--r--  tensorflow/java/src/test/java/org/tensorflow/op/core/GradientsTest.java | 11
10 files changed, 98 insertions(+), 76 deletions(-)
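
Note: the sketch below illustrates the behaviour this patch enforces, as exercised by the updated Java tests further down; the class name and the Placeholder/Square wiring are illustrative and not part of the patch.

import org.tensorflow.DataType;
import org.tensorflow.Graph;
import org.tensorflow.Output;

// Illustrative sketch (not part of this patch): reusing a custom
// gradient prefix on the same graph is now rejected.
public final class PrefixUniquenessDemo {
  public static void main(String[] args) {
    try (Graph g = new Graph()) {
      // y = x^2, built directly through the low-level graph API.
      Output<Float> x = g.opBuilder("Placeholder", "x")
          .setAttr("dtype", DataType.FLOAT)
          .build()
          .output(0);
      Output<Float> y = g.opBuilder("Square", "y")
          .addInput(x)
          .build()
          .output(0);

      // First use of the custom prefix succeeds; gradient nodes land under "my_grads/".
      g.addGradients("my_grads", new Output<?>[] {y}, new Output<?>[] {x}, null);

      try {
        // Reusing the same prefix on the same graph now fails.
        g.addGradients("my_grads", new Output<?>[] {y}, new Output<?>[] {x}, null);
      } catch (IllegalArgumentException e) {
        System.out.println("duplicate prefix rejected: " + e.getMessage());
      }
    }
  }
}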
diff --git a/tensorflow/c/c_api.cc b/tensorflow/c/c_api.cc
index 32b0b70620..c1f4745e56 100644
--- a/tensorflow/c/c_api.cc
+++ b/tensorflow/c/c_api.cc
@@ -2411,9 +2411,24 @@ void TF_AddGradientsWithPrefix(TF_Graph* g, const char* prefix, TF_Output* y,
const int first_new_node_id = g->graph.num_node_ids();
+ const char* child_scope_name = prefix;
+ if (child_scope_name != nullptr) {
+ // The operation should fail if the provided name prefix has already been
+ // used in this graph
+ for (const auto& pair: g->name_map) {
+ const string& name = pair.first;
+ if (name.compare(0, name.find_last_of('/'), prefix) == 0) {
+ status->status =
+ InvalidArgument("Duplicate node name in graph: '", prefix, "'");
+ return;
+ }
+ }
+ } else {
+ child_scope_name = "gradients";
+ }
tensorflow::Scope scope =
NewInternalScope(&g->graph, &status->status, &g->refiner)
- .NewSubScope(prefix != nullptr ? prefix : "gradients");
+ .NewSubScope(child_scope_name);
if (dx != nullptr) {
std::vector<tensorflow::Output> dx_arg = OutputsFromTFOutputs(dx, ny);
diff --git a/tensorflow/c/c_api.h b/tensorflow/c/c_api.h
index 8e49158957..0a9fa9ddbc 100644
--- a/tensorflow/c/c_api.h
+++ b/tensorflow/c/c_api.h
@@ -1129,17 +1129,36 @@ TF_CAPI_EXPORT extern void TF_FinishWhile(const TF_WhileParams* params,
// called after a successful TF_NewWhile() call.
TF_CAPI_EXPORT extern void TF_AbortWhile(const TF_WhileParams* params);
-// Adds operations to compute the partial derivatives of sum of `y`s w.r.t `x`s.
+// Adds operations to compute the partial derivatives of sum of `y`s w.r.t `x`s,
+// i.e., d(y_1 + y_2 + ...)/dx_1, d(y_1 + y_2 + ...)/dx_2...
+//
+// `dx` are used as initial gradients (which represent the symbolic partial
+// derivatives of some loss function `L` w.r.t. `y`).
+// `dx` must be nullptr or have size `ny`.
+// If `dx` is nullptr, the implementation will use ones of the same shapes as
+// the `y`s (i.e. `OnesLike`) as the initial gradients.
+// The partial derivatives are returned in `dy`. `dy` should be allocated to
+// size `nx`.
//
-// This method is the equivalent of calling TF_AddGradientsWithPrefix with a
-// nullptr prefix (which will create all gradients operations under "gradients/"
-// by default). See TF_AddGradientsWithPrefix for more details.
+// Gradient nodes are automatically named under the "gradients/" prefix. To
+// guarantee name uniqueness, subsequent calls on the same graph will append an
+// incremental suffix to the prefix: "gradients_1/", "gradients_2/", ...
+// See TF_AddGradientsWithPrefix, which provides a means to specify a custom
+// name prefix for operations added to a graph to compute the gradients.
+//
+// WARNING: This function does not yet support all the gradients that Python
+// supports. See
+// https://www.tensorflow.org/code/tensorflow/cc/gradients/README.md
+// for instructions on how to add more C++ gradients.
TF_CAPI_EXPORT void TF_AddGradients(TF_Graph* g, TF_Output* y, int ny,
TF_Output* x, int nx, TF_Output* dx,
TF_Status* status, TF_Output* dy);
// Adds operations to compute the partial derivatives of sum of `y`s w.r.t `x`s,
// i.e., d(y_1 + y_2 + ...)/dx_1, d(y_1 + y_2 + ...)/dx_2...
+// This is a variant of TF_AddGradients that allows the caller to pass a custom
+// name prefix for the operations added to the graph to compute the gradients.
+//
// `dx` are used as initial gradients (which represent the symbolic partial
// derivatives of some loss function `L` w.r.t. `y`).
// `dx` must be nullptr or have size `ny`.
@@ -1148,7 +1167,9 @@ TF_CAPI_EXPORT void TF_AddGradients(TF_Graph* g, TF_Output* y, int ny,
// The partial derivatives are returned in `dy`. `dy` should be allocated to
// size `nx`.
// `prefix` names the scope into which all gradients operations are being added.
-// If `prefix` is nullptr, "gradients" is used by default.
+// `prefix` must be unique within the provided graph, otherwise this operation
+// will fail. If `prefix` is nullptr, the default prefixing behavior applies;
+// see TF_AddGradients for more details.
//
// WARNING: This function does not yet support all the gradients that python
// supports. See
diff --git a/tensorflow/c/c_api_test.cc b/tensorflow/c/c_api_test.cc
index adcdefbaf3..7094d5d32d 100644
--- a/tensorflow/c/c_api_test.cc
+++ b/tensorflow/c/c_api_test.cc
@@ -1474,19 +1474,17 @@ class CApiGradientsTest : public ::testing::Test {
TF_DeleteStatus(s_);
}
- void TestGradientsSuccess(bool grad_inputs_provided,
- const char* prefix = nullptr) {
+ void TestGradientsSuccess(bool grad_inputs_provided) {
TF_Output inputs[2];
TF_Output outputs[1];
TF_Output grad_outputs[2];
TF_Output expected_grad_outputs[2];
BuildSuccessGraph(inputs, outputs);
- BuildExpectedGraph(grad_inputs_provided, prefix, expected_grad_outputs);
+ BuildExpectedGraph(grad_inputs_provided, expected_grad_outputs);
- AddGradients(grad_inputs_provided, prefix, inputs, 2, outputs, 1,
+ AddGradients(grad_inputs_provided, nullptr, inputs, 2, outputs, 1,
grad_outputs);
-
EXPECT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
// Compare that the graphs match.
@@ -1604,7 +1602,6 @@ class CApiGradientsTest : public ::testing::Test {
}
void BuildExpectedGraph(bool grad_inputs_provided,
- const char* grad_prefix,
TF_Output* expected_grad_outputs) {
// The expected graph looks like this if grad_inputs_provided.
// If grad_inputs_provided is false, Const_0 will be a OnesLike op.
@@ -1633,10 +1630,6 @@ class CApiGradientsTest : public ::testing::Test {
//
const float const0_val[] = {1.0, 2.0, 3.0, 4.0};
const float const1_val[] = {1.0, 0.0, 0.0, 1.0};
- const char* prefix = grad_prefix;
- if (prefix == nullptr) {
- prefix = "gradients";
- }
TF_Operation* const0 =
FloatConst2x2(expected_graph_, s_, const0_val, "Const_0");
TF_Operation* const1 =
@@ -1649,14 +1642,13 @@ class CApiGradientsTest : public ::testing::Test {
const float const3_val[] = {1.0, 1.0, 1.0, 1.0};
const3 = FloatConst2x2(expected_graph_, s_, const3_val, "GradInputs");
} else {
- const3 = OnesLike(expected_graph_, s_, matmul,
- strings::StrCat(prefix, "/OnesLike").c_str());
+ const3 = OnesLike(expected_graph_, s_, matmul, "gradients/OnesLike");
}
TF_Operation* matmul1 = MatMul(expected_graph_, s_, const3, const1,
- strings::StrCat(prefix, "/MatMul").c_str(), false, true);
+ "gradients/MatMul", false, true);
TF_Operation* matmul2 = MatMul(expected_graph_, s_, const0, const3,
- strings::StrCat(prefix, "/MatMul_1").c_str(), true, false);
+ "gradients/MatMul_1", true, false);
expected_grad_outputs[0] = {matmul1, 0};
expected_grad_outputs[1] = {matmul2, 0};
}
@@ -1727,10 +1719,6 @@ TEST_F(CApiGradientsTest, Gradients_NoGradInputs) {
TestGradientsSuccess(false);
}
-TEST_F(CApiGradientsTest, Gradients_NoGradInputsWithScopeName) {
- TestGradientsSuccess(false, "gradscope");
-}
-
TEST_F(CApiGradientsTest, OpWithNoGradientRegistered_GradInputs) {
TestGradientsError(true);
}
@@ -1739,6 +1727,22 @@ TEST_F(CApiGradientsTest, OpWithNoGradientRegistered_NoGradInputs) {
TestGradientsError(false);
}
+TEST_F(CApiGradientsTest, Gradients_WithPrefix) {
+ TF_Output inputs[2];
+ TF_Output outputs[1];
+ TF_Output grad_outputs[2];
+
+ BuildSuccessGraph(inputs, outputs);
+ AddGradients(false, "mygrads", inputs, 2, outputs, 1, grad_outputs);
+ EXPECT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
+
+ AddGradients(false, "mygrads_1", inputs, 2, outputs, 1, grad_outputs);
+ ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
+
+ AddGradients(false, "mygrads_1", inputs, 2, outputs, 1, grad_outputs);
+ ASSERT_EQ(TF_INVALID_ARGUMENT, TF_GetCode(s_)) << TF_Message(s_);
+}
+
void ScalarFloatFromTensor(const TF_Tensor* t, float* f) {
ASSERT_TRUE(t != nullptr);
ASSERT_EQ(TF_FLOAT, TF_TensorType(t));
diff --git a/tensorflow/java/src/main/java/org/tensorflow/Graph.java b/tensorflow/java/src/main/java/org/tensorflow/Graph.java
index 353092701b..32853c1367 100644
--- a/tensorflow/java/src/main/java/org/tensorflow/Graph.java
+++ b/tensorflow/java/src/main/java/org/tensorflow/Graph.java
@@ -152,8 +152,14 @@ public final class Graph implements AutoCloseable {
* <p>
* If {@code dx} is null, the implementation will use dx of {@link org.tensorflow.op.core.OnesLike OnesLike} for all
* shapes in {@code y}.
+ * <p>
+ * {@code prefix} is used as the name prefix applied to all nodes added to the graph to compute gradients. It must
+ * be unique within the provided graph or the operation will fail.
+ * <p>
+ * If {@code prefix} is null, the nodes will be added under the default prefix, which is "gradients" for the
+ * first invocation, then "gradients_1", "gradients_2", etc. for any subsequent calls on the same graph.
*
- * @param prefix string prefix applied to names of nodes added to the graph to compute gradients.
+ * @param prefix unique string prefix prepended to the names of nodes added to the graph to compute gradients.
* If null, defaults to "gradients".
* @param y output of the function to derive
* @param x inputs of the function for which partial derivatives are computed
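
Note: the default-prefix behaviour documented above can be sketched as follows, assuming the same graph g, x and y as in the sketch near the top; the expected names follow the assertions added to GraphTest below.

// Assuming g, x, y as built in the first sketch.
Output<?>[] grad0 = g.addGradients(null, new Output<?>[] {y}, new Output<?>[] {x}, null);
// First call with a null prefix: gradient nodes go under "gradients/".
assert grad0[0].op().name().startsWith("gradients/");

Output<?>[] grad1 = g.addGradients(null, new Output<?>[] {y}, new Output<?>[] {x}, null);
// Second call on the same graph: the default prefix increments to "gradients_1/".
assert grad1[0].op().name().startsWith("gradients_1/");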
diff --git a/tensorflow/java/src/main/java/org/tensorflow/op/NameScope.java b/tensorflow/java/src/main/java/org/tensorflow/op/NameScope.java
index 95a2a2f9f5..2e84cac1ac 100644
--- a/tensorflow/java/src/main/java/org/tensorflow/op/NameScope.java
+++ b/tensorflow/java/src/main/java/org/tensorflow/op/NameScope.java
@@ -56,10 +56,6 @@ final class NameScope {
String actualName = (opName != null) ? opName : name;
return fullyQualify(makeUnique(actualName));
}
-
- String opPrefix() {
- return opPrefix;
- }
/**
* Create a new, root-level namescope.
diff --git a/tensorflow/java/src/main/java/org/tensorflow/op/Scope.java b/tensorflow/java/src/main/java/org/tensorflow/op/Scope.java
index cf0b3d98c1..563ea66ef1 100644
--- a/tensorflow/java/src/main/java/org/tensorflow/op/Scope.java
+++ b/tensorflow/java/src/main/java/org/tensorflow/op/Scope.java
@@ -135,17 +135,8 @@ public final class Scope {
* }</pre>
*
* <p><b>Note:</b> if you provide a composite operator building class (i.e, a class that adds a
- * set of related operations to the graph by calling other operator building code) you should also
- * create a {@link #withSubScope(String)} scope for the underlying operators to group them under a
- * meaningful name.
- *
- * <pre>{@code
- * public static Stddev create(Scope scope, ...) {
- * // group sub-operations under a common name
- * Scope group = scope.withSubScope("stddev");
- * ... Sqrt.create(group, Mean.create(group, ...))
- * }
- * }</pre>
+ * set of related operations to the graph by calling other operator building code), the provided name
+ * will act as a subscope for all underlying operators.
*
* @param defaultName name for the underlying operator.
* @return unique name for the operator.
@@ -154,19 +145,6 @@ public final class Scope {
public String makeOpName(String defaultName) {
return nameScope.makeOpName(defaultName);
}
-
- /**
- * The name prefix of this scope.
- * <p>
- * This value is the combination of the name of this scope and all of its parents, seperated by a '/', e.g.
- * <pre>{@code
- * Scope scope = new Scope(graph);
- * assertEquals(scope.withSubScope("sub1").withSubScope("sub2").prefix(), "sub1/sub2");
- * }</pre>
- */
- public String prefix() {
- return nameScope.opPrefix();
- }
private Scope(Graph graph, NameScope nameScope) {
this.graph = graph;
diff --git a/tensorflow/java/src/main/java/org/tensorflow/op/core/Gradients.java b/tensorflow/java/src/main/java/org/tensorflow/op/core/Gradients.java
index 6d71ddfff0..5432ff244e 100644
--- a/tensorflow/java/src/main/java/org/tensorflow/op/core/Gradients.java
+++ b/tensorflow/java/src/main/java/org/tensorflow/op/core/Gradients.java
@@ -88,7 +88,11 @@ public class Gradients implements Op, Iterable<Operand<?>> {
}
}
}
- Output<?>[] dy = scope.graph().addGradients(scope.prefix(), Operands.asOutputs(y), Operands.asOutputs(x), dx);
+ Output<?>[] dy = scope.graph().addGradients(
+ scope.makeOpName("Gradients"),
+ Operands.asOutputs(y),
+ Operands.asOutputs(x),
+ dx);
return new Gradients(Arrays.asList(dy));
}
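
Note: with this change, Gradients derives its name through scope.makeOpName("Gradients"), so the usual op-naming rules apply; a minimal sketch follows (again assuming g, x and y from the first sketch), mirroring the updated GradientsTest further below.

import java.util.Arrays;
import org.tensorflow.op.Scope;
import org.tensorflow.op.core.Gradients;

// Assuming g, x, y as built in the first sketch.
Scope scope = new Scope(g);

// Default op name: gradient nodes are created under "Gradients/"
// ("Gradients_1/", ... on subsequent calls, per makeOpName's uniquing).
Gradients grads = Gradients.create(scope, y, Arrays.asList(x));

// Scope.withName overrides the default, giving "MyGradients/".
Gradients named = Gradients.create(scope.withName("MyGradients"), y, Arrays.asList(x));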
diff --git a/tensorflow/java/src/test/java/org/tensorflow/GraphTest.java b/tensorflow/java/src/test/java/org/tensorflow/GraphTest.java
index c02336aebe..56c8f22daa 100644
--- a/tensorflow/java/src/test/java/org/tensorflow/GraphTest.java
+++ b/tensorflow/java/src/test/java/org/tensorflow/GraphTest.java
@@ -239,8 +239,20 @@ public class GraphTest {
Output<?>[] grad0 = g.addGradients(null, toArray(y0), toArray(x), null);
assertTrue(grad0[0].op().name().startsWith("gradients/"));
- Output<?>[] grad1 = g.addGradients("more_gradients", toArray(y0), toArray(x), null);
- assertTrue(grad1[0].op().name().startsWith("more_gradients/"));
+ Output<?>[] grad1 = g.addGradients(null, toArray(y0), toArray(x), null);
+ assertTrue(grad1[0].op().name().startsWith("gradients_1/"));
+
+ Output<?>[] grad2 = g.addGradients("more_gradients", toArray(y0), toArray(x), null);
+ assertTrue(grad2[0].op().name().startsWith("more_gradients/"));
+
+ Output<?>[] grad3 = g.addGradients("even_more_gradients", toArray(y0), toArray(x), null);
+ assertTrue(grad3[0].op().name().startsWith("even_more_gradients/"));
+
+ try {
+ g.addGradients("even_more_gradients", toArray(y0), toArray(x), null);
+ fail(); // reusing a custom prefix must throw IllegalArgumentException
+ } catch (IllegalArgumentException e) {
+ // expected exception
+ }
}
}
diff --git a/tensorflow/java/src/test/java/org/tensorflow/op/ScopeTest.java b/tensorflow/java/src/test/java/org/tensorflow/op/ScopeTest.java
index 2fb2c1df48..0e9c7df697 100644
--- a/tensorflow/java/src/test/java/org/tensorflow/op/ScopeTest.java
+++ b/tensorflow/java/src/test/java/org/tensorflow/op/ScopeTest.java
@@ -17,7 +17,6 @@ package org.tensorflow.op;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertNull;
import static org.junit.Assert.fail;
import java.util.HashMap;
@@ -183,20 +182,6 @@ public class ScopeTest {
assertEquals(21704, result.intValue());
}
}
-
- @Test
- public void prefix() {
- try (Graph g = new Graph()) {
- Scope s = new Scope(g);
- assertNull(s.prefix());
-
- Scope sub1 = s.withSubScope("sub1");
- assertEquals("sub1", sub1.prefix());
-
- Scope sub2 = sub1.withSubScope("sub2");
- assertEquals("sub1/sub2", sub2.prefix());
- }
- }
// "handwritten" sample operator classes
private static final class Const<T> {
diff --git a/tensorflow/java/src/test/java/org/tensorflow/op/core/GradientsTest.java b/tensorflow/java/src/test/java/org/tensorflow/op/core/GradientsTest.java
index 2ffc69c209..b75f79a421 100644
--- a/tensorflow/java/src/test/java/org/tensorflow/op/core/GradientsTest.java
+++ b/tensorflow/java/src/test/java/org/tensorflow/op/core/GradientsTest.java
@@ -108,17 +108,18 @@ public class GradientsTest {
}
@Test
- public void createGradientsWithScopeName() {
+ public void validateGradientsNames() {
try (Graph g = new Graph()) {
- Scope scope = new Scope(g);
+ Scope scope = new Scope(g).withSubScope("sub");
Output<Float> x = TestUtil.placeholder(g, "x1", Float.class);
Output<Float> y = TestUtil.square(g, "y", x);
- Scope gradScope = scope.withSubScope("grads").withSubScope("test");
- Gradients grads = Gradients.create(gradScope, y, Arrays.asList(x));
+ Gradients grad0 = Gradients.create(scope, y, Arrays.asList(x));
+ assertTrue(grad0.dy(0).op().name().startsWith("sub/Gradients/"));
- assertTrue(grads.dy(0).op().name().startsWith("grads/test/"));
+ Gradients grad1 = Gradients.create(scope.withName("MyGradients"), y, Arrays.asList(x));
+ assertTrue(grad1.dy(0).op().name().startsWith("sub/MyGradients/"));
}
}
}