From 2b303fddafec6b96a6868aaa76f55cc392b96586 Mon Sep 17 00:00:00 2001 From: "karl@kubx.ca" Date: Mon, 25 Jun 2018 22:12:24 -0400 Subject: Add scope name to TF_AddGradients --- .../java/src/main/java/org/tensorflow/Graph.java | 17 +++++++++-------- .../src/main/java/org/tensorflow/op/NameScope.java | 4 ++++ .../java/src/main/java/org/tensorflow/op/Scope.java | 13 +++++++++++++ .../main/java/org/tensorflow/op/core/Gradients.java | 4 ++-- tensorflow/java/src/main/native/graph_jni.cc | 14 +++++++++++--- tensorflow/java/src/main/native/graph_jni.h | 6 +++--- .../java/src/test/java/org/tensorflow/GraphTest.java | 19 +++++++++++++++++-- .../src/test/java/org/tensorflow/op/ScopeTest.java | 17 +++++++++++++++++ 8 files changed, 76 insertions(+), 18 deletions(-) (limited to 'tensorflow/java') diff --git a/tensorflow/java/src/main/java/org/tensorflow/Graph.java b/tensorflow/java/src/main/java/org/tensorflow/Graph.java index 7d19696749..f2bd3e99a5 100644 --- a/tensorflow/java/src/main/java/org/tensorflow/Graph.java +++ b/tensorflow/java/src/main/java/org/tensorflow/Graph.java @@ -153,12 +153,13 @@ public final class Graph implements AutoCloseable { * If {@code dx} is null, the implementation will use dx of {@link org.tensorflow.op.core.OnesLike OnesLike} for all * shapes in {@code y}. * + * @param scopeName name of the subscope into which gradients operations are added. If null, defaults to "gradients". * @param y output of the function to derive * @param x inputs of the function for which partial derivatives are computed * @param dx if not null, the partial derivatives of some loss function {@code L} w.r.t. {@code y} * @return the partial derivatives {@code dy} with the size of {@code x} */ - public Output[] addGradients(Output[] y, Output[] x, Output[] dx) { + public Output[] addGradients(String scopeName, Output[] y, Output[] x, Output[] dx) { Output[] dy = new Output[x.length]; final long[] yHandles = new long[y.length]; final int[] yIndices = new int[y.length]; @@ -185,12 +186,12 @@ public final class Graph implements AutoCloseable { dxIndices[i] = dx[i].index(); } } - // Gradient outputs are returned in two continuous arrays concatenated into one. The first holds the native handles - // of the gradient operations while the second holds the index of their output - // e.g. given xHandles = [x0Handle, x1Handle, ...] and xIndices = [x0Index, x1Index, ..], we obtain + // Gradient outputs are returned in two continuous arrays concatenated into one. The first holds the native + // handles of the gradient operations while the second holds the index of their output e.g. given + // xHandles = [x0Handle, x1Handle, ...] and xIndices = [x0Index, x1Index, ..], we obtain // dy = [dy0Handle, dy1Handle, ..., dy0Index, dy1Index, ...] long[] dyHandlesAndIndices = - addGradients(ref.nativeHandle(), yHandles, yIndices, xHandles, xIndices, dxHandles, dxIndices); + addGradients(ref.nativeHandle(), scopeName, yHandles, yIndices, xHandles, xIndices, dxHandles, dxIndices); int ndy = dyHandlesAndIndices.length >> 1; if (ndy != dy.length) { throw new IllegalStateException(String.valueOf(ndy) + " gradients were added to the graph when " + dy.length @@ -209,14 +210,14 @@ public final class Graph implements AutoCloseable { * i.e., {@code dy/dx_1, dy/dx_2...} *

* This is a simplified version of {@link #addGradients(Output[], Output[], Output[]) where {@code y} is - * a single output and {@code dx} is null. + * a single output, {@code dx} is null and {@code scopeName} is null. * * @param y output of the function to derive * @param x inputs of the function for which partial derivatives are computed * @return the partial derivatives {@code dy} with the size of {@code x} */ public Output[] addGradients(Output y, Output[] x) { - return addGradients(new Output[]{y}, x, null); + return addGradients(null, new Output[]{y}, x, null); } private final Object nativeHandleLock = new Object(); @@ -330,7 +331,7 @@ public final class Graph implements AutoCloseable { private static native byte[] toGraphDef(long handle); - private static native long[] addGradients(long handle, long[] inputHandles, int[] inputIndices, + private static native long[] addGradients(long handle, String scopeName, long[] inputHandles, int[] inputIndices, long[] outputHandles, int[] outputIndices, long[] gradInputHandles, int[] gradInputIndices); static { diff --git a/tensorflow/java/src/main/java/org/tensorflow/op/NameScope.java b/tensorflow/java/src/main/java/org/tensorflow/op/NameScope.java index 2e84cac1ac..92e05d2d6d 100644 --- a/tensorflow/java/src/main/java/org/tensorflow/op/NameScope.java +++ b/tensorflow/java/src/main/java/org/tensorflow/op/NameScope.java @@ -56,6 +56,10 @@ final class NameScope { String actualName = (opName != null) ? opName : name; return fullyQualify(makeUnique(actualName)); } + + String prefix() { + return opPrefix; + } /** * Create a new, root-level namescope. diff --git a/tensorflow/java/src/main/java/org/tensorflow/op/Scope.java b/tensorflow/java/src/main/java/org/tensorflow/op/Scope.java index 8de2eaeb79..d1ab44c3b2 100644 --- a/tensorflow/java/src/main/java/org/tensorflow/op/Scope.java +++ b/tensorflow/java/src/main/java/org/tensorflow/op/Scope.java @@ -154,6 +154,19 @@ public final class Scope { public String makeOpName(String defaultName) { return nameScope.makeOpName(defaultName); } + + /** + * The name prefix of this scope + *

+ * This value is the combination of the name of this scope and all of its parents, separated by a '/', e.g. + *

{@code
+   * Scope scope = new Scope(graph);
+   * assertEquals(scope.withSubScope("sub1").withSubScope("sub2").prefix(), "sub1/sub2");
+   * }
+ */ + public String prefix() { + return nameScope.prefix(); + } private Scope(Graph graph, NameScope nameScope) { this.graph = graph; diff --git a/tensorflow/java/src/main/java/org/tensorflow/op/core/Gradients.java b/tensorflow/java/src/main/java/org/tensorflow/op/core/Gradients.java index f4671c8af9..d88dc3ba46 100644 --- a/tensorflow/java/src/main/java/org/tensorflow/op/core/Gradients.java +++ b/tensorflow/java/src/main/java/org/tensorflow/op/core/Gradients.java @@ -88,8 +88,8 @@ public class Gradients implements Op, Iterable> { } } } - Output[] gradOutputs = scope.graph().addGradients(Operands.asOutputs(y), Operands.asOutputs(x), dx); - return new Gradients(Arrays.asList(gradOutputs)); + Output[] dy = scope.graph().addGradients(scope.prefix(), Operands.asOutputs(y), Operands.asOutputs(x), dx); + return new Gradients(Arrays.asList(dy)); } /** diff --git a/tensorflow/java/src/main/native/graph_jni.cc b/tensorflow/java/src/main/native/graph_jni.cc index dac6a345e9..a9b2ef6494 100644 --- a/tensorflow/java/src/main/native/graph_jni.cc +++ b/tensorflow/java/src/main/native/graph_jni.cc @@ -135,7 +135,7 @@ Java_org_tensorflow_Graph_toGraphDef(JNIEnv* env, jclass clazz, jlong handle) { JNIEXPORT jlongArray JNICALL Java_org_tensorflow_Graph_addGradients(JNIEnv* env, jclass clazz, jlong handle, - jlongArray y_handles, jintArray y_indices, + jstring scope_name, jlongArray y_handles, jintArray y_indices, jlongArray x_handles, jintArray x_indices, jlongArray dx_handles, jintArray dx_indices) { @@ -163,9 +163,17 @@ Java_org_tensorflow_Graph_addGradients(JNIEnv* env, jclass clazz, jlong handle, } if (env->ExceptionCheck()) return nullptr; + jboolean is_copy; + const char* cscope_name = nullptr; + if (scope_name != nullptr) { + cscope_name = env->GetStringUTFChars(scope_name, &is_copy); + } TF_Status* status = TF_NewStatus(); - TF_AddGradients(g, y.get(), ny, x.get(), nx, dx.get(), status, dy.get()); - + TF_AddGradients(g, cscope_name, y.get(), ny, x.get(), nx, dx.get(), status, + dy.get()); + if (scope_name != nullptr) { + env->ReleaseStringUTFChars(scope_name, cscope_name); + } if (!throwExceptionIfNotOK(env, status)) { TF_DeleteStatus(status); return nullptr; diff --git a/tensorflow/java/src/main/native/graph_jni.h b/tensorflow/java/src/main/native/graph_jni.h index 4f87e8d5a7..e483bf953b 100644 --- a/tensorflow/java/src/main/native/graph_jni.h +++ b/tensorflow/java/src/main/native/graph_jni.h @@ -76,11 +76,11 @@ JNIEXPORT jbyteArray JNICALL Java_org_tensorflow_Graph_toGraphDef(JNIEnv *, /* * Class: org_tensorflow_Graph * Method: name - * Signature: (J[J[I[J[I[J[I)[J + * Signature: (JLjava/lang/String;[J[I[J[I[J[I)[J */ JNIEXPORT jlongArray JNICALL Java_org_tensorflow_Graph_addGradients(JNIEnv *, - jclass, jlong, jlongArray, jintArray, jlongArray, jintArray, jlongArray, - jintArray); + jclass, jlong, jstring, jlongArray, jintArray, jlongArray, jintArray, + jlongArray, jintArray); #ifdef __cplusplus } // extern "C" diff --git a/tensorflow/java/src/test/java/org/tensorflow/GraphTest.java b/tensorflow/java/src/test/java/org/tensorflow/GraphTest.java index c2e52c22c6..c02336aebe 100644 --- a/tensorflow/java/src/test/java/org/tensorflow/GraphTest.java +++ b/tensorflow/java/src/test/java/org/tensorflow/GraphTest.java @@ -181,7 +181,7 @@ public class GraphTest { Output y0 = TestUtil.square(g, "y0", x); Output y1 = TestUtil.square(g, "y1", y0); - Output[] grad = g.addGradients(toArray(y0, y1), toArray(x), null); + Output[] grad = g.addGradients(null, toArray(y0, y1), toArray(x), null); assertNotNull(grad); 
assertEquals(1, grad.length); assertEquals(DataType.FLOAT, grad[0].dataType()); @@ -212,7 +212,7 @@ public class GraphTest { assertEquals(1, grad0.length); assertEquals(DataType.FLOAT, grad0[0].dataType()); - Output[] grad1 = g.addGradients(toArray(y0), toArray(x), toArray(grad0[0])); + Output[] grad1 = g.addGradients(null, toArray(y0), toArray(x), toArray(grad0[0])); assertNotNull(grad1); assertEquals(1, grad1.length); assertEquals(DataType.FLOAT, grad1[0].dataType()); @@ -229,6 +229,21 @@ public class GraphTest { } } + @Test + public void validateGradientsNames() { + try (Graph g = new Graph()) { + + Output x = TestUtil.placeholder(g, "x", Float.class); + Output y0 = TestUtil.square(g, "y0", x); + + Output[] grad0 = g.addGradients(null, toArray(y0), toArray(x), null); + assertTrue(grad0[0].op().name().startsWith("gradients/")); + + Output[] grad1 = g.addGradients("more_gradients", toArray(y0), toArray(x), null); + assertTrue(grad1[0].op().name().startsWith("more_gradients/")); + } + } + private static Output[] toArray(Output... outputs) { return outputs; } diff --git a/tensorflow/java/src/test/java/org/tensorflow/op/ScopeTest.java b/tensorflow/java/src/test/java/org/tensorflow/op/ScopeTest.java index 125de73554..2057007499 100644 --- a/tensorflow/java/src/test/java/org/tensorflow/op/ScopeTest.java +++ b/tensorflow/java/src/test/java/org/tensorflow/op/ScopeTest.java @@ -17,10 +17,12 @@ package org.tensorflow.op; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import java.util.HashMap; import java.util.Map; + import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; @@ -181,6 +183,21 @@ public class ScopeTest { assertEquals(21704, result.intValue()); } } + + @Test + public void prefix() { + try (Graph g = new Graph()) { + Scope s = new Scope(g); + assertNotNull(s.prefix()); + assertTrue(s.prefix().isEmpty()); + + Scope sub1 = s.withSubScope("sub1"); + assertEquals("sub1", sub1.prefix()); + + Scope sub2 = sub1.withSubScope("sub2"); + assertEquals("sub1/sub2", sub2.prefix()); + } + } // "handwritten" sample operator classes private static final class Const { -- cgit v1.2.3 From ab063cd57d7eda73bcbaf11d43f8b2e6708979a3 Mon Sep 17 00:00:00 2001 From: "karl@kubx.ca" Date: Fri, 6 Jul 2018 23:36:13 -0400 Subject: Add unit tests for Gradients --- tensorflow/java/BUILD | 13 +++ .../src/main/java/org/tensorflow/op/NameScope.java | 2 +- .../src/main/java/org/tensorflow/op/Scope.java | 2 +- .../java/org/tensorflow/op/core/Gradients.java | 12 +- .../src/test/java/org/tensorflow/TestUtil.java | 2 +- .../src/test/java/org/tensorflow/op/ScopeTest.java | 5 +- .../java/org/tensorflow/op/core/GradientsTest.java | 124 +++++++++++++++++++++ 7 files changed, 148 insertions(+), 12 deletions(-) create mode 100644 tensorflow/java/src/test/java/org/tensorflow/op/core/GradientsTest.java (limited to 'tensorflow/java') diff --git a/tensorflow/java/BUILD b/tensorflow/java/BUILD index 73e210fae0..7ceba3903d 100644 --- a/tensorflow/java/BUILD +++ b/tensorflow/java/BUILD @@ -292,6 +292,19 @@ tf_java_test( ], ) +tf_java_test( + name = "GradientsTest", + size = "small", + srcs = ["src/test/java/org/tensorflow/op/core/GradientsTest.java"], + javacopts = JAVACOPTS, + test_class = "org.tensorflow.op.core.GradientsTest", + deps = [ + ":tensorflow", + ":testutil", + "@junit", + ], +) + filegroup( name = "processor_test_resources", srcs = glob([ diff --git 
a/tensorflow/java/src/main/java/org/tensorflow/op/NameScope.java b/tensorflow/java/src/main/java/org/tensorflow/op/NameScope.java index 92e05d2d6d..95a2a2f9f5 100644 --- a/tensorflow/java/src/main/java/org/tensorflow/op/NameScope.java +++ b/tensorflow/java/src/main/java/org/tensorflow/op/NameScope.java @@ -57,7 +57,7 @@ final class NameScope { return fullyQualify(makeUnique(actualName)); } - String prefix() { + String opPrefix() { return opPrefix; } diff --git a/tensorflow/java/src/main/java/org/tensorflow/op/Scope.java b/tensorflow/java/src/main/java/org/tensorflow/op/Scope.java index d1ab44c3b2..51a6ce8318 100644 --- a/tensorflow/java/src/main/java/org/tensorflow/op/Scope.java +++ b/tensorflow/java/src/main/java/org/tensorflow/op/Scope.java @@ -165,7 +165,7 @@ public final class Scope { * } */ public String prefix() { - return nameScope.prefix(); + return nameScope.opPrefix(); } private Scope(Graph graph, NameScope nameScope) { diff --git a/tensorflow/java/src/main/java/org/tensorflow/op/core/Gradients.java b/tensorflow/java/src/main/java/org/tensorflow/op/core/Gradients.java index d88dc3ba46..6d71ddfff0 100644 --- a/tensorflow/java/src/main/java/org/tensorflow/op/core/Gradients.java +++ b/tensorflow/java/src/main/java/org/tensorflow/op/core/Gradients.java @@ -59,12 +59,12 @@ public class Gradients implements Op, Iterable> { * @param dx partial derivatives of some loss function {@code L} w.r.t. {@code y} * @return this option builder */ - public Options dx(Iterable> dx) { + public Options dx(Iterable> dx) { this.dx = dx; return this; } - private Iterable> dx; + private Iterable> dx; private Options() { } @@ -79,7 +79,7 @@ public class Gradients implements Op, Iterable> { * @param options carries optional attributes values * @return a new instance of {@code Gradients} */ - public static Gradients create(Scope scope, Iterable> y, Iterable> x, Options... options) { + public static Gradients create(Scope scope, Iterable> y, Iterable> x, Options... options) { Output[] dx = null; if (options != null) { for (Options opts : options) { @@ -105,7 +105,7 @@ public class Gradients implements Op, Iterable> { * @return a new instance of {@code Gradients} */ @SuppressWarnings({"unchecked", "rawtypes"}) - public static Gradients create(Scope scope, Operand y, Iterable> x, Options... options) { + public static Gradients create(Scope scope, Operand y, Iterable> x, Options... options) { return create(scope, (Iterable) Arrays.asList(y), x, options); } @@ -113,7 +113,7 @@ public class Gradients implements Op, Iterable> { * @param dx partial derivatives of some loss function {@code L} w.r.t. {@code y} * @return builder to add more options to this operation */ - public Options dx(Iterable> dx) { + public static Options dx(Iterable> dx) { return new Options().dx(dx); } @@ -135,7 +135,7 @@ public class Gradients implements Op, Iterable> { *

* Warning: Does not check that the type of the tensor matches T. It is recommended to call * this method with an explicit type parameter rather than letting it be inferred, e.g. {@code - * gradients.dy(0)} + * gradients.dy(0)} * * @param The expected element type of the tensors produced by this output. * @param index The index of the output among the gradients added by this operation diff --git a/tensorflow/java/src/test/java/org/tensorflow/TestUtil.java b/tensorflow/java/src/test/java/org/tensorflow/TestUtil.java index 4e84886416..f984c508ee 100644 --- a/tensorflow/java/src/test/java/org/tensorflow/TestUtil.java +++ b/tensorflow/java/src/test/java/org/tensorflow/TestUtil.java @@ -24,7 +24,7 @@ public class TestUtil { public static final class AutoCloseableList extends ArrayList implements AutoCloseable { - AutoCloseableList(Collection c) { + public AutoCloseableList(Collection c) { super(c); } diff --git a/tensorflow/java/src/test/java/org/tensorflow/op/ScopeTest.java b/tensorflow/java/src/test/java/org/tensorflow/op/ScopeTest.java index 2057007499..2fb2c1df48 100644 --- a/tensorflow/java/src/test/java/org/tensorflow/op/ScopeTest.java +++ b/tensorflow/java/src/test/java/org/tensorflow/op/ScopeTest.java @@ -17,7 +17,7 @@ package org.tensorflow.op; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; +import static org.junit.Assert.assertNull; import static org.junit.Assert.fail; import java.util.HashMap; @@ -188,8 +188,7 @@ public class ScopeTest { public void prefix() { try (Graph g = new Graph()) { Scope s = new Scope(g); - assertNotNull(s.prefix()); - assertTrue(s.prefix().isEmpty()); + assertNull(s.prefix()); Scope sub1 = s.withSubScope("sub1"); assertEquals("sub1", sub1.prefix()); diff --git a/tensorflow/java/src/test/java/org/tensorflow/op/core/GradientsTest.java b/tensorflow/java/src/test/java/org/tensorflow/op/core/GradientsTest.java new file mode 100644 index 0000000000..2ffc69c209 --- /dev/null +++ b/tensorflow/java/src/test/java/org/tensorflow/op/core/GradientsTest.java @@ -0,0 +1,124 @@ +package org.tensorflow.op.core; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +import java.util.Arrays; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.tensorflow.Graph; +import org.tensorflow.Output; +import org.tensorflow.Session; +import org.tensorflow.Tensor; +import org.tensorflow.Tensors; +import org.tensorflow.TestUtil; +import org.tensorflow.op.Scope; + +@RunWith(JUnit4.class) +public class GradientsTest { + + @Test + public void createGradients() { + try (Graph g = new Graph(); + Session sess = new Session(g)) { + Scope scope = new Scope(g); + + Output x = TestUtil.placeholder(g, "x1", Float.class); + Output y0 = TestUtil.square(g, "y0", x); + Output y1 = TestUtil.square(g, "y1", y0); + + Gradients grads = Gradients.create(scope, y1, Arrays.asList(x, y0)); + + assertNotNull(grads); + assertNotNull(grads.dy()); + assertEquals(2, grads.dy().size()); + + try (Tensor c = Tensors.create(3.0f); + TestUtil.AutoCloseableList> outputs = new TestUtil.AutoCloseableList<>( + sess.runner() + .feed(x, c) + .fetch(grads.dy(0)) + .fetch(grads.dy(1)) + .run())) { + + assertEquals(108.0f, outputs.get(0).floatValue(), 0.0f); + assertEquals(18.0f, outputs.get(1).floatValue(), 0.0f); + } + } + } + + @Test + public void createGradientsWithSum() { + try (Graph g = new Graph(); 
+ Session sess = new Session(g)) { + Scope scope = new Scope(g); + + Output x = TestUtil.placeholder(g, "x1", Float.class); + Output y0 = TestUtil.square(g, "y0", x); + Output y1 = TestUtil.square(g, "y1", y0); + + Gradients grads = Gradients.create(scope, Arrays.asList(y0, y1), Arrays.asList(x)); + + assertNotNull(grads); + assertNotNull(grads.dy()); + assertEquals(1, grads.dy().size()); + + try (Tensor c = Tensors.create(3.0f); + TestUtil.AutoCloseableList> outputs = new TestUtil.AutoCloseableList<>( + sess.runner() + .feed(x, c) + .fetch(grads.dy(0)) + .run())) { + + assertEquals(114.0f, outputs.get(0).floatValue(), 0.0f); + } + } + } + + @Test + public void createGradientsWithInitialValues() { + try (Graph g = new Graph(); + Session sess = new Session(g)) { + Scope scope = new Scope(g); + + Output x = TestUtil.placeholder(g, "x1", Float.class); + Output y0 = TestUtil.square(g, "y0", x); + Output y1 = TestUtil.square(g, "y1", y0); + + Gradients grads0 = Gradients.create(scope, y1, Arrays.asList(y0)); + Gradients grads1 = Gradients.create(scope, y0, Arrays.asList(x), Gradients.dx(grads0.dy())); + + assertNotNull(grads1); + assertNotNull(grads1.dy()); + assertEquals(1, grads1.dy().size()); + + try (Tensor c = Tensors.create(3.0f); + TestUtil.AutoCloseableList> outputs = new TestUtil.AutoCloseableList<>( + sess.runner() + .feed(x, c) + .fetch(grads1.dy(0)) + .run())) { + + assertEquals(108.0f, outputs.get(0).floatValue(), 0.0f); + } + } + } + + @Test + public void createGradientsWithScopeName() { + try (Graph g = new Graph()) { + Scope scope = new Scope(g); + + Output x = TestUtil.placeholder(g, "x1", Float.class); + Output y = TestUtil.square(g, "y", x); + + Scope gradScope = scope.withSubScope("grads").withSubScope("test"); + Gradients grads = Gradients.create(gradScope, y, Arrays.asList(x)); + + assertTrue(grads.dy(0).op().name().startsWith("grads/test/")); + } + } +} -- cgit v1.2.3 From 7ebdc9834bbc583bcc42551b660c8ed256ea7416 Mon Sep 17 00:00:00 2001 From: "karl@kubx.ca" Date: Sun, 8 Jul 2018 00:21:45 -0400 Subject: 1st code review: rename 'scope_name' to 'prefix', etc. --- tensorflow/c/c_api.cc | 9 ++++-- tensorflow/c/c_api.h | 23 ++++++++++----- tensorflow/c/c_api_test.cc | 34 +++++++++++----------- tensorflow/c/while_loop_test.cc | 4 +-- .../java/src/main/java/org/tensorflow/Graph.java | 11 +++---- .../src/main/java/org/tensorflow/op/Scope.java | 2 +- tensorflow/java/src/main/native/graph_jni.cc | 17 +++++------ 7 files changed, 57 insertions(+), 43 deletions(-) (limited to 'tensorflow/java') diff --git a/tensorflow/c/c_api.cc b/tensorflow/c/c_api.cc index 96653154e5..32b0b70620 100644 --- a/tensorflow/c/c_api.cc +++ b/tensorflow/c/c_api.cc @@ -2387,7 +2387,12 @@ void TF_FinishWhile(const TF_WhileParams* params, TF_Status* status, void TF_AbortWhile(const TF_WhileParams* params) { FreeWhileResources(params); } -void TF_AddGradients(TF_Graph* g, const char* scope_name, TF_Output* y, +void TF_AddGradients(TF_Graph* g, TF_Output* y, int ny, TF_Output* x, int nx, + TF_Output* dx, TF_Status* status, TF_Output* dy) { + TF_AddGradientsWithPrefix(g, nullptr, y, ny, x, nx, dx, status, dy); +} + +void TF_AddGradientsWithPrefix(TF_Graph* g, const char* prefix, TF_Output* y, int ny, TF_Output* x, int nx, TF_Output* dx, TF_Status* status, TF_Output* dy) { #ifdef __ANDROID__ @@ -2408,7 +2413,7 @@ void TF_AddGradients(TF_Graph* g, const char* scope_name, TF_Output* y, tensorflow::Scope scope = NewInternalScope(&g->graph, &status->status, &g->refiner) - .NewSubScope(scope_name != nullptr ? 
scope_name : "gradients"); + .NewSubScope(prefix != nullptr ? prefix : "gradients"); if (dx != nullptr) { std::vector dx_arg = OutputsFromTFOutputs(dx, ny); diff --git a/tensorflow/c/c_api.h b/tensorflow/c/c_api.h index e896f68ce0..8e49158957 100644 --- a/tensorflow/c/c_api.h +++ b/tensorflow/c/c_api.h @@ -1129,6 +1129,15 @@ TF_CAPI_EXPORT extern void TF_FinishWhile(const TF_WhileParams* params, // called after a successful TF_NewWhile() call. TF_CAPI_EXPORT extern void TF_AbortWhile(const TF_WhileParams* params); +// Adds operations to compute the partial derivatives of sum of `y`s w.r.t `x`s. +// +// This method is the equivalent of calling TF_AddGradientsWithPrefix with a +// nullptr prefix (which will create all gradients operations under "gradients/" +// by default). See TF_AddGradientsWithPrefix for more details. +TF_CAPI_EXPORT void TF_AddGradients(TF_Graph* g, TF_Output* y, int ny, + TF_Output* x, int nx, TF_Output* dx, + TF_Status* status, TF_Output* dy); + // Adds operations to compute the partial derivatives of sum of `y`s w.r.t `x`s, // i.e., d(y_1 + y_2 + ...)/dx_1, d(y_1 + y_2 + ...)/dx_2... // `dx` are used as initial gradients (which represent the symbolic partial @@ -1138,18 +1147,18 @@ TF_CAPI_EXPORT extern void TF_AbortWhile(const TF_WhileParams* params); // shapes in `y`. // The partial derivatives are returned in `dy`. `dy` should be allocated to // size `nx`. -// `scope_name` names the scope (or sub-scope) into which all gradients -// operations are added. If `scope_name` is nullptr, "gradients" is used by -// default. +// `prefix` names the scope into which all gradients operations are being added. +// If `prefix` is nullptr, "gradients" is used by default. // // WARNING: This function does not yet support all the gradients that python // supports. See // https://www.tensorflow.org/code/tensorflow/cc/gradients/README.md // for instructions on how to add C++ more gradients. 
-TF_CAPI_EXPORT void TF_AddGradients(TF_Graph* g, const char* scope_name, - TF_Output* y, int ny, - TF_Output* x, int nx, TF_Output* dx, - TF_Status* status, TF_Output* dy); +TF_CAPI_EXPORT void TF_AddGradientsWithPrefix(TF_Graph* g, const char* prefix, + TF_Output* y, int ny, + TF_Output* x, int nx, + TF_Output* dx, TF_Status* status, + TF_Output* dy); // Create a TF_Function from a TF_Graph // diff --git a/tensorflow/c/c_api_test.cc b/tensorflow/c/c_api_test.cc index 2fe9e91583..adcdefbaf3 100644 --- a/tensorflow/c/c_api_test.cc +++ b/tensorflow/c/c_api_test.cc @@ -1475,16 +1475,16 @@ class CApiGradientsTest : public ::testing::Test { } void TestGradientsSuccess(bool grad_inputs_provided, - const char* scope_name = nullptr) { + const char* prefix = nullptr) { TF_Output inputs[2]; TF_Output outputs[1]; TF_Output grad_outputs[2]; TF_Output expected_grad_outputs[2]; BuildSuccessGraph(inputs, outputs); - BuildExpectedGraph(grad_inputs_provided, scope_name, expected_grad_outputs); + BuildExpectedGraph(grad_inputs_provided, prefix, expected_grad_outputs); - AddGradients(grad_inputs_provided, scope_name, inputs, 2, outputs, 1, + AddGradients(grad_inputs_provided, prefix, inputs, 2, outputs, 1, grad_outputs); EXPECT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_); @@ -1552,7 +1552,7 @@ class CApiGradientsTest : public ::testing::Test { EXPECT_EQ(*a_data, *b_data); } - void AddGradients(bool grad_inputs_provided, const char* scope_name, + void AddGradients(bool grad_inputs_provided, const char* prefix, TF_Output* inputs, int ninputs, TF_Output* outputs, int noutputs, TF_Output* grad_outputs) { if (grad_inputs_provided) { @@ -1561,11 +1561,11 @@ class CApiGradientsTest : public ::testing::Test { TF_Operation* grad_inputs_op = FloatConst2x2(graph_, s_, grad_inputs_val, "GradInputs"); grad_inputs[0] = TF_Output{grad_inputs_op, 0}; - TF_AddGradients(graph_, scope_name, outputs, noutputs, inputs, ninputs, - grad_inputs, s_, grad_outputs); + TF_AddGradientsWithPrefix(graph_, prefix, outputs, noutputs, inputs, + ninputs, grad_inputs, s_, grad_outputs); } else { - TF_AddGradients(graph_, scope_name, outputs, noutputs, inputs, ninputs, - nullptr, s_, grad_outputs); + TF_AddGradientsWithPrefix(graph_, prefix, outputs, noutputs, inputs, + ninputs, nullptr, s_, grad_outputs); } } @@ -1604,7 +1604,7 @@ class CApiGradientsTest : public ::testing::Test { } void BuildExpectedGraph(bool grad_inputs_provided, - const char* grad_scope_name, + const char* grad_prefix, TF_Output* expected_grad_outputs) { // The expected graph looks like this if grad_inputs_provided. // If grad_inputs_provided is false, Const_0 will be a OnesLike op. 
@@ -1633,9 +1633,9 @@ class CApiGradientsTest : public ::testing::Test { // const float const0_val[] = {1.0, 2.0, 3.0, 4.0}; const float const1_val[] = {1.0, 0.0, 0.0, 1.0}; - const char* grad_prefix = grad_scope_name; - if (grad_scope_name == nullptr) { - grad_prefix = "gradients"; + const char* prefix = grad_prefix; + if (prefix == nullptr) { + prefix = "gradients"; } TF_Operation* const0 = FloatConst2x2(expected_graph_, s_, const0_val, "Const_0"); @@ -1650,13 +1650,13 @@ class CApiGradientsTest : public ::testing::Test { const3 = FloatConst2x2(expected_graph_, s_, const3_val, "GradInputs"); } else { const3 = OnesLike(expected_graph_, s_, matmul, - strings::StrCat(grad_prefix, "/OnesLike").c_str()); + strings::StrCat(prefix, "/OnesLike").c_str()); } TF_Operation* matmul1 = MatMul(expected_graph_, s_, const3, const1, - strings::StrCat(grad_prefix, "/MatMul").c_str(), false, true); + strings::StrCat(prefix, "/MatMul").c_str(), false, true); TF_Operation* matmul2 = MatMul(expected_graph_, s_, const0, const3, - strings::StrCat(grad_prefix, "/MatMul_1").c_str(), true, false); + strings::StrCat(prefix, "/MatMul_1").c_str(), true, false); expected_grad_outputs[0] = {matmul1, 0}; expected_grad_outputs[1] = {matmul2, 0}; } @@ -1757,11 +1757,11 @@ TEST_F(CApiGradientsTest, MultipleCallsToAddGradients) { TF_Output outputs[1] = {{xy, 0}}; TF_Output inputs[1] = {{x, 0}}; - TF_AddGradients(graph_, nullptr, outputs, 1, inputs, 1, nullptr, s_, &dxy_dx); + TF_AddGradients(graph_, outputs, 1, inputs, 1, nullptr, s_, &dxy_dx); ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_); inputs[0] = {y, 0}; - TF_AddGradients(graph_, nullptr, outputs, 1, inputs, 1, nullptr, s_, &dxy_dy); + TF_AddGradients(graph_, outputs, 1, inputs, 1, nullptr, s_, &dxy_dy); ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_); TF_SessionOptions* opts = TF_NewSessionOptions(); diff --git a/tensorflow/c/while_loop_test.cc b/tensorflow/c/while_loop_test.cc index 12225fd1cb..d2d887f32c 100644 --- a/tensorflow/c/while_loop_test.cc +++ b/tensorflow/c/while_loop_test.cc @@ -431,8 +431,8 @@ TEST_F(CApiWhileLoopTest, Gradients) { // Create backprop graph TF_Output grad_output; - TF_AddGradients(graph_, nullptr, outputs_.data(), outputs_.size(), - inputs_.data(), 1, nullptr, s_, &grad_output); + TF_AddGradients(graph_, outputs_.data(), outputs_.size(), inputs_.data(), 1, + nullptr, s_, &grad_output); ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_); // Run gradient diff --git a/tensorflow/java/src/main/java/org/tensorflow/Graph.java b/tensorflow/java/src/main/java/org/tensorflow/Graph.java index f2bd3e99a5..353092701b 100644 --- a/tensorflow/java/src/main/java/org/tensorflow/Graph.java +++ b/tensorflow/java/src/main/java/org/tensorflow/Graph.java @@ -153,13 +153,14 @@ public final class Graph implements AutoCloseable { * If {@code dx} is null, the implementation will use dx of {@link org.tensorflow.op.core.OnesLike OnesLike} for all * shapes in {@code y}. * - * @param scopeName name of the subscope into which gradients operations are added. If null, defaults to "gradients". + * @param prefix string prefix applied to names of nodes added to the graph to compute gradients. + * If null, defaults to "gradients". * @param y output of the function to derive * @param x inputs of the function for which partial derivatives are computed * @param dx if not null, the partial derivatives of some loss function {@code L} w.r.t. 
{@code y} * @return the partial derivatives {@code dy} with the size of {@code x} */ - public Output[] addGradients(String scopeName, Output[] y, Output[] x, Output[] dx) { + public Output[] addGradients(String prefix, Output[] y, Output[] x, Output[] dx) { Output[] dy = new Output[x.length]; final long[] yHandles = new long[y.length]; final int[] yIndices = new int[y.length]; @@ -191,7 +192,7 @@ public final class Graph implements AutoCloseable { // xHandles = [x0Handle, x1Handle, ...] and xIndices = [x0Index, x1Index, ..], we obtain // dy = [dy0Handle, dy1Handle, ..., dy0Index, dy1Index, ...] long[] dyHandlesAndIndices = - addGradients(ref.nativeHandle(), scopeName, yHandles, yIndices, xHandles, xIndices, dxHandles, dxIndices); + addGradients(ref.nativeHandle(), prefix, yHandles, yIndices, xHandles, xIndices, dxHandles, dxIndices); int ndy = dyHandlesAndIndices.length >> 1; if (ndy != dy.length) { throw new IllegalStateException(String.valueOf(ndy) + " gradients were added to the graph when " + dy.length @@ -210,7 +211,7 @@ public final class Graph implements AutoCloseable { * i.e., {@code dy/dx_1, dy/dx_2...} *

* This is a simplified version of {@link #addGradients(Output[], Output[], Output[]) where {@code y} is - * a single output, {@code dx} is null and {@code scopeName} is null. + * a single output, {@code dx} is null and {@code prefix} is null. * * @param y output of the function to derive * @param x inputs of the function for which partial derivatives are computed @@ -331,7 +332,7 @@ public final class Graph implements AutoCloseable { private static native byte[] toGraphDef(long handle); - private static native long[] addGradients(long handle, String scopeName, long[] inputHandles, int[] inputIndices, + private static native long[] addGradients(long handle, String prefix, long[] inputHandles, int[] inputIndices, long[] outputHandles, int[] outputIndices, long[] gradInputHandles, int[] gradInputIndices); static { diff --git a/tensorflow/java/src/main/java/org/tensorflow/op/Scope.java b/tensorflow/java/src/main/java/org/tensorflow/op/Scope.java index 51a6ce8318..cf0b3d98c1 100644 --- a/tensorflow/java/src/main/java/org/tensorflow/op/Scope.java +++ b/tensorflow/java/src/main/java/org/tensorflow/op/Scope.java @@ -156,7 +156,7 @@ public final class Scope { } /** - * The name prefix of this scope + * The name prefix of this scope. *

* This value is the combination of the name of this scope and all of its parents, separated by a '/', e.g. *

{@code
diff --git a/tensorflow/java/src/main/native/graph_jni.cc b/tensorflow/java/src/main/native/graph_jni.cc
index a9b2ef6494..1bbda52641 100644
--- a/tensorflow/java/src/main/native/graph_jni.cc
+++ b/tensorflow/java/src/main/native/graph_jni.cc
@@ -135,7 +135,7 @@ Java_org_tensorflow_Graph_toGraphDef(JNIEnv* env, jclass clazz, jlong handle) {
 
 JNIEXPORT jlongArray JNICALL
 Java_org_tensorflow_Graph_addGradients(JNIEnv* env, jclass clazz, jlong handle,
-    jstring scope_name, jlongArray y_handles, jintArray y_indices,
+    jstring prefix, jlongArray y_handles, jintArray y_indices,
     jlongArray x_handles, jintArray x_indices,
     jlongArray dx_handles, jintArray dx_indices) {
 
@@ -163,16 +163,15 @@ Java_org_tensorflow_Graph_addGradients(JNIEnv* env, jclass clazz, jlong handle,
   }
   if (env->ExceptionCheck()) return nullptr;
 
-  jboolean is_copy;
-  const char* cscope_name = nullptr;
-  if (scope_name != nullptr) {
-    cscope_name = env->GetStringUTFChars(scope_name, &is_copy);
+  const char* cprefix = nullptr;
+  if (prefix != nullptr) {
+    cprefix = env->GetStringUTFChars(prefix, nullptr);
   }
   TF_Status* status = TF_NewStatus();
-  TF_AddGradients(g, cscope_name, y.get(), ny, x.get(), nx, dx.get(), status,
-      dy.get());
-  if (scope_name != nullptr) {
-    env->ReleaseStringUTFChars(scope_name, cscope_name);
+  TF_AddGradientsWithPrefix(g, cprefix, y.get(), ny, x.get(), nx, dx.get(),
+                            status, dy.get());
+  if (prefix != nullptr) {
+    env->ReleaseStringUTFChars(prefix, cprefix);
   }
   if (!throwExceptionIfNotOK(env, status)) {
     TF_DeleteStatus(status);
-- 
cgit v1.2.3
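For readers following the series, a minimal sketch of how the renamed Java entry point is used at this point in the patches. It wires the graph through Graph.opBuilder directly instead of the TestUtil helpers used in the tests above, and the op and prefix names ("x", "y", "my_grads") are illustrative only:

import org.tensorflow.DataType;
import org.tensorflow.Graph;
import org.tensorflow.Output;

public final class GradientPrefixExample {
  public static void main(String[] args) {
    try (Graph g = new Graph()) {
      // f(x) = x^2, wired directly through the graph API.
      Output<Float> x = g.opBuilder("Placeholder", "x")
          .setAttr("dtype", DataType.FLOAT)
          .build()
          .output(0);
      Output<Float> y = g.opBuilder("Square", "y")
          .addInput(x)
          .build()
          .output(0);

      // Gradient nodes are added under the "my_grads/" name scope; passing
      // null instead selects the default "gradients" prefix.
      Output<?>[] dy =
          g.addGradients("my_grads", new Output<?>[] {y}, new Output<?>[] {x}, null);

      System.out.println(dy[0].op().name());  // starts with "my_grads/"
    }
  }
}

Passing null as the prefix falls back to the default "gradients" scope, which is also what the two-argument addGradients(y, x) overload does.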


From a278365e8848f5fcbccb42f95a3c523367c1602f Mon Sep 17 00:00:00 2001
From: "karl@kubx.ca" 
Date: Mon, 16 Jul 2018 00:49:06 -0400
Subject: Enforce uniqueness of custom prefixes for gradients

---
 tensorflow/c/c_api.cc                              | 17 ++++++++-
 tensorflow/c/c_api.h                               | 31 ++++++++++++++---
 tensorflow/c/c_api_test.cc                         | 40 ++++++++++++----------
 .../java/src/main/java/org/tensorflow/Graph.java   |  8 ++++-
 .../src/main/java/org/tensorflow/op/NameScope.java |  4 ---
 .../src/main/java/org/tensorflow/op/Scope.java     | 26 ++------------
 .../java/org/tensorflow/op/core/Gradients.java     |  6 +++-
 .../src/test/java/org/tensorflow/GraphTest.java    | 16 +++++++--
 .../src/test/java/org/tensorflow/op/ScopeTest.java | 15 --------
 .../java/org/tensorflow/op/core/GradientsTest.java | 11 +++---
 10 files changed, 98 insertions(+), 76 deletions(-)

(limited to 'tensorflow/java')
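Before the hunks below, a short sketch of the behaviour this commit enforces, as seen from the Java binding; the graph setup is abbreviated as in the earlier sketch and the "train" prefix is illustrative:

import org.tensorflow.DataType;
import org.tensorflow.Graph;
import org.tensorflow.Output;

public final class PrefixUniquenessExample {
  public static void main(String[] args) {
    try (Graph g = new Graph()) {
      Output<Float> x = g.opBuilder("Placeholder", "x")
          .setAttr("dtype", DataType.FLOAT).build().output(0);
      Output<Float> y = g.opBuilder("Square", "y")
          .addInput(x).build().output(0);
      Output<?>[] ys = {y};
      Output<?>[] xs = {x};

      // Null prefixes keep de-duplicating automatically:
      // first "gradients/", then "gradients_1/", and so on.
      g.addGradients(null, ys, xs, null);
      g.addGradients(null, ys, xs, null);

      // An explicit prefix must now be unique within the graph.
      g.addGradients("train", ys, xs, null);
      try {
        g.addGradients("train", ys, xs, null);  // same prefix reused
      } catch (IllegalArgumentException e) {
        System.out.println("duplicate prefix rejected: " + e.getMessage());
      }
    }
  }
}

The duplicate explicit prefix surfaces as the IllegalArgumentException that the JNI layer raises for TF_INVALID_ARGUMENT, while null prefixes keep the auto-incrementing "gradients", "gradients_1", ... naming.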

diff --git a/tensorflow/c/c_api.cc b/tensorflow/c/c_api.cc
index 32b0b70620..c1f4745e56 100644
--- a/tensorflow/c/c_api.cc
+++ b/tensorflow/c/c_api.cc
@@ -2411,9 +2411,24 @@ void TF_AddGradientsWithPrefix(TF_Graph* g, const char* prefix, TF_Output* y,
 
     const int first_new_node_id = g->graph.num_node_ids();
 
+    const char* child_scope_name = prefix;
+    if (child_scope_name != nullptr) {
+      // The operation should fail if the provided name prefix has already been
+      // used in this graph.
+      for (const auto& pair: g->name_map) {
+        const string& name = pair.first;
+        if (name.compare(0, name.find_last_of('/'), prefix) == 0) {
+          status->status =
+              InvalidArgument("Duplicate node name in graph: '", prefix, "'");
+          return;
+        }
+      }
+    } else {
+      child_scope_name = "gradients";
+    }
     tensorflow::Scope scope =
         NewInternalScope(&g->graph, &status->status, &g->refiner)
-            .NewSubScope(prefix != nullptr ? prefix : "gradients");
+            .NewSubScope(child_scope_name);
 
     if (dx != nullptr) {
       std::vector dx_arg = OutputsFromTFOutputs(dx, ny);
diff --git a/tensorflow/c/c_api.h b/tensorflow/c/c_api.h
index 8e49158957..0a9fa9ddbc 100644
--- a/tensorflow/c/c_api.h
+++ b/tensorflow/c/c_api.h
@@ -1129,17 +1129,36 @@ TF_CAPI_EXPORT extern void TF_FinishWhile(const TF_WhileParams* params,
 // called after a successful TF_NewWhile() call.
 TF_CAPI_EXPORT extern void TF_AbortWhile(const TF_WhileParams* params);
 
-// Adds operations to compute the partial derivatives of sum of `y`s w.r.t `x`s.
+// Adds operations to compute the partial derivatives of sum of `y`s w.r.t `x`s,
+// i.e., d(y_1 + y_2 + ...)/dx_1, d(y_1 + y_2 + ...)/dx_2...
+//
+// `dx` are used as initial gradients (which represent the symbolic partial
+// derivatives of some loss function `L` w.r.t. `y`).
+// `dx` must be nullptr or have size `ny`.
+// If `dx` is nullptr, the implementation will use dx of `OnesLike` for all
+// shapes in `y`.
+// The partial derivatives are returned in `dy`. `dy` should be allocated to
+// size `nx`.
 //
-// This method is the equivalent of calling TF_AddGradientsWithPrefix with a
-// nullptr prefix (which will create all gradients operations under "gradients/"
-// by default). See TF_AddGradientsWithPrefix for more details.
+// Gradient nodes are automatically named under the "gradients/" prefix. To
+// guarantee name uniqueness, subsequent calls to the same graph will
+// append an incremental tag to the prefix: "gradients_1/", "gradients_2/", ...
+// See TF_AddGradientsWithPrefix, which provides a means to specify a custom
+// name prefix for operations added to a graph to compute the gradients.
+//
+// WARNING: This function does not yet support all the gradients that python
+// supports. See
+// https://www.tensorflow.org/code/tensorflow/cc/gradients/README.md
+// for instructions on how to add more C++ gradients.
 TF_CAPI_EXPORT void TF_AddGradients(TF_Graph* g, TF_Output* y, int ny,
                                     TF_Output* x, int nx, TF_Output* dx,
                                     TF_Status* status, TF_Output* dy);
 
 // Adds operations to compute the partial derivatives of sum of `y`s w.r.t `x`s,
 // i.e., d(y_1 + y_2 + ...)/dx_1, d(y_1 + y_2 + ...)/dx_2...
+// This is a variant of TF_AddGradients that allows the caller to pass a custom
+// name prefix to the operations added to a graph to compute the gradients.
+//
 // `dx` are used as initial gradients (which represent the symbolic partial
 // derivatives of some loss function `L` w.r.t. `y`).
 // `dx` must be nullptr or have size `ny`.
@@ -1148,7 +1167,9 @@ TF_CAPI_EXPORT void TF_AddGradients(TF_Graph* g, TF_Output* y, int ny,
 // The partial derivatives are returned in `dy`. `dy` should be allocated to
 // size `nx`.
 // `prefix` names the scope into which all gradients operations are being added.
-// If `prefix` is nullptr, "gradients" is used by default.
+// `prefix` must be unique within the provided graph, otherwise this operation
+// will fail. If `prefix` is nullptr, the default prefixing behaviour takes
+// place; see TF_AddGradients for more details.
 //
 // WARNING: This function does not yet support all the gradients that python
 // supports. See
diff --git a/tensorflow/c/c_api_test.cc b/tensorflow/c/c_api_test.cc
index adcdefbaf3..7094d5d32d 100644
--- a/tensorflow/c/c_api_test.cc
+++ b/tensorflow/c/c_api_test.cc
@@ -1474,19 +1474,17 @@ class CApiGradientsTest : public ::testing::Test {
     TF_DeleteStatus(s_);
   }
 
-  void TestGradientsSuccess(bool grad_inputs_provided,
-      const char* prefix = nullptr) {
+  void TestGradientsSuccess(bool grad_inputs_provided) {
     TF_Output inputs[2];
     TF_Output outputs[1];
     TF_Output grad_outputs[2];
     TF_Output expected_grad_outputs[2];
 
     BuildSuccessGraph(inputs, outputs);
-    BuildExpectedGraph(grad_inputs_provided, prefix, expected_grad_outputs);
+    BuildExpectedGraph(grad_inputs_provided, expected_grad_outputs);
 
-    AddGradients(grad_inputs_provided, prefix, inputs, 2, outputs, 1,
+    AddGradients(grad_inputs_provided, nullptr, inputs, 2, outputs, 1,
                  grad_outputs);
-
     EXPECT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
 
     // Compare that the graphs match.
@@ -1604,7 +1602,6 @@ class CApiGradientsTest : public ::testing::Test {
   }
 
   void BuildExpectedGraph(bool grad_inputs_provided,
-                          const char* grad_prefix,
                           TF_Output* expected_grad_outputs) {
     // The expected graph looks like this if grad_inputs_provided.
     // If grad_inputs_provided is false, Const_0 will be a OnesLike op.
@@ -1633,10 +1630,6 @@ class CApiGradientsTest : public ::testing::Test {
     //
     const float const0_val[] = {1.0, 2.0, 3.0, 4.0};
     const float const1_val[] = {1.0, 0.0, 0.0, 1.0};
-    const char* prefix = grad_prefix;
-    if (prefix == nullptr) {
-      prefix = "gradients";
-    }
     TF_Operation* const0 =
         FloatConst2x2(expected_graph_, s_, const0_val, "Const_0");
     TF_Operation* const1 =
@@ -1649,14 +1642,13 @@ class CApiGradientsTest : public ::testing::Test {
       const float const3_val[] = {1.0, 1.0, 1.0, 1.0};
       const3 = FloatConst2x2(expected_graph_, s_, const3_val, "GradInputs");
     } else {
-      const3 = OnesLike(expected_graph_, s_, matmul,
-          strings::StrCat(prefix, "/OnesLike").c_str());
+      const3 = OnesLike(expected_graph_, s_, matmul, "gradients/OnesLike");
     }
 
     TF_Operation* matmul1 = MatMul(expected_graph_, s_, const3, const1,
-        strings::StrCat(prefix, "/MatMul").c_str(), false, true);
+                                   "gradients/MatMul", false, true);
     TF_Operation* matmul2 = MatMul(expected_graph_, s_, const0, const3,
-        strings::StrCat(prefix, "/MatMul_1").c_str(), true, false);
+                                   "gradients/MatMul_1", true, false);
     expected_grad_outputs[0] = {matmul1, 0};
     expected_grad_outputs[1] = {matmul2, 0};
   }
@@ -1727,10 +1719,6 @@ TEST_F(CApiGradientsTest, Gradients_NoGradInputs) {
   TestGradientsSuccess(false);
 }
 
-TEST_F(CApiGradientsTest, Gradients_NoGradInputsWithScopeName) {
-  TestGradientsSuccess(false, "gradscope");
-}
-
 TEST_F(CApiGradientsTest, OpWithNoGradientRegistered_GradInputs) {
   TestGradientsError(true);
 }
@@ -1739,6 +1727,22 @@ TEST_F(CApiGradientsTest, OpWithNoGradientRegistered_NoGradInputs) {
   TestGradientsError(false);
 }
 
+TEST_F(CApiGradientsTest, Gradients_WithPrefix) {
+  TF_Output inputs[2];
+  TF_Output outputs[1];
+  TF_Output grad_outputs[2];
+
+  BuildSuccessGraph(inputs, outputs);
+  AddGradients(false, "mygrads", inputs, 2, outputs, 1, grad_outputs);
+  EXPECT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
+
+  AddGradients(false, "mygrads_1", inputs, 2, outputs, 1, grad_outputs);
+  ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
+
+  AddGradients(false, "mygrads_1", inputs, 2, outputs, 1, grad_outputs);
+  ASSERT_EQ(TF_INVALID_ARGUMENT, TF_GetCode(s_)) << TF_Message(s_);
+}
+
 void ScalarFloatFromTensor(const TF_Tensor* t, float* f) {
   ASSERT_TRUE(t != nullptr);
   ASSERT_EQ(TF_FLOAT, TF_TensorType(t));
diff --git a/tensorflow/java/src/main/java/org/tensorflow/Graph.java b/tensorflow/java/src/main/java/org/tensorflow/Graph.java
index 353092701b..32853c1367 100644
--- a/tensorflow/java/src/main/java/org/tensorflow/Graph.java
+++ b/tensorflow/java/src/main/java/org/tensorflow/Graph.java
@@ -152,8 +152,14 @@ public final class Graph implements AutoCloseable {
    * 

* If {@code dx} is null, the implementation will use dx of {@link org.tensorflow.op.core.OnesLike OnesLike} for all * shapes in {@code y}. + *

+ * {@code prefix} is used as the name prefix applied to all nodes added to the graph to compute gradients. It must + * be unique within the provided graph or the operation will fail. + *

+ * If {@code prefix} is null, then the nodes will be added to under the default prefix, which is "gradients" for the + * first invocation, then "gradients_1", "gradients_2", etc. for any subsequent calls to the same graph. * - * @param prefix string prefix applied to names of nodes added to the graph to compute gradients. + * @param prefix unique string prefix applied before the names of nodes added to the graph to compute gradients. * If null, defaults to "gradients". * @param y output of the function to derive * @param x inputs of the function for which partial derivatives are computed diff --git a/tensorflow/java/src/main/java/org/tensorflow/op/NameScope.java b/tensorflow/java/src/main/java/org/tensorflow/op/NameScope.java index 95a2a2f9f5..2e84cac1ac 100644 --- a/tensorflow/java/src/main/java/org/tensorflow/op/NameScope.java +++ b/tensorflow/java/src/main/java/org/tensorflow/op/NameScope.java @@ -56,10 +56,6 @@ final class NameScope { String actualName = (opName != null) ? opName : name; return fullyQualify(makeUnique(actualName)); } - - String opPrefix() { - return opPrefix; - } /** * Create a new, root-level namescope. diff --git a/tensorflow/java/src/main/java/org/tensorflow/op/Scope.java b/tensorflow/java/src/main/java/org/tensorflow/op/Scope.java index cf0b3d98c1..563ea66ef1 100644 --- a/tensorflow/java/src/main/java/org/tensorflow/op/Scope.java +++ b/tensorflow/java/src/main/java/org/tensorflow/op/Scope.java @@ -135,17 +135,8 @@ public final class Scope { * }

* *

Note: if you provide a composite operator building class (i.e., a class that adds a - * set of related operations to the graph by calling other operator building code) you should also - * create a {@link #withSubScope(String)} scope for the underlying operators to group them under a - * meaningful name. - * - *

{@code
-   * public static Stddev create(Scope scope, ...) {
-   *   // group sub-operations under a common name
-   *   Scope group = scope.withSubScope("stddev");
-   *   ... Sqrt.create(group, Mean.create(group, ...))
-   * }
-   * }
+ * set of related operations to the graph by calling other operator building code), the provided name + * will act as a subscope to all underlying operators. * * @param defaultName name for the underlying operator. * @return unique name for the operator. @@ -154,19 +145,6 @@ public final class Scope { public String makeOpName(String defaultName) { return nameScope.makeOpName(defaultName); } - - /** - * The name prefix of this scope. - *

- * This value is the combination of the name of this scope and all of its parents, separated by a '/', e.g. - *

{@code
-   * Scope scope = new Scope(graph);
-   * assertEquals(scope.withSubScope("sub1").withSubScope("sub2").prefix(), "sub1/sub2");
-   * }
- */ - public String prefix() { - return nameScope.opPrefix(); - } private Scope(Graph graph, NameScope nameScope) { this.graph = graph; diff --git a/tensorflow/java/src/main/java/org/tensorflow/op/core/Gradients.java b/tensorflow/java/src/main/java/org/tensorflow/op/core/Gradients.java index 6d71ddfff0..5432ff244e 100644 --- a/tensorflow/java/src/main/java/org/tensorflow/op/core/Gradients.java +++ b/tensorflow/java/src/main/java/org/tensorflow/op/core/Gradients.java @@ -88,7 +88,11 @@ public class Gradients implements Op, Iterable> { } } } - Output[] dy = scope.graph().addGradients(scope.prefix(), Operands.asOutputs(y), Operands.asOutputs(x), dx); + Output[] dy = scope.graph().addGradients( + scope.makeOpName("Gradients"), + Operands.asOutputs(y), + Operands.asOutputs(x), + dx); return new Gradients(Arrays.asList(dy)); } diff --git a/tensorflow/java/src/test/java/org/tensorflow/GraphTest.java b/tensorflow/java/src/test/java/org/tensorflow/GraphTest.java index c02336aebe..56c8f22daa 100644 --- a/tensorflow/java/src/test/java/org/tensorflow/GraphTest.java +++ b/tensorflow/java/src/test/java/org/tensorflow/GraphTest.java @@ -239,8 +239,20 @@ public class GraphTest { Output[] grad0 = g.addGradients(null, toArray(y0), toArray(x), null); assertTrue(grad0[0].op().name().startsWith("gradients/")); - Output[] grad1 = g.addGradients("more_gradients", toArray(y0), toArray(x), null); - assertTrue(grad1[0].op().name().startsWith("more_gradients/")); + Output[] grad1 = g.addGradients(null, toArray(y0), toArray(x), null); + assertTrue(grad1[0].op().name().startsWith("gradients_1/")); + + Output[] grad2 = g.addGradients("more_gradients", toArray(y0), toArray(x), null); + assertTrue(grad2[0].op().name().startsWith("more_gradients/")); + + Output[] grad3 = g.addGradients("even_more_gradients", toArray(y0), toArray(x), null); + assertTrue(grad3[0].op().name().startsWith("even_more_gradients/")); + + try { + g.addGradients("even_more_gradients", toArray(y0), toArray(x), null); + } catch (IllegalArgumentException e) { + // expected exception + } } } diff --git a/tensorflow/java/src/test/java/org/tensorflow/op/ScopeTest.java b/tensorflow/java/src/test/java/org/tensorflow/op/ScopeTest.java index 2fb2c1df48..0e9c7df697 100644 --- a/tensorflow/java/src/test/java/org/tensorflow/op/ScopeTest.java +++ b/tensorflow/java/src/test/java/org/tensorflow/op/ScopeTest.java @@ -17,7 +17,6 @@ package org.tensorflow.op; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; import static org.junit.Assert.fail; import java.util.HashMap; @@ -183,20 +182,6 @@ public class ScopeTest { assertEquals(21704, result.intValue()); } } - - @Test - public void prefix() { - try (Graph g = new Graph()) { - Scope s = new Scope(g); - assertNull(s.prefix()); - - Scope sub1 = s.withSubScope("sub1"); - assertEquals("sub1", sub1.prefix()); - - Scope sub2 = sub1.withSubScope("sub2"); - assertEquals("sub1/sub2", sub2.prefix()); - } - } // "handwritten" sample operator classes private static final class Const { diff --git a/tensorflow/java/src/test/java/org/tensorflow/op/core/GradientsTest.java b/tensorflow/java/src/test/java/org/tensorflow/op/core/GradientsTest.java index 2ffc69c209..b75f79a421 100644 --- a/tensorflow/java/src/test/java/org/tensorflow/op/core/GradientsTest.java +++ b/tensorflow/java/src/test/java/org/tensorflow/op/core/GradientsTest.java @@ -108,17 +108,18 @@ public class GradientsTest { } @Test - public void createGradientsWithScopeName() { + public 
void validateGradientsNames() { try (Graph g = new Graph()) { - Scope scope = new Scope(g); + Scope scope = new Scope(g).withSubScope("sub"); Output x = TestUtil.placeholder(g, "x1", Float.class); Output y = TestUtil.square(g, "y", x); - Scope gradScope = scope.withSubScope("grads").withSubScope("test"); - Gradients grads = Gradients.create(gradScope, y, Arrays.asList(x)); + Gradients grad0 = Gradients.create(scope, y, Arrays.asList(x)); + assertTrue(grad0.dy(0).op().name().startsWith("sub/Gradients/")); - assertTrue(grads.dy(0).op().name().startsWith("grads/test/")); + Gradients grad1 = Gradients.create(scope.withName("MyGradients"), y, Arrays.asList(x)); + assertTrue(grad1.dy(0).op().name().startsWith("sub/MyGradients/")); } } } -- cgit v1.2.3 From c01cfe7ced91dabc19b2392696cf0598a5df70f9 Mon Sep 17 00:00:00 2001 From: "karl@kubx.ca" Date: Fri, 27 Jul 2018 18:04:43 -0400 Subject: 2nd review: Cover more prefix conflict cases --- tensorflow/c/c_api.cc | 32 ++++++++--- tensorflow/c/c_api_test.cc | 66 +++++++++++++++++++--- .../java/src/main/java/org/tensorflow/Graph.java | 5 +- 3 files changed, 84 insertions(+), 19 deletions(-) (limited to 'tensorflow/java') diff --git a/tensorflow/c/c_api.cc b/tensorflow/c/c_api.cc index c1f4745e56..bcecbb0bc6 100644 --- a/tensorflow/c/c_api.cc +++ b/tensorflow/c/c_api.cc @@ -53,6 +53,7 @@ limitations under the License. #include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/lib/gtl/array_slice.h" #include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/platform/mem.h" #include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/protobuf.h" @@ -2411,20 +2412,25 @@ void TF_AddGradientsWithPrefix(TF_Graph* g, const char* prefix, TF_Output* y, const int first_new_node_id = g->graph.num_node_ids(); - const char* child_scope_name = prefix; - if (child_scope_name != nullptr) { + string prefix_cmp; + const char* child_scope_name; + if (prefix == nullptr) { + child_scope_name = "gradients"; + } else { + prefix_cmp = string(prefix) + "/"; // The operation should fail if the provided name prefix has already been // used in this graph for (const auto& pair: g->name_map) { const string& name = pair.first; - if (name.compare(0, name.find_last_of('/'), prefix) == 0) { - status->status = - InvalidArgument("Duplicate node name in graph: '", prefix, "'"); + if (name.compare(prefix) == 0 || + tensorflow::str_util::StartsWith(name, prefix_cmp)) { + status->status = InvalidArgument("prefix [", prefix, + "] conflicts with existing node in the graph named [", + name, "]"); return; } } - } else { - child_scope_name = "gradients"; + child_scope_name = prefix; } tensorflow::Scope scope = NewInternalScope(&g->graph, &status->status, &g->refiner) @@ -2443,6 +2449,18 @@ void TF_AddGradientsWithPrefix(TF_Graph* g, const char* prefix, TF_Output* y, for (int i = first_new_node_id; i < g->graph.num_node_ids(); ++i) { Node* n = g->graph.FindNodeId(i); if (n == nullptr) continue; + + // Adding the gradients to the graph can alter the prefix to prevent + // name collisions only if this prefix has not been provided explicitly + // by the user. If it was provided, assert that it remained intact. + if (prefix != nullptr && + !tensorflow::str_util::StartsWith(n->name(), prefix_cmp)) { + status->status = tensorflow::errors::Internal( + "BUG: The gradients prefix have been unexpectedly altered when " + "adding the nodes to the graph. This is a bug. 
Please file an " + "issue at https://github.com/tensorflow/tensorflow/issues."); + return; + } // We have a convoluted scheme here: Using the C++ graph construction API // to add potentially many nodes to the graph without running the checks // (such as uniqueness of the names of nodes) we run with other functions diff --git a/tensorflow/c/c_api_test.cc b/tensorflow/c/c_api_test.cc index 7094d5d32d..d8d2533c60 100644 --- a/tensorflow/c/c_api_test.cc +++ b/tensorflow/c/c_api_test.cc @@ -1708,6 +1708,20 @@ class CApiGradientsTest : public ::testing::Test { return op; } + void BuildGraphAndAddGradientsWithPrefixes(const char* prefix1, + const char* prefix2 = nullptr) { + TF_Output inputs[2]; + TF_Output outputs[1]; + TF_Output grad_outputs[2]; + + BuildSuccessGraph(inputs, outputs); + + AddGradients(false, prefix1, inputs, 2, outputs, 1, grad_outputs); + if (prefix2 != nullptr) { + AddGradients(false, prefix2, inputs, 2, outputs, 1, grad_outputs); + } + } + TF_Status* s_; TF_Graph* graph_; TF_Graph* expected_graph_; @@ -1727,19 +1741,53 @@ TEST_F(CApiGradientsTest, OpWithNoGradientRegistered_NoGradInputs) { TestGradientsError(false); } -TEST_F(CApiGradientsTest, Gradients_WithPrefix) { - TF_Output inputs[2]; - TF_Output outputs[1]; - TF_Output grad_outputs[2]; +TEST_F(CApiGradientsTest, GradientsPrefix_PrefixIsOk) { + BuildGraphAndAddGradientsWithPrefixes("gradients"); + ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_); +} - BuildSuccessGraph(inputs, outputs); - AddGradients(false, "mygrads", inputs, 2, outputs, 1, grad_outputs); - EXPECT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_); +TEST_F(CApiGradientsTest, GradientsPrefix_TwoGradientsWithDistinctPrefixes) { + BuildGraphAndAddGradientsWithPrefixes("gradients", "gradients_1"); + ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_); +} + +TEST_F(CApiGradientsTest, GradientsPrefix_TwoGradientsInSameScope) { + BuildGraphAndAddGradientsWithPrefixes("scope/gradients", "scope/gradients_1"); + ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_); +} - AddGradients(false, "mygrads_1", inputs, 2, outputs, 1, grad_outputs); +TEST_F(CApiGradientsTest, GradientsPrefix_TwoGradientsInDifferentScopes) { + BuildGraphAndAddGradientsWithPrefixes("scope/gradients", "scope_1/gradients"); ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_); +} + +TEST_F(CApiGradientsTest, GradientsPrefix_2ndGradientsAsSubScopeOf1st) { + BuildGraphAndAddGradientsWithPrefixes("gradients", "gradients/sub"); + ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_); +} + +TEST_F(CApiGradientsTest, GradientsPrefix_PrefixMatchesExistingNodeName) { + BuildGraphAndAddGradientsWithPrefixes("Const_0"); + ASSERT_EQ(TF_INVALID_ARGUMENT, TF_GetCode(s_)) << TF_Message(s_); +} + +TEST_F(CApiGradientsTest, GradientsPrefix_TwoGradientsWithIdenticalPrefixes) { + BuildGraphAndAddGradientsWithPrefixes("gradients", "gradients"); + ASSERT_EQ(TF_INVALID_ARGUMENT, TF_GetCode(s_)) << TF_Message(s_); +} + +TEST_F(CApiGradientsTest, GradientsPrefix_2ndGradientsMatchingNodeOf1st) { + BuildGraphAndAddGradientsWithPrefixes("gradients", "gradients/MatMul"); + ASSERT_EQ(TF_INVALID_ARGUMENT, TF_GetCode(s_)) << TF_Message(s_); +} + +TEST_F(CApiGradientsTest, GradientsPrefix_1stGradientsMatchingNodeOf2nd) { + BuildGraphAndAddGradientsWithPrefixes("gradients/MatMul", "gradients"); + ASSERT_EQ(TF_INVALID_ARGUMENT, TF_GetCode(s_)) << TF_Message(s_); +} - AddGradients(false, "mygrads_1", inputs, 2, outputs, 1, grad_outputs); +TEST_F(CApiGradientsTest, GradientsPrefix_2ndGradientsAsParentScopeOf1st) { + 
BuildGraphAndAddGradientsWithPrefixes("gradients/sub", "gradients"); ASSERT_EQ(TF_INVALID_ARGUMENT, TF_GetCode(s_)) << TF_Message(s_); } diff --git a/tensorflow/java/src/main/java/org/tensorflow/Graph.java b/tensorflow/java/src/main/java/org/tensorflow/Graph.java index 32853c1367..abca956b97 100644 --- a/tensorflow/java/src/main/java/org/tensorflow/Graph.java +++ b/tensorflow/java/src/main/java/org/tensorflow/Graph.java @@ -156,11 +156,10 @@ public final class Graph implements AutoCloseable { * {@code prefix} is used as the name prefix applied to all nodes added to the graph to compute gradients. It must * be unique within the provided graph or the operation will fail. *

- * If {@code prefix} is null, then the nodes will be added to under the default prefix, which is "gradients" for the - * first invocation, then "gradients_1", "gradients_2", etc. for any subsequent calls to the same graph. + * If {@code prefix} is null, then one will be chosen automatically. * * @param prefix unique string prefix applied before the names of nodes added to the graph to compute gradients. - * If null, defaults to "gradients". + * If null, a default one will be chosen. * @param y output of the function to derive * @param x inputs of the function for which partial derivatives are computed * @param dx if not null, the partial derivatives of some loss function {@code L} w.r.t. {@code y} -- cgit v1.2.3
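Taken together, the series leaves the higher-level Gradients operator deriving its prefix from scope.makeOpName("Gradients"), so custom prefixes compose with ordinary scope naming. A minimal sketch under the same assumptions as the earlier examples; "sub" and "MyGradients" are illustrative names:

import java.util.Arrays;
import org.tensorflow.DataType;
import org.tensorflow.Graph;
import org.tensorflow.Output;
import org.tensorflow.op.Scope;
import org.tensorflow.op.core.Gradients;

public final class GradientsScopeExample {
  public static void main(String[] args) {
    try (Graph g = new Graph()) {
      Scope scope = new Scope(g).withSubScope("sub");
      Output<Float> x = g.opBuilder("Placeholder", "x")
          .setAttr("dtype", DataType.FLOAT).build().output(0);
      Output<Float> y = g.opBuilder("Square", "y")
          .addInput(x).build().output(0);

      // Default operator name: gradient nodes land under "sub/Gradients/".
      Gradients grads = Gradients.create(scope, y, Arrays.asList(x));
      System.out.println(grads.dy(0).op().name());

      // An explicit operator name becomes the prefix: "sub/MyGradients/".
      Gradients named =
          Gradients.create(scope.withName("MyGradients"), y, Arrays.asList(x));
      System.out.println(named.dy(0).op().name());
    }
  }
}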