From 775dc85b6546af942247c3eb76d98ae5d614fe9a Mon Sep 17 00:00:00 2001
From: Eugene Brevdo <ebrevdo@google.com>
Date: Tue, 28 Jun 2016 08:14:48 -0800
Subject: BayesFlow: Add simple example of using REINFORCE via Stochastic
 Computation Graphs. Change: 126081162

---
 tensorflow/contrib/bayesflow/BUILD                 |  15 ++-
 .../reinforce_simple/reinforce_simple_example.py   | 143 +++++++++++++++++++++
 2 files changed, 156 insertions(+), 2 deletions(-)
 create mode 100644 tensorflow/contrib/bayesflow/examples/reinforce_simple/reinforce_simple_example.py

(limited to 'tensorflow/contrib/bayesflow')

diff --git a/tensorflow/contrib/bayesflow/BUILD b/tensorflow/contrib/bayesflow/BUILD
index ca2f120569..98e7f875c6 100644
--- a/tensorflow/contrib/bayesflow/BUILD
+++ b/tensorflow/contrib/bayesflow/BUILD
@@ -8,7 +8,7 @@ exports_files(["LICENSE"])
 
 package(default_visibility = ["//tensorflow:__subpackages__"])
 
-load("//tensorflow:tensorflow.bzl", "cuda_py_tests")
+load("//tensorflow:tensorflow.bzl", "cuda_py_test")
 
 py_library(
     name = "bayesflow_py",
@@ -16,7 +16,7 @@ py_library(
     srcs_version = "PY2AND3",
 )
 
-cuda_py_tests(
+cuda_py_test(
     name = "stochastic_graph_test",
     size = "small",
     srcs = ["python/kernel_tests/stochastic_graph_test.py"],
@@ -27,6 +27,17 @@ cuda_py_tests(
     ],
 )
 
+cuda_py_test(
+    name = "reinforce_simple_example",
+    size = "small",
+    srcs = ["examples/reinforce_simple/reinforce_simple_example.py"],
+    additional_deps = [
+        ":bayesflow_py",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:platform_test",
+    ],
+)
+
 filegroup(
     name = "all_files",
     srcs = glob(
diff --git a/tensorflow/contrib/bayesflow/examples/reinforce_simple/reinforce_simple_example.py b/tensorflow/contrib/bayesflow/examples/reinforce_simple/reinforce_simple_example.py
new file mode 100644
index 0000000000..1760113528
--- /dev/null
+++ b/tensorflow/contrib/bayesflow/examples/reinforce_simple/reinforce_simple_example.py
@@ -0,0 +1,143 @@
+# Copyright 2016 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Simple example of REINFORCE built with stochastic computation graphs."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow as tf
+
+
+distributions = tf.contrib.distributions
+sg = tf.contrib.bayesflow.stochastic_graph
+
+
+def split_apply_merge(inp, partitions, fns):
+  """Partition inp, run each partition through its function, then re-merge.
+
+  Args:
+    inp: the input vector
+    partitions: tensor of same length as input vector; entry i picks fns[k]
+    fns: the functions to route through, one per partition value.
+
+  Returns:
+    the vector routed, where routed[i] = fns[partitions[i]](inp[i])
+  """
+  pieces = tf.dynamic_partition(inp, partitions, len(fns))
+  routed = [fn(piece) for fn, piece in zip(fns, pieces)]
+  row_ids = tf.range(0, inp.get_shape()[0])
+  row_ids_by_fn = tf.dynamic_partition(row_ids, partitions, len(fns))
+  return tf.dynamic_stitch(row_ids_by_fn, routed)
+
+
+def plus_1(inputs):
+  return inputs + 1.0  # Adds 1.0 (elementwise for a tensor argument).
+
+
+def minus_1(inputs):
+  return inputs - 1.0  # Subtracts 1.0 (elementwise for a tensor argument).
+
+
+def build_split_apply_merge_model():
+  """Build the Split-Apply-Merge Model.
+
+  Route each value of input [-1, -1, 1, 1] through one of the
+  functions, plus_1, minus_1.  The decision for routing is made by
+  4 two-way Categorical R.V.s (one per input) whose logits come from a
+  single affine layer applied to the input.  REINFORCE trains that layer.
+
+  Returns:
+    The 3-tuple (route_selection, routing_loss, final_loss), where:
+
+      - route_selection is the routing DistributionTensor (int 4-vector sample)
+      - routing_loss is a float 4-vector
+      - final_loss is a float scalar.
+  """
+  inputs = tf.constant([[-1.0], [-1.0], [1.0], [1.0]])
+  targets = tf.constant([[0.0], [0.0], [0.0], [0.0]])
+  paths = [plus_1, minus_1]
+  weights = tf.get_variable("w", [1, 2])
+  bias = tf.get_variable("b", [1, 1])
+  logits = tf.matmul(inputs, weights) + bias  # [4, 2]: one logit per path
+
+  # REINFORCE forward step: one stochastic path choice per input.
+  route_selection = sg.DistributionTensor(
+      distributions.Categorical, logits=logits)
+
+  # Accessing route_selection as a Tensor below forces a sample of
+  # the Categorical distribution based on its logits.
+  # This is equivalent to calling route_selection.value().
+  #
+  # route_selection.value() returns an int32 4-vector with random
+  # values in {0, 1}; value k sends that input through paths[k].
+  # Split the inputs by route, apply each path, and stitch them back in order.
+  outputs = split_apply_merge(inputs, route_selection, paths)
+
+  # Per-input squared error, flattened from a [4, 1] column to a 4-vector.
+  routing_loss = tf.reshape(tf.square(outputs - targets), shape=[-1])
+
+  # REINFORCE loss.  sg.surrogate_losses([routing_loss]) returns
+  #  [stop_gradient(routing_loss) *
+  #   route_selection.log_pmf(stop_gradient(route_selection.value()))],
+  # where log_pmf has gradients going all the way back to weights and bias.
+  # The sampled routing itself is not differentiable, so this term is the
+  # only path by which the loss can train the weights and bias.
+  score_function_losses = sg.surrogate_losses([routing_loss])
+
+  # Calculate the entire loss:
+  #   routing_loss, and the score function loss.
+  # In this case, the routing_loss depends on the variables only through
+  # "route_selection", which has a stop_gradient on it, so the gradient
+  # of the loss really comes through the score function.
+  all_loss = score_function_losses + [routing_loss]
+  final_loss = tf.reduce_sum(tf.add_n(all_loss))
+
+  return (route_selection, routing_loss, final_loss)
+
+
+class REINFORCESimpleExample(tf.test.TestCase):
+  """Checks that REINFORCE learns the zero-loss routing on the toy model."""
+
+  def testSplitApplyMerge(self):
+    # Repeatability.  SGD has a tendency to jump around, even here.
+    tf.set_random_seed(1)
+
+    with self.test_session() as sess:
+      # Use sampling to train REINFORCE
+      with sg.value_type(sg.SampleAndReshapeValue(n=1)):
+        route_selection, routing_loss, final_loss = (
+            build_split_apply_merge_model())
+
+      sgd = tf.train.GradientDescentOptimizer(1.0).minimize(final_loss)
+      # Build the fetch list once so the loop does not add a new op per step.
+      fetches = [routing_loss, final_loss, tf.identity(route_selection), sgd]
+
+      tf.initialize_all_variables().run()
+
+      for i in range(10):
+        # Run loss and inference step.  This toy problem converges VERY quickly.
+        routing_loss_v, final_loss_v, route_selection_v, _ = sess.run(fetches)
+        print("Iteration %d, routing loss: %s, final_loss: %s, "
+              "route selection: %s"
+              % (i, routing_loss_v, final_loss_v, route_selection_v))
+
+      self.assertAllEqual([0, 0, 1, 1], route_selection_v)
+      self.assertAllClose([0.0, 0.0, 0.0, 0.0], routing_loss_v)
+      self.assertAllClose(0.0, final_loss_v)
+
+
+if __name__ == "__main__":
+  tf.test.main()
-- 
cgit v1.2.3