author     Geoffrey Irving <geoffreyi@google.com>            2016-07-18 13:56:52 -0800
committer  TensorFlower Gardener <gardener@tensorflow.org>   2016-07-18 15:03:31 -0700
commit     a33ebe0bfc0c6d0c617d48a71f6f5136be881041
tree       a2ab36aacb8335a22d18a4be592f2d38476c32f8
parent     d72d45b6701279e12a0dd8325f143162b9060f33
Fix tf.pow gradient at zero
d(x ^ y)/dy involves log(x), which creates a false singularity at x = 0.
Instead, use x > 0 ? log(x) : 0.
Fixes #2295.
Change: 127762504
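
To see the false singularity concretely: at x = 0, the analytic derivative d(x ^ y)/dy = x^y * log(x) evaluates as 0 * log(0) = 0 * (-inf) = NaN, even though the true gradient is 0. Below is a minimal standalone NumPy sketch of the guard this change adds; the function names are hypothetical illustrations, not code from the commit:

```python
import numpy as np

def dpow_dy_naive(x, y):
    # d(x**y)/dy = x**y * log(x); at x = 0 this is 0 * -inf = nan.
    with np.errstate(divide="ignore", invalid="ignore"):
        return np.power(x, y) * np.log(x)

def dpow_dy_safe(x, y):
    # The commit's guard: take log(x) only where x > 0, else 0.
    # The inner where keeps log from ever seeing a non-positive input.
    safe_log = np.where(x > 0, np.log(np.where(x > 0, x, 1.0)), 0.0)
    return np.power(x, y) * safe_log

x = np.array([0.0, 2.0])
y = np.array([2.0, 2.0])
print(dpow_dy_naive(x, y))  # [   nan 2.7726]
print(dpow_dy_safe(x, y))   # [0.     2.7726]
```

Note that where/Select evaluates both branches, so the graph below still contains the Log op ("unsafe_log"); its value is simply discarded wherever x <= 0, which is all the fix needs.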
```
 tensorflow/core/framework/tensor_testutil.cc     |  3
 tensorflow/core/ops/math_grad.cc                 | 15
 tensorflow/core/ops/math_grad_test.cc            | 50
 tensorflow/python/kernel_tests/cwise_ops_test.py |  9
 tensorflow/python/ops/math_grad.py               |  4
 5 files changed, 45 insertions, 36 deletions
```
```diff
diff --git a/tensorflow/core/framework/tensor_testutil.cc b/tensorflow/core/framework/tensor_testutil.cc
index b2f5329c7d..e307d25268 100644
--- a/tensorflow/core/framework/tensor_testutil.cc
+++ b/tensorflow/core/framework/tensor_testutil.cc
@@ -21,7 +21,8 @@ namespace test {
 
 template <typename T>
 bool IsClose(const T& x, const T& y, double atol, double rtol) {
-  return fabs(x - y) < atol + rtol * fabs(x);
+  // Need x == y so that infinities are close to themselves
+  return x == y || fabs(x - y) < atol + rtol * fabs(x);
 }
 
 template <typename T>
diff --git a/tensorflow/core/ops/math_grad.cc b/tensorflow/core/ops/math_grad.cc
index 5d64b0986e..1d8f45ea7a 100644
--- a/tensorflow/core/ops/math_grad.cc
+++ b/tensorflow/core/ops/math_grad.cc
@@ -378,16 +378,21 @@ Status PowGrad(const AttrSlice& attrs, FunctionDef* g) {
   return GradForBinaryCwise(g, {
       {{"z"}, "Pow", {"x", "y"}},
       // dz * y * Pow(x, y - 1)
-      FDH::Const("const", 1.0f),
-      {{"one"}, "Cast", {"const"}, {{"SrcT", DT_FLOAT}, {"DstT", "$T"}}},
+      FDH::Const("const_zero", 0.0f),
+      FDH::Const("const_one", 1.0f),
+      {{"zero"}, "Cast", {"const_zero"}, {{"SrcT", DT_FLOAT}, {"DstT", "$T"}}},
+      {{"one"}, "Cast", {"const_one"}, {{"SrcT", DT_FLOAT}, {"DstT", "$T"}}},
       {{"t0"}, "Sub", {"y", "one"}, {}, {"dz"}},
       {{"t1"}, "Pow", {"x", "t0"}},
       {{"t2"}, "Mul", {"dz", "y"}},
       {{"gx"}, "Mul", {"t1", "t2"}},
-      // dz * z * Log(x)
-      {{"t3"}, "Log", {"x"}, {}, {"dz"}},
+      // dz * z * (x > 0 ? Log(x) : 0)
+      {{"pos_x"}, "Greater", {"x", "zero"}},
+      {{"unsafe_log"}, "Log", {"x"}, {}, {"dz"}},
+      {{"zeros"}, "ZerosLike", {"x"}},
+      {{"safe_log"}, "Select", {"pos_x", "unsafe_log", "zeros"}},
       {{"t4"}, "Mul", {"dz", "z"}},
-      {{"gy"}, "Mul", {"t3", "t4"}},
+      {{"gy"}, "Mul", {"safe_log", "t4"}},
   });
   // clang-format on
 }
diff --git a/tensorflow/core/ops/math_grad_test.cc b/tensorflow/core/ops/math_grad_test.cc
index b38dd2b548..9af73b2da0 100644
--- a/tensorflow/core/ops/math_grad_test.cc
+++ b/tensorflow/core/ops/math_grad_test.cc
@@ -651,44 +651,36 @@ TEST_F(MathGradTest, Div) {
 }
 
 TEST_F(MathGradTest, Pow) {
-  auto x = test::AsTensor<float>({1.f, 2.f, 3.f, 4.f, 5.f, 6.f},
+  auto x = test::AsTensor<float>({0.f, 1.f, 2.f, 3.f, 4.f, 5.f},
                                  TensorShape({2, 3}));
   auto y = test::AsTensor<float>({.5f, 2.f}, TensorShape({2, 1}));
   Tensor dx;
   Tensor dy;
+  auto g = [](float x, float y) { return y * std::pow(x, y - 1); };
+  auto h = [](float x, float y) {
+    return std::pow(x, y) * (x ? std::log(x) : 0);
+  };
   {
     SymGrad("Pow", x, y, &dx, &dy);
-    {
-      auto g = [](float x, float y) { return y * std::pow(x, y - 1); };
-      test::ExpectClose(
-          dx, test::AsTensor<float>({g(1.f, .5f), g(2.f, .5f), g(3.f, .5f),
-                                     g(4.f, 2.f), g(5.f, 2.f), g(6.f, 2.f)},
-                                    TensorShape({2, 3})));
-    }
-    {
-      auto g = [](float x, float y) { return std::pow(x, y) * std::log(x); };
-      test::ExpectClose(
-          dy, test::AsTensor<float>({g(1.f, .5f) + g(2.f, .5f) + g(3.f, .5f),
-                                     g(4.f, 2.f) + g(5.f, 2.f) + g(6.f, 2.f)},
-                                    TensorShape({2, 1})));
-    }
+    test::ExpectClose(
+        dx, test::AsTensor<float>({g(0.f, .5f), g(1.f, .5f), g(2.f, .5f),
+                                   g(3.f, 2.f), g(4.f, 2.f), g(5.f, 2.f)},
+                                  TensorShape({2, 3})));
+    test::ExpectClose(
+        dy, test::AsTensor<float>({h(0.f, .5f) + h(1.f, .5f) + h(2.f, .5f),
+                                   h(3.f, 2.f) + h(4.f, 2.f) + h(5.f, 2.f)},
+                                  TensorShape({2, 1})));
   }
   {  // Swap x and y
     SymGrad("Pow", y, x, &dy, &dx);
-    {
-      auto g = [](float x, float y) { return y * std::pow(x, y - 1); };
-      test::ExpectClose(
-          dy, test::AsTensor<float>({g(.5f, 1.f) + g(.5f, 2.f) + g(.5f, 3.f),
-                                     g(2.f, 4.f) + g(2.f, 5.f) + g(2.f, 6.f)},
-                                    TensorShape({2, 1})));
-    }
-    {
-      auto g = [](float x, float y) { return std::pow(x, y) * std::log(x); };
-      test::ExpectClose(
-          dx, test::AsTensor<float>({g(.5f, 1.f), g(.5f, 2.f), g(.5f, 3.f),
-                                     g(2.f, 4.f), g(2.f, 5.f), g(2.f, 6.f)},
-                                    TensorShape({2, 3})));
-    }
+    test::ExpectClose(
+        dy, test::AsTensor<float>({g(.5f, 0.f) + g(.5f, 1.f) + g(.5f, 2.f),
+                                   g(2.f, 3.f) + g(2.f, 4.f) + g(2.f, 5.f)},
+                                  TensorShape({2, 1})));
+    test::ExpectClose(
+        dx, test::AsTensor<float>({h(.5f, 0.f), h(.5f, 1.f), h(.5f, 2.f),
+                                   h(2.f, 3.f), h(2.f, 4.f), h(2.f, 5.f)},
+                                  TensorShape({2, 3})));
   }
 }
diff --git a/tensorflow/python/kernel_tests/cwise_ops_test.py b/tensorflow/python/kernel_tests/cwise_ops_test.py
index 093da97469..e5406ff87a 100644
--- a/tensorflow/python/kernel_tests/cwise_ops_test.py
+++ b/tensorflow/python/kernel_tests/cwise_ops_test.py
@@ -990,6 +990,15 @@ class BinaryOpTest(tf.test.TestCase):
       func(tf.convert_to_tensor([10.0, 20.0, 30.0]),
            tf.convert_to_tensor([[40.0, 50.0], [60.0, 70.0]]))
 
+  def testZeroPowGrad(self):
+    with self.test_session():
+      for dtype in np.float16, np.float32, np.float64:
+        x = tf.constant(0.0, dtype=dtype)
+        y = tf.constant(2.0, dtype=dtype)
+        z = tf.pow(x, y)
+        error = tf.test.compute_gradient_error(y, [], z, [])
+        self.assertEqual(error, 0)
+
 
 class ComparisonOpTest(tf.test.TestCase):
diff --git a/tensorflow/python/ops/math_grad.py b/tensorflow/python/ops/math_grad.py
index 78843e33e7..9e7a922b2a 100644
--- a/tensorflow/python/ops/math_grad.py
+++ b/tensorflow/python/ops/math_grad.py
@@ -523,8 +523,10 @@ def _PowGrad(op, grad):
   rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy)
   gx = array_ops.reshape(
       math_ops.reduce_sum(grad * y * math_ops.pow(x, y - 1), rx), sx)
+  # Avoid false singularity at x = 0
+  log_x = math_ops.select(x > 0, math_ops.log(x), array_ops.zeros_like(x))
   gy = array_ops.reshape(
-      math_ops.reduce_sum(grad * z * math_ops.log(x), ry), sy)
+      math_ops.reduce_sum(grad * z * log_x, ry), sy)
   return gx, gy
```
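
As a quick sanity check of the repaired formula, a central finite difference in y at x = 0 agrees with the guarded analytic gradient. This standalone NumPy sketch mirrors what the new testZeroPowGrad asserts via compute_gradient_error, but is not part of the commit; pow_grad_y is a hypothetical helper name:

```python
import numpy as np

def pow_grad_y(x, y):
    # Fixed gradient of x**y w.r.t. y: x**y * (log(x) if x > 0 else 0).
    return np.power(x, y) * (np.log(x) if x > 0 else 0.0)

x, y, eps = 0.0, 2.0, 1e-6
# Central difference: 0**(2 +/- eps) is 0 on both sides, so the slope is 0.
numeric = (np.power(x, y + eps) - np.power(x, y - eps)) / (2 * eps)
print(numeric, pow_grad_y(x, y))  # both 0.0: no more nan at x = 0
```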