author     Geoffrey Irving <geoffreyi@google.com>            2016-07-18 13:56:52 -0800
committer  TensorFlower Gardener <gardener@tensorflow.org>   2016-07-18 15:03:31 -0700
commit     a33ebe0bfc0c6d0c617d48a71f6f5136be881041
tree       a2ab36aacb8335a22d18a4be592f2d38476c32f8
parent     d72d45b6701279e12a0dd8325f143162b9060f33
Fix tf.pow gradient at zero
d(x ^ y)/dy involves log(x), which creates a false singularity at x = 0.
Instead, use x > 0 ? log(x) : 0.
Fixes #2295.
Change: 127762504
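
To see the false singularity concretely: at x = 0, the analytic derivative d(x ^ y)/dy = x^y * log(x) evaluates as 0 * log(0) = 0 * (-inf) = NaN, even though the true gradient is 0. Below is a minimal standalone NumPy sketch of the guard this change adds; the function names are hypothetical illustrations, not code from the commit:

```python
import numpy as np

def dpow_dy_naive(x, y):
    # d(x**y)/dy = x**y * log(x); at x = 0 this is 0 * -inf = nan.
    with np.errstate(divide="ignore", invalid="ignore"):
        return np.power(x, y) * np.log(x)

def dpow_dy_safe(x, y):
    # The commit's guard: take log(x) only where x > 0, else 0.
    # The inner where keeps log from ever seeing a non-positive input.
    safe_log = np.where(x > 0, np.log(np.where(x > 0, x, 1.0)), 0.0)
    return np.power(x, y) * safe_log

x = np.array([0.0, 2.0])
y = np.array([2.0, 2.0])
print(dpow_dy_naive(x, y))  # [   nan 2.7726]
print(dpow_dy_safe(x, y))   # [0.     2.7726]
```

Note that where/Select evaluates both branches, so the graph below still contains the Log op ("unsafe_log"); its value is simply discarded wherever x <= 0, which is all the fix needs.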
```
 tensorflow/core/framework/tensor_testutil.cc     |  3
 tensorflow/core/ops/math_grad.cc                 | 15
 tensorflow/core/ops/math_grad_test.cc            | 50
 tensorflow/python/kernel_tests/cwise_ops_test.py |  9
 tensorflow/python/ops/math_grad.py               |  4
 5 files changed, 45 insertions, 36 deletions
```
```diff
diff --git a/tensorflow/core/framework/tensor_testutil.cc b/tensorflow/core/framework/tensor_testutil.cc
index b2f5329c7d..e307d25268 100644
--- a/tensorflow/core/framework/tensor_testutil.cc
+++ b/tensorflow/core/framework/tensor_testutil.cc
@@ -21,7 +21,8 @@ namespace test {
 
 template <typename T>
 bool IsClose(const T& x, const T& y, double atol, double rtol) {
-  return fabs(x - y) < atol + rtol * fabs(x);
+  // Need x == y so that infinities are close to themselves
+  return x == y || fabs(x - y) < atol + rtol * fabs(x);
 }
 
 template <typename T>
diff --git a/tensorflow/core/ops/math_grad.cc b/tensorflow/core/ops/math_grad.cc
index 5d64b0986e..1d8f45ea7a 100644
--- a/tensorflow/core/ops/math_grad.cc
+++ b/tensorflow/core/ops/math_grad.cc
@@ -378,16 +378,21 @@ Status PowGrad(const AttrSlice& attrs, FunctionDef* g) {
   return GradForBinaryCwise(g, {
       {{"z"}, "Pow", {"x", "y"}},
       // dz * y * Pow(x, y - 1)
-      FDH::Const("const", 1.0f),
-      {{"one"}, "Cast", {"const"}, {{"SrcT", DT_FLOAT}, {"DstT", "$T"}}},
+      FDH::Const("const_zero", 0.0f),
+      FDH::Const("const_one", 1.0f),
+      {{"zero"}, "Cast", {"const_zero"}, {{"SrcT", DT_FLOAT}, {"DstT", "$T"}}},
+      {{"one"}, "Cast", {"const_one"}, {{"SrcT", DT_FLOAT}, {"DstT", "$T"}}},
       {{"t0"}, "Sub", {"y", "one"}, {}, {"dz"}},
       {{"t1"}, "Pow", {"x", "t0"}},
       {{"t2"}, "Mul", {"dz", "y"}},
       {{"gx"}, "Mul", {"t1", "t2"}},
-      // dz * z * Log(x)
-      {{"t3"}, "Log", {"x"}, {}, {"dz"}},
+      // dz * z * (x > 0 ? Log(x) : 0)
+      {{"pos_x"}, "Greater", {"x", "zero"}},
+      {{"unsafe_log"}, "Log", {"x"}, {}, {"dz"}},
+      {{"zeros"}, "ZerosLike", {"x"}},
+      {{"safe_log"}, "Select", {"pos_x", "unsafe_log", "zeros"}},
       {{"t4"}, "Mul", {"dz", "z"}},
-      {{"gy"}, "Mul", {"t3", "t4"}},
+      {{"gy"}, "Mul", {"safe_log", "t4"}},
   });
   // clang-format on
 }
diff --git a/tensorflow/core/ops/math_grad_test.cc b/tensorflow/core/ops/math_grad_test.cc
index b38dd2b548..9af73b2da0 100644
--- a/tensorflow/core/ops/math_grad_test.cc
+++ b/tensorflow/core/ops/math_grad_test.cc
@@ -651,44 +651,36 @@ TEST_F(MathGradTest, Div) {
 }
 
 TEST_F(MathGradTest, Pow) {
-  auto x = test::AsTensor<float>({1.f, 2.f, 3.f, 4.f, 5.f, 6.f},
+  auto x = test::AsTensor<float>({0.f, 1.f, 2.f, 3.f, 4.f, 5.f},
                                  TensorShape({2, 3}));
   auto y = test::AsTensor<float>({.5f, 2.f}, TensorShape({2, 1}));
   Tensor dx;
   Tensor dy;
+  auto g = [](float x, float y) { return y * std::pow(x, y - 1); };
+  auto h = [](float x, float y) {
+    return std::pow(x, y) * (x ? std::log(x) : 0);
+  };
   {
     SymGrad("Pow", x, y, &dx, &dy);
-    {
-      auto g = [](float x, float y) { return y * std::pow(x, y - 1); };
-      test::ExpectClose(
-          dx, test::AsTensor<float>({g(1.f, .5f), g(2.f, .5f), g(3.f, .5f),
-                                     g(4.f, 2.f), g(5.f, 2.f), g(6.f, 2.f)},
-                                    TensorShape({2, 3})));
-    }
-    {
-      auto g = [](float x, float y) { return std::pow(x, y) * std::log(x); };
-      test::ExpectClose(
-          dy, test::AsTensor<float>({g(1.f, .5f) + g(2.f, .5f) + g(3.f, .5f),
-                                     g(4.f, 2.f) + g(5.f, 2.f) + g(6.f, 2.f)},
-                                    TensorShape({2, 1})));
-    }
+    test::ExpectClose(
+        dx, test::AsTensor<float>({g(0.f, .5f), g(1.f, .5f), g(2.f, .5f),
+                                   g(3.f, 2.f), g(4.f, 2.f), g(5.f, 2.f)},
+                                  TensorShape({2, 3})));
+    test::ExpectClose(
+        dy, test::AsTensor<float>({h(0.f, .5f) + h(1.f, .5f) + h(2.f, .5f),
+                                   h(3.f, 2.f) + h(4.f, 2.f) + h(5.f, 2.f)},
+                                  TensorShape({2, 1})));
   }
   {  // Swap x and y
     SymGrad("Pow", y, x, &dy, &dx);
-    {
-      auto g = [](float x, float y) { return y * std::pow(x, y - 1); };
-      test::ExpectClose(
-          dy, test::AsTensor<float>({g(.5f, 1.f) + g(.5f, 2.f) + g(.5f, 3.f),
-                                     g(2.f, 4.f) + g(2.f, 5.f) + g(2.f, 6.f)},
-                                    TensorShape({2, 1})));
-    }
-    {
-      auto g = [](float x, float y) { return std::pow(x, y) * std::log(x); };
-      test::ExpectClose(
-          dx, test::AsTensor<float>({g(.5f, 1.f), g(.5f, 2.f), g(.5f, 3.f),
-                                     g(2.f, 4.f), g(2.f, 5.f), g(2.f, 6.f)},
-                                    TensorShape({2, 3})));
-    }
+    test::ExpectClose(
+        dy, test::AsTensor<float>({g(.5f, 0.f) + g(.5f, 1.f) + g(.5f, 2.f),
+                                   g(2.f, 3.f) + g(2.f, 4.f) + g(2.f, 5.f)},
+                                  TensorShape({2, 1})));
+    test::ExpectClose(
+        dx, test::AsTensor<float>({h(.5f, 0.f), h(.5f, 1.f), h(.5f, 2.f),
+                                   h(2.f, 3.f), h(2.f, 4.f), h(2.f, 5.f)},
+                                  TensorShape({2, 3})));
   }
 }
diff --git a/tensorflow/python/kernel_tests/cwise_ops_test.py b/tensorflow/python/kernel_tests/cwise_ops_test.py
index 093da97469..e5406ff87a 100644
--- a/tensorflow/python/kernel_tests/cwise_ops_test.py
+++ b/tensorflow/python/kernel_tests/cwise_ops_test.py
@@ -990,6 +990,15 @@ class BinaryOpTest(tf.test.TestCase):
       func(tf.convert_to_tensor([10.0, 20.0, 30.0]),
            tf.convert_to_tensor([[40.0, 50.0], [60.0, 70.0]]))
 
+  def testZeroPowGrad(self):
+    with self.test_session():
+      for dtype in np.float16, np.float32, np.float64:
+        x = tf.constant(0.0, dtype=dtype)
+        y = tf.constant(2.0, dtype=dtype)
+        z = tf.pow(x, y)
+        error = tf.test.compute_gradient_error(y, [], z, [])
+        self.assertEqual(error, 0)
+
 
 class ComparisonOpTest(tf.test.TestCase):
diff --git a/tensorflow/python/ops/math_grad.py b/tensorflow/python/ops/math_grad.py
index 78843e33e7..9e7a922b2a 100644
--- a/tensorflow/python/ops/math_grad.py
+++ b/tensorflow/python/ops/math_grad.py
@@ -523,8 +523,10 @@ def _PowGrad(op, grad):
   rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy)
   gx = array_ops.reshape(
       math_ops.reduce_sum(grad * y * math_ops.pow(x, y - 1), rx), sx)
+  # Avoid false singularity at x = 0
+  log_x = math_ops.select(x > 0, math_ops.log(x), array_ops.zeros_like(x))
   gy = array_ops.reshape(
-      math_ops.reduce_sum(grad * z * math_ops.log(x), ry), sy)
+      math_ops.reduce_sum(grad * z * log_x, ry), sy)
   return gx, gy
```
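
As a quick sanity check of the repaired formula, a central finite difference in y at x = 0 agrees with the guarded analytic gradient. This standalone NumPy sketch mirrors what the new testZeroPowGrad asserts via compute_gradient_error, but is not part of the commit; pow_grad_y is a hypothetical helper name:

```python
import numpy as np

def pow_grad_y(x, y):
    # Fixed gradient of x**y w.r.t. y: x**y * (log(x) if x > 0 else 0).
    return np.power(x, y) * (np.log(x) if x > 0 else 0.0)

x, y, eps = 0.0, 2.0, 1e-6
# Central difference: 0**(2 +/- eps) is 0 on both sides, so the slope is 0.
numeric = (np.power(x, y + eps) - np.power(x, y - eps)) / (2 * eps)
print(numeric, pow_grad_y(x, y))  # both 0.0: no more nan at x = 0
```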