author    Geoffrey Irving <geoffreyi@google.com>    2016-07-18 13:56:52 -0800
committer TensorFlower Gardener <gardener@tensorflow.org>    2016-07-18 15:03:31 -0700
commit    a33ebe0bfc0c6d0c617d48a71f6f5136be881041 (patch)
tree      a2ab36aacb8335a22d18a4be592f2d38476c32f8
parent    d72d45b6701279e12a0dd8325f143162b9060f33 (diff)
Fix tf.pow gradient at zero
d(x ^ y)/dy involves log(x), which creates a false singularity at x = 0. Instead, use x > 0 ? log(x) : 0. Fixes #2295. Change: 127762504
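For intuition, a minimal NumPy sketch (not part of this change; the values are illustrative): the forward value pow(0, 2) is a perfectly finite 0, but the naive dz/dy term z * log(x) multiplies 0 by -inf and yields nan, while the guarded log gives the correct 0.

```python
import numpy as np

x, y = 0.0, 2.0
z = x ** y                              # pow(0, 2) = 0.0, finite
naive_dz_dy = z * np.log(x)             # 0 * -inf -> nan (plus a divide-by-zero warning)
safe_log = np.log(x) if x > 0 else 0.0  # x > 0 ? log(x) : 0
fixed_dz_dy = z * safe_log              # 0 * 0 -> 0.0
print(naive_dz_dy, fixed_dz_dy)         # nan 0.0
```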
-rw-r--r-- tensorflow/core/framework/tensor_testutil.cc       3
-rw-r--r-- tensorflow/core/ops/math_grad.cc                  15
-rw-r--r-- tensorflow/core/ops/math_grad_test.cc             50
-rw-r--r-- tensorflow/python/kernel_tests/cwise_ops_test.py   9
-rw-r--r-- tensorflow/python/ops/math_grad.py                 4
5 files changed, 45 insertions(+), 36 deletions(-)
diff --git a/tensorflow/core/framework/tensor_testutil.cc b/tensorflow/core/framework/tensor_testutil.cc
index b2f5329c7d..e307d25268 100644
--- a/tensorflow/core/framework/tensor_testutil.cc
+++ b/tensorflow/core/framework/tensor_testutil.cc
@@ -21,7 +21,8 @@ namespace test {
template <typename T>
bool IsClose(const T& x, const T& y, double atol, double rtol) {
- return fabs(x - y) < atol + rtol * fabs(x);
+ // Need x == y so that infinities are close to themselves
+ return x == y || fabs(x - y) < atol + rtol * fabs(x);
}
template <typename T>
diff --git a/tensorflow/core/ops/math_grad.cc b/tensorflow/core/ops/math_grad.cc
index 5d64b0986e..1d8f45ea7a 100644
--- a/tensorflow/core/ops/math_grad.cc
+++ b/tensorflow/core/ops/math_grad.cc
@@ -378,16 +378,21 @@ Status PowGrad(const AttrSlice& attrs, FunctionDef* g) {
return GradForBinaryCwise(g, {
{{"z"}, "Pow", {"x", "y"}},
// dz * y * Pow(x, y - 1)
- FDH::Const("const", 1.0f),
- {{"one"}, "Cast", {"const"}, {{"SrcT", DT_FLOAT}, {"DstT", "$T"}}},
+ FDH::Const("const_zero", 0.0f),
+ FDH::Const("const_one", 1.0f),
+ {{"zero"}, "Cast", {"const_zero"}, {{"SrcT", DT_FLOAT}, {"DstT", "$T"}}},
+ {{"one"}, "Cast", {"const_one"}, {{"SrcT", DT_FLOAT}, {"DstT", "$T"}}},
{{"t0"}, "Sub", {"y", "one"}, {}, {"dz"}},
{{"t1"}, "Pow", {"x", "t0"}},
{{"t2"}, "Mul", {"dz", "y"}},
{{"gx"}, "Mul", {"t1", "t2"}},
- // dz * z * Log(x)
- {{"t3"}, "Log", {"x"}, {}, {"dz"}},
+ // dz * z * (x > 0 ? Log(x) : 0)
+ {{"pos_x"}, "Greater", {"x", "zero"}},
+ {{"unsafe_log"}, "Log", {"x"}, {}, {"dz"}},
+ {{"zeros"}, "ZerosLike", {"x"}},
+ {{"safe_log"}, "Select", {"pos_x", "unsafe_log", "zeros"}},
{{"t4"}, "Mul", {"dz", "z"}},
- {{"gy"}, "Mul", {"t3", "t4"}},
+ {{"gy"}, "Mul", {"safe_log", "t4"}},
});
// clang-format on
}
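Read as straight-line code, the gradient graph above computes the following. This is a NumPy paraphrase for illustration only; the function name and variables are made up and the broadcast-reduction that GradForBinaryCwise performs is omitted.

```python
import numpy as np

def pow_grad(x, y, dz):
    """NumPy paraphrase of the PowGrad FunctionDef above (illustrative only)."""
    z = np.power(x, y)
    gx = dz * y * np.power(x, y - 1)             # dz * y * Pow(x, y - 1)
    unsafe_log = np.log(x)                       # -inf / nan where x <= 0 (warning expected)
    safe_log = np.where(x > 0, unsafe_log, 0.0)  # the Greater / ZerosLike / Select nodes
    gy = dz * z * safe_log                       # dz * z * (x > 0 ? log(x) : 0)
    return gx, gy

print(pow_grad(np.array([0., 2.]), 2., 1.))      # gx = [0., 4.], gy = [0., ~2.77], no nans
```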
diff --git a/tensorflow/core/ops/math_grad_test.cc b/tensorflow/core/ops/math_grad_test.cc
index b38dd2b548..9af73b2da0 100644
--- a/tensorflow/core/ops/math_grad_test.cc
+++ b/tensorflow/core/ops/math_grad_test.cc
@@ -651,44 +651,36 @@ TEST_F(MathGradTest, Div) {
}
TEST_F(MathGradTest, Pow) {
- auto x = test::AsTensor<float>({1.f, 2.f, 3.f, 4.f, 5.f, 6.f},
+ auto x = test::AsTensor<float>({0.f, 1.f, 2.f, 3.f, 4.f, 5.f},
TensorShape({2, 3}));
auto y = test::AsTensor<float>({.5f, 2.f}, TensorShape({2, 1}));
Tensor dx;
Tensor dy;
+ auto g = [](float x, float y) { return y * std::pow(x, y - 1); };
+ auto h = [](float x, float y) {
+ return std::pow(x, y) * (x ? std::log(x) : 0);
+ };
{
SymGrad("Pow", x, y, &dx, &dy);
- {
- auto g = [](float x, float y) { return y * std::pow(x, y - 1); };
- test::ExpectClose(
- dx, test::AsTensor<float>({g(1.f, .5f), g(2.f, .5f), g(3.f, .5f),
- g(4.f, 2.f), g(5.f, 2.f), g(6.f, 2.f)},
- TensorShape({2, 3})));
- }
- {
- auto g = [](float x, float y) { return std::pow(x, y) * std::log(x); };
- test::ExpectClose(
- dy, test::AsTensor<float>({g(1.f, .5f) + g(2.f, .5f) + g(3.f, .5f),
- g(4.f, 2.f) + g(5.f, 2.f) + g(6.f, 2.f)},
- TensorShape({2, 1})));
- }
+ test::ExpectClose(
+ dx, test::AsTensor<float>({g(0.f, .5f), g(1.f, .5f), g(2.f, .5f),
+ g(3.f, 2.f), g(4.f, 2.f), g(5.f, 2.f)},
+ TensorShape({2, 3})));
+ test::ExpectClose(
+ dy, test::AsTensor<float>({h(0.f, .5f) + h(1.f, .5f) + h(2.f, .5f),
+ h(3.f, 2.f) + h(4.f, 2.f) + h(5.f, 2.f)},
+ TensorShape({2, 1})));
}
{ // Swap x and y
SymGrad("Pow", y, x, &dy, &dx);
- {
- auto g = [](float x, float y) { return y * std::pow(x, y - 1); };
- test::ExpectClose(
- dy, test::AsTensor<float>({g(.5f, 1.f) + g(.5f, 2.f) + g(.5f, 3.f),
- g(2.f, 4.f) + g(2.f, 5.f) + g(2.f, 6.f)},
- TensorShape({2, 1})));
- }
- {
- auto g = [](float x, float y) { return std::pow(x, y) * std::log(x); };
- test::ExpectClose(
- dx, test::AsTensor<float>({g(.5f, 1.f), g(.5f, 2.f), g(.5f, 3.f),
- g(2.f, 4.f), g(2.f, 5.f), g(2.f, 6.f)},
- TensorShape({2, 3})));
- }
+ test::ExpectClose(
+ dy, test::AsTensor<float>({g(.5f, 0.f) + g(.5f, 1.f) + g(.5f, 2.f),
+ g(2.f, 3.f) + g(2.f, 4.f) + g(2.f, 5.f)},
+ TensorShape({2, 1})));
+ test::ExpectClose(
+ dx, test::AsTensor<float>({h(.5f, 0.f), h(.5f, 1.f), h(.5f, 2.f),
+ h(2.f, 3.f), h(2.f, 4.f), h(2.f, 5.f)},
+ TensorShape({2, 3})));
}
}
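In the rewritten test, g(x, y) = y * x^(y-1) is the partial derivative with respect to x and h(x, y) = x^y * (log x if x != 0 else 0) is the partial with respect to y, matching the guarded gradient above. A quick finite-difference sanity check of h away from the singularity, as a NumPy sketch (not part of the test):

```python
import numpy as np

def h(x, y):
    return np.power(x, y) * (np.log(x) if x else 0.0)

x, y, eps = 2.0, 3.0, 1e-6
numeric = (np.power(x, y + eps) - np.power(x, y - eps)) / (2 * eps)
print(h(x, y), numeric)   # both ~ 2**3 * log(2) ≈ 5.545
```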
diff --git a/tensorflow/python/kernel_tests/cwise_ops_test.py b/tensorflow/python/kernel_tests/cwise_ops_test.py
index 093da97469..e5406ff87a 100644
--- a/tensorflow/python/kernel_tests/cwise_ops_test.py
+++ b/tensorflow/python/kernel_tests/cwise_ops_test.py
@@ -990,6 +990,15 @@ class BinaryOpTest(tf.test.TestCase):
func(tf.convert_to_tensor([10.0, 20.0, 30.0]),
tf.convert_to_tensor([[40.0, 50.0], [60.0, 70.0]]))
+ def testZeroPowGrad(self):
+ with self.test_session():
+ for dtype in np.float16, np.float32, np.float64:
+ x = tf.constant(0.0, dtype=dtype)
+ y = tf.constant(2.0, dtype=dtype)
+ z = tf.pow(x, y)
+ error = tf.test.compute_gradient_error(y, [], z, [])
+ self.assertEqual(error, 0)
+
class ComparisonOpTest(tf.test.TestCase):
diff --git a/tensorflow/python/ops/math_grad.py b/tensorflow/python/ops/math_grad.py
index 78843e33e7..9e7a922b2a 100644
--- a/tensorflow/python/ops/math_grad.py
+++ b/tensorflow/python/ops/math_grad.py
@@ -523,8 +523,10 @@ def _PowGrad(op, grad):
rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy)
gx = array_ops.reshape(
math_ops.reduce_sum(grad * y * math_ops.pow(x, y - 1), rx), sx)
+ # Avoid false singularity at x = 0
+ log_x = math_ops.select(x > 0, math_ops.log(x), array_ops.zeros_like(x))
gy = array_ops.reshape(
- math_ops.reduce_sum(grad * z * math_ops.log(x), ry), sy)
+ math_ops.reduce_sum(grad * z * log_x, ry), sy)
return gx, gy
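End to end, the change means the symbolic gradient of tf.pow at x = 0 is finite instead of nan. A minimal usage sketch, assuming the graph-mode tf.gradients / tf.Session API of this era (not part of the diff):

```python
import tensorflow as tf

x = tf.constant(0.0)
y = tf.constant(2.0)
z = tf.pow(x, y)              # 0 ** 2 == 0
dy, = tf.gradients(z, [y])    # previously nan: grad * z * log(x) = 1 * 0 * -inf
with tf.Session() as sess:
    print(sess.run(dy))       # 0.0 with the guarded log
```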