path: root/tensorflow/python/ops/nn_fused_batchnorm_test.py
author     Reed Wanderman-Milne <reedwm@google.com>    2017-09-27 12:58:14 -0700
committer  TensorFlower Gardener <gardener@tensorflow.org>    2017-09-27 13:04:57 -0700
commit     759690f026a1a08b3ac5cc84d8498c05c32b2a7d (patch)
tree       9c7ba12fef51b97226f4e0a07b9aa0eff7fccff1 /tensorflow/python/ops/nn_fused_batchnorm_test.py
parent     20370104cd8adf4c3f9068dfe95bde54cccadfa5 (diff)
Add float16 support to tf.nn.fused_batch_norm on the GPU.
Scale, offset, mean, and variance must still be float32 if the input is float16.

PiperOrigin-RevId: 170239448
Diffstat (limited to 'tensorflow/python/ops/nn_fused_batchnorm_test.py')
-rw-r--r--    tensorflow/python/ops/nn_fused_batchnorm_test.py    390
1 file changed, 279 insertions(+), 111 deletions(-)
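
Illustrative usage of what this change enables (not part of the commit; shapes and values are made up, and a CUDA GPU plus a TensorFlow 1.x graph-mode build from around this revision are assumed). The input is float16 while scale, offset, and the returned statistics stay float32, as the commit message requires:

# Sketch only -- float16 fused batch norm runs on the GPU; scale/offset must be float32.
import numpy as np
import tensorflow as tf

x = tf.constant(np.random.random_sample([2, 4, 4, 3]).astype(np.float16))  # NHWC input
scale = tf.constant(np.ones([3], dtype=np.float32))    # per-channel scale, float32
offset = tf.constant(np.zeros([3], dtype=np.float32))  # per-channel offset, float32

# In training mode, mean and variance are computed from the batch and returned.
y, batch_mean, batch_var = tf.nn.fused_batch_norm(
    x, scale, offset, epsilon=0.001, data_format='NHWC', is_training=True)

with tf.Session() as sess:
  y_val, mean_val, var_val = sess.run([y, batch_mean, batch_var])
  print(y_val.dtype)     # float16: the output keeps the input dtype
  print(mean_val.dtype)  # float32: the statistics stay in float32
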
diff --git a/tensorflow/python/ops/nn_fused_batchnorm_test.py b/tensorflow/python/ops/nn_fused_batchnorm_test.py
index 1c1554e9f3..1fcd0384da 100644
--- a/tensorflow/python/ops/nn_fused_batchnorm_test.py
+++ b/tensorflow/python/ops/nn_fused_batchnorm_test.py
@@ -21,9 +21,11 @@ from __future__ import print_function
import numpy as np
from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import gradient_checker
from tensorflow.python.ops import gradients_impl
+from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_grad
from tensorflow.python.ops import nn_impl
from tensorflow.python.platform import test
@@ -31,28 +33,38 @@ from tensorflow.python.platform import test
class BatchNormalizationTest(test.TestCase):
+ def _batch_norm(self, x, mean, var, offset, scale, epsilon):
+ # We compute the batch norm manually in this function because
+ # nn_impl.batch_normalization does not support float16 yet.
+ # TODO(reedwm): Add float16 support to nn_impl.batch_normalization.
+ inv = math_ops.rsqrt(var + epsilon) * scale
+ y = math_ops.cast(x, scale.dtype) * inv + (offset - mean * inv)
+ return math_ops.cast(y, x.dtype)
+
def _inference_ref(self, x, scale, offset, mean, var, epsilon, data_format):
if data_format not in ['NHWC', 'NCHW']:
raise ValueError('data_format must be NCHW or NHWC, '
'got %s.' % data_format)
if data_format == 'NCHW':
x = array_ops.transpose(x, [0, 2, 3, 1])
- y = nn_impl.batch_normalization(x, mean, var, offset, scale, epsilon)
+ y = self._batch_norm(x, mean, var, offset, scale, epsilon)
if data_format == 'NCHW':
y = array_ops.transpose(y, [0, 3, 1, 2])
return y.eval()
def _test_inference(self,
x_shape,
+ x_dtype,
scale_shape,
+ scale_dtype,
use_gpu=True,
data_format='NHWC'):
np.random.seed(1)
- x_val = np.random.random_sample(x_shape).astype(np.float32)
- scale_val = np.random.random_sample(scale_shape).astype(np.float32)
- offset_val = np.random.random_sample(scale_shape).astype(np.float32)
- mean_val = np.random.random_sample(scale_shape).astype(np.float32)
- var_val = np.random.random_sample(scale_shape).astype(np.float32)
+ x_val = np.random.random_sample(x_shape).astype(x_dtype)
+ scale_val = np.random.random_sample(scale_shape).astype(scale_dtype)
+ offset_val = np.random.random_sample(scale_shape).astype(scale_dtype)
+ mean_val = np.random.random_sample(scale_shape).astype(scale_dtype)
+ var_val = np.random.random_sample(scale_shape).astype(scale_dtype)
with self.test_session(use_gpu=use_gpu) as sess:
x = constant_op.constant(x_val, name='x')
@@ -73,7 +85,11 @@ class BatchNormalizationTest(test.TestCase):
y_val = sess.run(y)
y_ref = self._inference_ref(x, scale, offset, mean, var, epsilon,
data_format)
- self.assertAllClose(y_ref, y_val, atol=1e-3)
+ # An atol value of 1e-3 is too small for float16's, because some adjacent
+ # float16 values that y_val can take are greater than 1e-3 apart, e.g.
+ # 2.16602 and 2.16797.
+ atol = 2e-3 if x_dtype == np.float16 else 1e-3
+ self.assertAllClose(y_ref, y_val, atol=atol)
def _training_ref(self, x, scale, offset, epsilon, data_format):
if data_format not in ['NHWC', 'NCHW']:
@@ -81,21 +97,24 @@ class BatchNormalizationTest(test.TestCase):
'got %s.' % data_format)
if data_format == 'NCHW':
x = array_ops.transpose(x, [0, 2, 3, 1])
- mean, var = nn_impl.moments(x, [0, 1, 2], keep_dims=False)
- y = nn_impl.batch_normalization(x, mean, var, offset, scale, epsilon)
+ mean, var = nn_impl.moments(
+ math_ops.cast(x, scale.dtype), [0, 1, 2], keep_dims=False)
+ y = self._batch_norm(x, mean, var, offset, scale, epsilon)
if data_format == 'NCHW':
y = array_ops.transpose(y, [0, 3, 1, 2])
return y.eval(), mean.eval(), var.eval()
def _test_training(self,
x_shape,
+ x_dtype,
scale_shape,
+ scale_dtype,
use_gpu=True,
data_format='NHWC'):
np.random.seed(1)
- x_val = np.random.random_sample(x_shape).astype(np.float32)
- scale_val = np.random.random_sample(scale_shape).astype(np.float32)
- offset_val = np.random.random_sample(scale_shape).astype(np.float32)
+ x_val = np.random.random_sample(x_shape).astype(x_dtype)
+ scale_val = np.random.random_sample(scale_shape).astype(scale_dtype)
+ offset_val = np.random.random_sample(scale_shape).astype(scale_dtype)
with self.test_session(use_gpu=use_gpu) as sess:
x = constant_op.constant(x_val, name='x')
scale = constant_op.constant(scale_val, name='scale')
@@ -111,7 +130,8 @@ class BatchNormalizationTest(test.TestCase):
y_val, mean_val, var_val = sess.run([y, mean, var])
y_ref, mean_ref, var_ref = self._training_ref(x, scale, offset, epsilon,
data_format)
- self.assertAllClose(y_ref, y_val, atol=1e-3)
+ y_atol = 2e-3 if x_dtype == np.float16 else 1e-3
+ self.assertAllClose(y_ref, y_val, atol=y_atol)
self.assertAllClose(mean_ref, mean_val, atol=1e-3)
# This is for Bessel's correction. tf.nn.moments uses n, instead of n-1, as
# the denominator in the formula to calculate variance, while
@@ -120,16 +140,51 @@ class BatchNormalizationTest(test.TestCase):
var_ref = var_ref * sample_size / (max(sample_size - 1.0, 1.0))
self.assertAllClose(var_ref, var_val, atol=1e-3)
+ def _compute_gradient_error_float16(self, x, x32, x_shape, y, y32, y_shape):
+ """Computes the gradient error for float16 inputs and/or outputs.
+
+ This returns the same value as gradient_checker.compute_gradient_error. The
+ difference is that gradient_checker.compute_gradient_error does not
+ numerically compute the gradients in a numerically stable way for float16
+ tensors. To fix this, this function requires float32 versions of x and y to
+ numerically compute the gradients, to compare with the float16 symbolically
+ computed gradients.
+
+ Args:
+ x: The input tensor.
+ x32: A float32 version of x.
+ x_shape: The shape of x.
+ y: The output tensor.
+ y32: A float32 version of y. Must be calculated based on x32, not x.
+ y_shape: The shape of y.
+
+ Returns:
+ The maximum error in between the two Jacobians, as in
+ gradient_checker.compute_gradient_error.
+ """
+ x_init_val = np.random.random_sample(x_shape).astype(np.float16)
+ x32_init_val = x_init_val.astype(np.float32)
+
+ # TODO(reedwm): Do not perform the unnecessary computations in
+ # compute_gradient, since they double the computation time of this function.
+ theoretical_grad, _ = gradient_checker.compute_gradient(
+ x, x_shape, y, y_shape, delta=1e-3, x_init_value=x_init_val)
+ _, numerical_grad = gradient_checker.compute_gradient(
+ x32, x_shape, y32, y_shape, delta=1e-3, x_init_value=x32_init_val)
+ return np.fabs(theoretical_grad - numerical_grad).max()
+
def _test_gradient(self,
x_shape,
+ x_dtype,
scale_shape,
+ scale_dtype,
use_gpu=True,
data_format='NHWC',
is_training=True):
np.random.seed(1)
- x_val = np.random.random_sample(x_shape).astype(np.float32)
- scale_val = np.random.random_sample(scale_shape).astype(np.float32)
- offset_val = np.random.random_sample(scale_shape).astype(np.float32)
+ x_val = np.random.random_sample(x_shape).astype(x_dtype)
+ scale_val = np.random.random_sample(scale_shape).astype(scale_dtype)
+ offset_val = np.random.random_sample(scale_shape).astype(scale_dtype)
with self.test_session(use_gpu=use_gpu):
x = constant_op.constant(x_val, name='x')
@@ -139,8 +194,8 @@ class BatchNormalizationTest(test.TestCase):
pop_mean = None
pop_var = None
else:
- pop_mean = np.random.random_sample(scale_shape).astype(np.float32)
- pop_var = np.random.random_sample(scale_shape).astype(np.float32)
+ pop_mean = np.random.random_sample(scale_shape).astype(scale_dtype)
+ pop_var = np.random.random_sample(scale_shape).astype(scale_dtype)
y, _, _ = nn_impl.fused_batch_norm(
x,
scale,
@@ -149,28 +204,49 @@ class BatchNormalizationTest(test.TestCase):
variance=pop_var,
data_format=data_format,
is_training=is_training)
- err_x = gradient_checker.compute_gradient_error(x, x_shape, y, x_shape)
- err_scale = gradient_checker.compute_gradient_error(scale, scale_shape, y,
- x_shape)
- err_offset = gradient_checker.compute_gradient_error(offset, scale_shape,
- y, x_shape)
- err_tolerance = 1e-3
- self.assertLess(err_x, err_tolerance)
- self.assertLess(err_scale, err_tolerance)
- self.assertLess(err_offset, err_tolerance)
+ if x_dtype != np.float16:
+ err_x = gradient_checker.compute_gradient_error(x, x_shape, y, x_shape)
+ err_scale = gradient_checker.compute_gradient_error(
+ scale, scale_shape, y, x_shape)
+ err_offset = gradient_checker.compute_gradient_error(
+ offset, scale_shape, y, x_shape)
+ else:
+ x32 = constant_op.constant(x_val, name='x32', dtype=dtypes.float32)
+ y32, _, _ = nn_impl.fused_batch_norm(
+ x32,
+ scale,
+ offset,
+ mean=pop_mean,
+ variance=pop_var,
+ data_format=data_format,
+ is_training=is_training)
+ err_x = self._compute_gradient_error_float16(x, x32, x_shape, y, y32,
+ x_shape)
+ err_scale = self._compute_gradient_error_float16(
+ scale, scale, scale_shape, y, y32, x_shape)
+ err_offset = self._compute_gradient_error_float16(
+ offset, offset, scale_shape, y, y32, x_shape)
+
+ x_err_tolerance = 2e-3 if x_dtype == np.float16 else 1e-3
+ scale_err_tolerance = 1e-3
+ self.assertLess(err_x, x_err_tolerance)
+ self.assertLess(err_scale, scale_err_tolerance)
+ self.assertLess(err_offset, scale_err_tolerance)
def _test_grad_grad(self,
x_shape,
+ x_dtype,
scale_shape,
+ scale_dtype,
use_gpu=True,
data_format='NHWC',
is_training=True,
err_tolerance=1e-3):
np.random.seed(1)
- x_val = np.random.random_sample(x_shape).astype(np.float32)
- grad_y_val = np.random.random_sample(x_shape).astype(np.float32)
- scale_val = np.random.random_sample(scale_shape).astype(np.float32)
- offset_val = np.random.random_sample(scale_shape).astype(np.float32)
+ x_val = np.random.random_sample(x_shape).astype(x_dtype)
+ grad_y_val = np.random.random_sample(x_shape).astype(x_dtype)
+ scale_val = np.random.random_sample(scale_shape).astype(scale_dtype)
+ offset_val = np.random.random_sample(scale_shape).astype(scale_dtype)
with self.test_session(use_gpu=use_gpu) as sess:
x = constant_op.constant(x_val, name='x')
@@ -181,8 +257,8 @@ class BatchNormalizationTest(test.TestCase):
pop_mean = None
pop_var = None
else:
- pop_mean = np.random.random_sample(scale_shape).astype(np.float32)
- pop_var = np.random.random_sample(scale_shape).astype(np.float32)
+ pop_mean = np.random.random_sample(scale_shape).astype(scale_dtype)
+ pop_var = np.random.random_sample(scale_shape).astype(scale_dtype)
y, _, _ = nn_impl.fused_batch_norm(
x,
scale,
@@ -203,21 +279,51 @@ class BatchNormalizationTest(test.TestCase):
for grad_val, grad_internal_val in zip(grad_vals, grad_internal_vals):
self.assertAllClose(grad_val, grad_internal_val, atol=err_tolerance)
- err_grad_grad_y_1 = gradient_checker.compute_gradient_error(
- grad_y, x_shape, grad_x, x_shape)
- err_grad_grad_y_2 = gradient_checker.compute_gradient_error(
- grad_y, x_shape, grad_scale, scale_shape)
- err_grad_grad_y_3 = gradient_checker.compute_gradient_error(
- grad_y, x_shape, grad_offset, scale_shape)
- # In freeze mode, grad_x is not a function of x.
- if is_training:
- err_grad_x_1 = gradient_checker.compute_gradient_error(
- x, x_shape, grad_x, x_shape)
- err_grad_x_2 = gradient_checker.compute_gradient_error(
- x, x_shape, grad_scale, scale_shape)
-
- err_grad_scale = gradient_checker.compute_gradient_error(
- scale, scale_shape, grad_x, x_shape)
+ if x_dtype != np.float16:
+ err_grad_grad_y_1 = gradient_checker.compute_gradient_error(
+ grad_y, x_shape, grad_x, x_shape)
+ err_grad_grad_y_2 = gradient_checker.compute_gradient_error(
+ grad_y, x_shape, grad_scale, scale_shape)
+ err_grad_grad_y_3 = gradient_checker.compute_gradient_error(
+ grad_y, x_shape, grad_offset, scale_shape)
+ # In freeze mode, grad_x is not a function of x.
+ if is_training:
+ err_grad_x_1 = gradient_checker.compute_gradient_error(
+ x, x_shape, grad_x, x_shape)
+ err_grad_x_2 = gradient_checker.compute_gradient_error(
+ x, x_shape, grad_scale, scale_shape)
+
+ err_grad_scale = gradient_checker.compute_gradient_error(
+ scale, scale_shape, grad_x, x_shape)
+ else:
+ x32 = constant_op.constant(x_val, dtype=dtypes.float32, name='x32')
+ grad_y32 = constant_op.constant(
+ grad_y_val, dtype=dtypes.float32, name='grad_y32')
+ y32, _, _ = nn_impl.fused_batch_norm(
+ x32,
+ scale,
+ offset,
+ mean=pop_mean,
+ variance=pop_var,
+ data_format=data_format,
+ is_training=is_training)
+ grad_x32, grad_scale32, grad_offset32 = gradients_impl.gradients(
+ y32, [x32, scale, offset], grad_y32)
+ err_grad_grad_y_1 = self._compute_gradient_error_float16(
+ grad_y, grad_y32, x_shape, grad_x, grad_x32, x_shape)
+ err_grad_grad_y_2 = self._compute_gradient_error_float16(
+ grad_y, grad_y32, x_shape, grad_scale, grad_scale32, scale_shape)
+ err_grad_grad_y_3 = self._compute_gradient_error_float16(
+ grad_y, grad_y32, x_shape, grad_offset, grad_offset32, scale_shape)
+ # In freeze mode, grad_x is not a function of x.
+ if is_training:
+ err_grad_x_1 = self._compute_gradient_error_float16(
+ x, x32, x_shape, grad_x, grad_x32, x_shape)
+ err_grad_x_2 = self._compute_gradient_error_float16(
+ x, x32, x_shape, grad_scale, grad_scale32, scale_shape)
+
+ err_grad_scale = self._compute_gradient_error_float16(
+ scale, scale, scale_shape, grad_x, grad_x32, x_shape)
self.assertLess(err_grad_grad_y_1, err_tolerance)
self.assertLess(err_grad_grad_y_2, err_tolerance)
@@ -230,102 +336,150 @@ class BatchNormalizationTest(test.TestCase):
def testInference(self):
x_shape = [1, 1, 6, 1]
if test.is_gpu_available(cuda_only=True):
- self._test_inference(x_shape, [1], use_gpu=True, data_format='NHWC')
- self._test_inference(x_shape, [1], use_gpu=True, data_format='NCHW')
- self._test_inference(x_shape, [1], use_gpu=False, data_format='NHWC')
+ for dtype in [np.float16, np.float32]:
+ self._test_inference(
+ x_shape, dtype, [1], np.float32, use_gpu=True, data_format='NHWC')
+ self._test_inference(
+ x_shape, dtype, [1], np.float32, use_gpu=True, data_format='NCHW')
+ self._test_inference(
+ x_shape, np.float32, [1], np.float32, use_gpu=False, data_format='NHWC')
x_shape = [1, 1, 6, 2]
if test.is_gpu_available(cuda_only=True):
- self._test_inference(x_shape, [2], use_gpu=True, data_format='NHWC')
- self._test_inference(x_shape, [2], use_gpu=False, data_format='NHWC')
+ for dtype in [np.float16, np.float32]:
+ self._test_inference(
+ x_shape, dtype, [2], np.float32, use_gpu=True, data_format='NHWC')
+ self._test_inference(
+ x_shape, np.float32, [2], np.float32, use_gpu=False, data_format='NHWC')
x_shape = [1, 2, 1, 6]
if test.is_gpu_available(cuda_only=True):
- self._test_inference(x_shape, [2], use_gpu=True, data_format='NCHW')
+ for dtype in [np.float16, np.float32]:
+ self._test_inference(
+ x_shape, dtype, [2], np.float32, use_gpu=True, data_format='NCHW')
x_shape = [27, 131, 127, 6]
if test.is_gpu_available(cuda_only=True):
- self._test_inference(x_shape, [131], use_gpu=True, data_format='NCHW')
- self._test_inference(x_shape, [6], use_gpu=True, data_format='NHWC')
- self._test_inference(x_shape, [6], use_gpu=False, data_format='NHWC')
+ for dtype in [np.float16, np.float32]:
+ self._test_inference(
+ x_shape, dtype, [131], np.float32, use_gpu=True, data_format='NCHW')
+ self._test_inference(
+ x_shape, dtype, [6], np.float32, use_gpu=True, data_format='NHWC')
+ self._test_inference(
+ x_shape, np.float32, [6], np.float32, use_gpu=False, data_format='NHWC')
def testTraining(self):
x_shape = [1, 1, 6, 1]
if test.is_gpu_available(cuda_only=True):
- self._test_training(x_shape, [1], use_gpu=True, data_format='NHWC')
- self._test_training(x_shape, [1], use_gpu=True, data_format='NCHW')
- self._test_training(x_shape, [1], use_gpu=False, data_format='NHWC')
+ for dtype in [np.float16, np.float32]:
+ self._test_training(
+ x_shape, dtype, [1], np.float32, use_gpu=True, data_format='NHWC')
+ self._test_training(
+ x_shape, dtype, [1], np.float32, use_gpu=True, data_format='NCHW')
+ self._test_training(
+ x_shape, np.float32, [1], np.float32, use_gpu=False, data_format='NHWC')
x_shape = [1, 1, 6, 2]
if test.is_gpu_available(cuda_only=True):
- self._test_training(x_shape, [2], use_gpu=True, data_format='NHWC')
- self._test_training(x_shape, [2], use_gpu=False, data_format='NHWC')
+ for dtype in [np.float16, np.float32]:
+ self._test_training(
+ x_shape, dtype, [2], np.float32, use_gpu=True, data_format='NHWC')
+ self._test_training(
+ x_shape, np.float32, [2], np.float32, use_gpu=False, data_format='NHWC')
x_shape = [1, 2, 1, 6]
if test.is_gpu_available(cuda_only=True):
- self._test_training(x_shape, [2], use_gpu=True, data_format='NCHW')
+ for dtype in [np.float16, np.float32]:
+ self._test_training(
+ x_shape, dtype, [2], np.float32, use_gpu=True, data_format='NCHW')
x_shape = [27, 131, 127, 6]
if test.is_gpu_available(cuda_only=True):
- self._test_training(x_shape, [131], use_gpu=True, data_format='NCHW')
- self._test_training(x_shape, [6], use_gpu=True, data_format='NHWC')
- self._test_training(x_shape, [6], use_gpu=False, data_format='NHWC')
+ for dtype in [np.float16, np.float32]:
+ self._test_training(
+ x_shape, dtype, [131], np.float32, use_gpu=True, data_format='NCHW')
+ self._test_training(
+ x_shape, dtype, [6], np.float32, use_gpu=True, data_format='NHWC')
+ self._test_training(
+ x_shape, np.float32, [6], np.float32, use_gpu=False, data_format='NHWC')
def testBatchNormGrad(self):
for is_training in [True, False]:
x_shape = [1, 1, 6, 1]
if test.is_gpu_available(cuda_only=True):
- self._test_gradient(
- x_shape, [1],
- use_gpu=True,
- data_format='NHWC',
- is_training=is_training)
- self._test_gradient(
- x_shape, [1],
- use_gpu=True,
- data_format='NCHW',
- is_training=is_training)
+ for dtype in [np.float16, np.float32]:
+ self._test_gradient(
+ x_shape,
+ dtype, [1],
+ np.float32,
+ use_gpu=True,
+ data_format='NHWC',
+ is_training=is_training)
+ self._test_gradient(
+ x_shape,
+ dtype, [1],
+ np.float32,
+ use_gpu=True,
+ data_format='NCHW',
+ is_training=is_training)
self._test_gradient(
- x_shape, [1],
+ x_shape,
+ np.float32, [1],
+ np.float32,
use_gpu=False,
data_format='NHWC',
is_training=is_training)
x_shape = [1, 1, 6, 2]
if test.is_gpu_available(cuda_only=True):
- self._test_gradient(
- x_shape, [2],
- use_gpu=True,
- data_format='NHWC',
- is_training=is_training)
+ for dtype in [np.float16, np.float32]:
+ self._test_gradient(
+ x_shape,
+ dtype, [2],
+ np.float32,
+ use_gpu=True,
+ data_format='NHWC',
+ is_training=is_training)
self._test_gradient(
- x_shape, [2],
+ x_shape,
+ np.float32, [2],
+ np.float32,
use_gpu=False,
data_format='NHWC',
is_training=is_training)
x_shape = [1, 2, 1, 6]
if test.is_gpu_available(cuda_only=True):
- self._test_gradient(
- x_shape, [2],
- use_gpu=True,
- data_format='NCHW',
- is_training=is_training)
-
- x_shape = [7, 9, 13, 6]
+ for dtype in [np.float16, np.float32]:
+ self._test_gradient(
+ x_shape,
+ dtype, [2],
+ np.float32,
+ use_gpu=True,
+ data_format='NCHW',
+ is_training=is_training)
+
+ x_shape = [5, 7, 11, 4]
if test.is_gpu_available(cuda_only=True):
- self._test_gradient(
- x_shape, [9],
- use_gpu=True,
- data_format='NCHW',
- is_training=is_training)
- self._test_gradient(
- x_shape, [6],
- use_gpu=True,
- data_format='NHWC',
- is_training=is_training)
+ for dtype in [np.float16, np.float32]:
+ self._test_gradient(
+ x_shape,
+ dtype, [7],
+ np.float32,
+ use_gpu=True,
+ data_format='NCHW',
+ is_training=is_training)
+ self._test_gradient(
+ x_shape,
+ dtype, [4],
+ np.float32,
+ use_gpu=True,
+ data_format='NHWC',
+ is_training=is_training)
self._test_gradient(
- x_shape, [6],
+ x_shape,
+ np.float32, [4],
+ np.float32,
use_gpu=False,
data_format='NHWC',
is_training=is_training)
@@ -333,34 +487,48 @@ class BatchNormalizationTest(test.TestCase):
def _testBatchNormGradGrad(self, config):
shape = config['shape']
err_tolerance = config['err_tolerance']
+ dtype = config['dtype']
for is_training in [True, False]:
if test.is_gpu_available(cuda_only=True):
self._test_grad_grad(
- shape, [shape[3]],
+ shape,
+ dtype, [shape[3]],
+ np.float32,
use_gpu=True,
data_format='NHWC',
is_training=is_training,
err_tolerance=err_tolerance)
self._test_grad_grad(
- shape, [shape[1]],
+ shape,
+ dtype, [shape[1]],
+ np.float32,
use_gpu=True,
data_format='NCHW',
is_training=is_training,
err_tolerance=err_tolerance)
- self._test_grad_grad(
- shape, [shape[3]],
- use_gpu=False,
- data_format='NHWC',
- is_training=is_training,
- err_tolerance=err_tolerance)
+ if dtype != np.float16:
+ self._test_grad_grad(
+ shape,
+ np.float32, [shape[3]],
+ np.float32,
+ use_gpu=False,
+ data_format='NHWC',
+ is_training=is_training,
+ err_tolerance=err_tolerance)
def testBatchNormGradGrad(self):
configs = [{
'shape': [2, 3, 4, 5],
- 'err_tolerance': 1e-2
+ 'err_tolerance': 1e-2,
+ 'dtype': np.float32,
+ }, {
+ 'shape': [2, 3, 2, 2],
+ 'err_tolerance': 1e-3,
+ 'dtype': np.float32,
}, {
'shape': [2, 3, 2, 2],
- 'err_tolerance': 1e-3
+ 'err_tolerance': 2e-3,
+ 'dtype': np.float16,
}]
for config in configs:
self._testBatchNormGradGrad(config)
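
The _compute_gradient_error_float16 docstring above describes the pattern this test relies on: take the theoretical Jacobian from the float16 graph and the numerical Jacobian from an equivalent float32 graph evaluated at the same points, since finite differences in float16 are too coarse. A minimal standalone sketch of that pattern (illustrative, not part of the patch; a simple elementwise op stands in for fused_batch_norm):

# Sketch only -- compares a float16 theoretical Jacobian against a float32 numerical one.
import numpy as np
import tensorflow as tf
from tensorflow.python.ops import gradient_checker

x_shape = [2, 3]
x_init = np.random.random_sample(x_shape).astype(np.float16)

with tf.Session():
  x16 = tf.constant(x_init)
  x32 = tf.constant(x_init.astype(np.float32))
  y16 = x16 * x16  # any op with a float16 kernel works here
  y32 = x32 * x32  # the same computation in float32

  # Theoretical (symbolic) Jacobian from the float16 graph.
  theoretical, _ = gradient_checker.compute_gradient(
      x16, x_shape, y16, x_shape, delta=1e-3, x_init_value=x_init)
  # Numerical (finite-difference) Jacobian from the float32 graph.
  _, numerical = gradient_checker.compute_gradient(
      x32, x_shape, y32, x_shape, delta=1e-3,
      x_init_value=x_init.astype(np.float32))
  print(np.fabs(theoretical - numerical).max())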