path: root/tensorflow/python/ops/nn_fused_batchnorm_test.py
author     Reed Wanderman-Milne <reedwm@google.com>    2017-09-27 12:58:14 -0700
committer  TensorFlower Gardener <gardener@tensorflow.org>    2017-09-27 13:04:57 -0700
commit     759690f026a1a08b3ac5cc84d8498c05c32b2a7d (patch)
tree       9c7ba12fef51b97226f4e0a07b9aa0eff7fccff1 /tensorflow/python/ops/nn_fused_batchnorm_test.py
parent     20370104cd8adf4c3f9068dfe95bde54cccadfa5 (diff)
Add float16 support to tf.nn.fused_batch_norm on the GPU.
Scale, offset, mean, and variance must still be float32 if the input is float16.

PiperOrigin-RevId: 170239448
Diffstat (limited to 'tensorflow/python/ops/nn_fused_batchnorm_test.py')
-rw-r--r--    tensorflow/python/ops/nn_fused_batchnorm_test.py    390
1 file changed, 279 insertions(+), 111 deletions(-)
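
Illustrative usage of what this change enables (not part of the commit; shapes and values are made up, and a CUDA GPU plus a TensorFlow 1.x graph-mode build from around this revision are assumed). The input is float16 while scale, offset, and the returned statistics stay float32, as the commit message requires:

# Sketch only -- float16 fused batch norm runs on the GPU; scale/offset must be float32.
import numpy as np
import tensorflow as tf

x = tf.constant(np.random.random_sample([2, 4, 4, 3]).astype(np.float16))  # NHWC input
scale = tf.constant(np.ones([3], dtype=np.float32))    # per-channel scale, float32
offset = tf.constant(np.zeros([3], dtype=np.float32))  # per-channel offset, float32

# In training mode, mean and variance are computed from the batch and returned.
y, batch_mean, batch_var = tf.nn.fused_batch_norm(
    x, scale, offset, epsilon=0.001, data_format='NHWC', is_training=True)

with tf.Session() as sess:
  y_val, mean_val, var_val = sess.run([y, batch_mean, batch_var])
  print(y_val.dtype)     # float16: the output keeps the input dtype
  print(mean_val.dtype)  # float32: the statistics stay in float32
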
diff --git a/tensorflow/python/ops/nn_fused_batchnorm_test.py b/tensorflow/python/ops/nn_fused_batchnorm_test.py
index 1c1554e9f3..1fcd0384da 100644
--- a/tensorflow/python/ops/nn_fused_batchnorm_test.py
+++ b/tensorflow/python/ops/nn_fused_batchnorm_test.py
@@ -21,9 +21,11 @@ from __future__ import print_function
import numpy as np
from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import gradient_checker
from tensorflow.python.ops import gradients_impl
+from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_grad
from tensorflow.python.ops import nn_impl
from tensorflow.python.platform import test
@@ -31,28 +33,38 @@ from tensorflow.python.platform import test
class BatchNormalizationTest(test.TestCase):
+ def _batch_norm(self, x, mean, var, offset, scale, epsilon):
+ # We compute the batch norm manually in this function because
+ # nn_impl.batch_normalization does not support float16 yet.
+ # TODO(reedwm): Add float16 support to nn_impl.batch_normalization.
+ inv = math_ops.rsqrt(var + epsilon) * scale
+ y = math_ops.cast(x, scale.dtype) * inv + (offset - mean * inv)
+ return math_ops.cast(y, x.dtype)
+
def _inference_ref(self, x, scale, offset, mean, var, epsilon, data_format):
if data_format not in ['NHWC', 'NCHW']:
raise ValueError('data_format must be NCHW or NHWC, '
'got %s.' % data_format)
if data_format == 'NCHW':
x = array_ops.transpose(x, [0, 2, 3, 1])
- y = nn_impl.batch_normalization(x, mean, var, offset, scale, epsilon)
+ y = self._batch_norm(x, mean, var, offset, scale, epsilon)
if data_format == 'NCHW':
y = array_ops.transpose(y, [0, 3, 1, 2])
return y.eval()
def _test_inference(self,
x_shape,
+ x_dtype,
scale_shape,
+ scale_dtype,
use_gpu=True,
data_format='NHWC'):
np.random.seed(1)
- x_val = np.random.random_sample(x_shape).astype(np.float32)
- scale_val = np.random.random_sample(scale_shape).astype(np.float32)
- offset_val = np.random.random_sample(scale_shape).astype(np.float32)
- mean_val = np.random.random_sample(scale_shape).astype(np.float32)
- var_val = np.random.random_sample(scale_shape).astype(np.float32)
+ x_val = np.random.random_sample(x_shape).astype(x_dtype)
+ scale_val = np.random.random_sample(scale_shape).astype(scale_dtype)
+ offset_val = np.random.random_sample(scale_shape).astype(scale_dtype)
+ mean_val = np.random.random_sample(scale_shape).astype(scale_dtype)
+ var_val = np.random.random_sample(scale_shape).astype(scale_dtype)
with self.test_session(use_gpu=use_gpu) as sess:
x = constant_op.constant(x_val, name='x')
@@ -73,7 +85,11 @@ class BatchNormalizationTest(test.TestCase):
y_val = sess.run(y)
y_ref = self._inference_ref(x, scale, offset, mean, var, epsilon,
data_format)
- self.assertAllClose(y_ref, y_val, atol=1e-3)
+ # An atol value of 1e-3 is too small for float16's, because some adjacent
+ # float16 values that y_val can take are greater than 1e-3 apart, e.g.
+ # 2.16602 and 2.16797.
+ atol = 2e-3 if x_dtype == np.float16 else 1e-3
+ self.assertAllClose(y_ref, y_val, atol=atol)
def _training_ref(self, x, scale, offset, epsilon, data_format):
if data_format not in ['NHWC', 'NCHW']:
@@ -81,21 +97,24 @@ class BatchNormalizationTest(test.TestCase):
'got %s.' % data_format)
if data_format == 'NCHW':
x = array_ops.transpose(x, [0, 2, 3, 1])
- mean, var = nn_impl.moments(x, [0, 1, 2], keep_dims=False)
- y = nn_impl.batch_normalization(x, mean, var, offset, scale, epsilon)
+ mean, var = nn_impl.moments(
+ math_ops.cast(x, scale.dtype), [0, 1, 2], keep_dims=False)
+ y = self._batch_norm(x, mean, var, offset, scale, epsilon)
if data_format == 'NCHW':
y = array_ops.transpose(y, [0, 3, 1, 2])
return y.eval(), mean.eval(), var.eval()
def _test_training(self,
x_shape,
+ x_dtype,
scale_shape,
+ scale_dtype,
use_gpu=True,
data_format='NHWC'):
np.random.seed(1)
- x_val = np.random.random_sample(x_shape).astype(np.float32)
- scale_val = np.random.random_sample(scale_shape).astype(np.float32)
- offset_val = np.random.random_sample(scale_shape).astype(np.float32)
+ x_val = np.random.random_sample(x_shape).astype(x_dtype)
+ scale_val = np.random.random_sample(scale_shape).astype(scale_dtype)
+ offset_val = np.random.random_sample(scale_shape).astype(scale_dtype)
with self.test_session(use_gpu=use_gpu) as sess:
x = constant_op.constant(x_val, name='x')
scale = constant_op.constant(scale_val, name='scale')
@@ -111,7 +130,8 @@ class BatchNormalizationTest(test.TestCase):
y_val, mean_val, var_val = sess.run([y, mean, var])
y_ref, mean_ref, var_ref = self._training_ref(x, scale, offset, epsilon,
data_format)
- self.assertAllClose(y_ref, y_val, atol=1e-3)
+ y_atol = 2e-3 if x_dtype == np.float16 else 1e-3
+ self.assertAllClose(y_ref, y_val, atol=y_atol)
self.assertAllClose(mean_ref, mean_val, atol=1e-3)
# This is for Bessel's correction. tf.nn.moments uses n, instead of n-1, as
# the denominator in the formula to calculate variance, while
@@ -120,16 +140,51 @@ class BatchNormalizationTest(test.TestCase):
var_ref = var_ref * sample_size / (max(sample_size - 1.0, 1.0))
self.assertAllClose(var_ref, var_val, atol=1e-3)
+ def _compute_gradient_error_float16(self, x, x32, x_shape, y, y32, y_shape):
+ """Computes the gradient error for float16 inputs and/or outputs.
+
+ This returns the same value as gradient_checker.compute_gradient_error. The
+ difference is that gradient_checker.compute_gradient_error does not
+ numerically compute the gradients in a numerically stable way for float16
+ tensors. To fix this, this function requires float32 versions of x and y to
+ numerically compute the gradients, to compare with the float16 symbolically
+ computed gradients.
+
+ Args:
+ x: The input tensor.
+ x32: A float32 version of x.
+ x_shape: The shape of x.
+ y: The output tensor.
+ y32: A float32 version of y. Must be calculated based on x32, not x.
+ y_shape: The shape of y.
+
+ Returns:
+ The maximum error in between the two Jacobians, as in
+ gradient_checker.compute_gradient_error.
+ """
+ x_init_val = np.random.random_sample(x_shape).astype(np.float16)
+ x32_init_val = x_init_val.astype(np.float32)
+
+ # TODO(reedwm): Do not perform the unnecessary computations in
+ # compute_gradient, since they double the computation time of this function.
+ theoretical_grad, _ = gradient_checker.compute_gradient(
+ x, x_shape, y, y_shape, delta=1e-3, x_init_value=x_init_val)
+ _, numerical_grad = gradient_checker.compute_gradient(
+ x32, x_shape, y32, y_shape, delta=1e-3, x_init_value=x32_init_val)
+ return np.fabs(theoretical_grad - numerical_grad).max()
+
def _test_gradient(self,
x_shape,
+ x_dtype,
scale_shape,
+ scale_dtype,
use_gpu=True,
data_format='NHWC',
is_training=True):
np.random.seed(1)
- x_val = np.random.random_sample(x_shape).astype(np.float32)
- scale_val = np.random.random_sample(scale_shape).astype(np.float32)
- offset_val = np.random.random_sample(scale_shape).astype(np.float32)
+ x_val = np.random.random_sample(x_shape).astype(x_dtype)
+ scale_val = np.random.random_sample(scale_shape).astype(scale_dtype)
+ offset_val = np.random.random_sample(scale_shape).astype(scale_dtype)
with self.test_session(use_gpu=use_gpu):
x = constant_op.constant(x_val, name='x')
@@ -139,8 +194,8 @@ class BatchNormalizationTest(test.TestCase):
pop_mean = None
pop_var = None
else:
- pop_mean = np.random.random_sample(scale_shape).astype(np.float32)
- pop_var = np.random.random_sample(scale_shape).astype(np.float32)
+ pop_mean = np.random.random_sample(scale_shape).astype(scale_dtype)
+ pop_var = np.random.random_sample(scale_shape).astype(scale_dtype)
y, _, _ = nn_impl.fused_batch_norm(
x,
scale,
@@ -149,28 +204,49 @@ class BatchNormalizationTest(test.TestCase):
variance=pop_var,
data_format=data_format,
is_training=is_training)
- err_x = gradient_checker.compute_gradient_error(x, x_shape, y, x_shape)
- err_scale = gradient_checker.compute_gradient_error(scale, scale_shape, y,
- x_shape)
- err_offset = gradient_checker.compute_gradient_error(offset, scale_shape,
- y, x_shape)
- err_tolerance = 1e-3
- self.assertLess(err_x, err_tolerance)
- self.assertLess(err_scale, err_tolerance)
- self.assertLess(err_offset, err_tolerance)
+ if x_dtype != np.float16:
+ err_x = gradient_checker.compute_gradient_error(x, x_shape, y, x_shape)
+ err_scale = gradient_checker.compute_gradient_error(
+ scale, scale_shape, y, x_shape)
+ err_offset = gradient_checker.compute_gradient_error(
+ offset, scale_shape, y, x_shape)
+ else:
+ x32 = constant_op.constant(x_val, name='x32', dtype=dtypes.float32)
+ y32, _, _ = nn_impl.fused_batch_norm(
+ x32,
+ scale,
+ offset,
+ mean=pop_mean,
+ variance=pop_var,
+ data_format=data_format,
+ is_training=is_training)
+ err_x = self._compute_gradient_error_float16(x, x32, x_shape, y, y32,
+ x_shape)
+ err_scale = self._compute_gradient_error_float16(
+ scale, scale, scale_shape, y, y32, x_shape)
+ err_offset = self._compute_gradient_error_float16(
+ offset, offset, scale_shape, y, y32, x_shape)
+
+ x_err_tolerance = 2e-3 if x_dtype == np.float16 else 1e-3
+ scale_err_tolerance = 1e-3
+ self.assertLess(err_x, x_err_tolerance)
+ self.assertLess(err_scale, scale_err_tolerance)
+ self.assertLess(err_offset, scale_err_tolerance)
def _test_grad_grad(self,
x_shape,
+ x_dtype,
scale_shape,
+ scale_dtype,
use_gpu=True,
data_format='NHWC',
is_training=True,
err_tolerance=1e-3):
np.random.seed(1)
- x_val = np.random.random_sample(x_shape).astype(np.float32)
- grad_y_val = np.random.random_sample(x_shape).astype(np.float32)
- scale_val = np.random.random_sample(scale_shape).astype(np.float32)
- offset_val = np.random.random_sample(scale_shape).astype(np.float32)
+ x_val = np.random.random_sample(x_shape).astype(x_dtype)
+ grad_y_val = np.random.random_sample(x_shape).astype(x_dtype)
+ scale_val = np.random.random_sample(scale_shape).astype(scale_dtype)
+ offset_val = np.random.random_sample(scale_shape).astype(scale_dtype)
with self.test_session(use_gpu=use_gpu) as sess:
x = constant_op.constant(x_val, name='x')
@@ -181,8 +257,8 @@ class BatchNormalizationTest(test.TestCase):
pop_mean = None
pop_var = None
else:
- pop_mean = np.random.random_sample(scale_shape).astype(np.float32)
- pop_var = np.random.random_sample(scale_shape).astype(np.float32)
+ pop_mean = np.random.random_sample(scale_shape).astype(scale_dtype)
+ pop_var = np.random.random_sample(scale_shape).astype(scale_dtype)
y, _, _ = nn_impl.fused_batch_norm(
x,
scale,
@@ -203,21 +279,51 @@ class BatchNormalizationTest(test.TestCase):
for grad_val, grad_internal_val in zip(grad_vals, grad_internal_vals):
self.assertAllClose(grad_val, grad_internal_val, atol=err_tolerance)
- err_grad_grad_y_1 = gradient_checker.compute_gradient_error(
- grad_y, x_shape, grad_x, x_shape)
- err_grad_grad_y_2 = gradient_checker.compute_gradient_error(
- grad_y, x_shape, grad_scale, scale_shape)
- err_grad_grad_y_3 = gradient_checker.compute_gradient_error(
- grad_y, x_shape, grad_offset, scale_shape)
- # In freeze mode, grad_x is not a function of x.
- if is_training:
- err_grad_x_1 = gradient_checker.compute_gradient_error(
- x, x_shape, grad_x, x_shape)
- err_grad_x_2 = gradient_checker.compute_gradient_error(
- x, x_shape, grad_scale, scale_shape)
-
- err_grad_scale = gradient_checker.compute_gradient_error(
- scale, scale_shape, grad_x, x_shape)
+ if x_dtype != np.float16:
+ err_grad_grad_y_1 = gradient_checker.compute_gradient_error(
+ grad_y, x_shape, grad_x, x_shape)
+ err_grad_grad_y_2 = gradient_checker.compute_gradient_error(
+ grad_y, x_shape, grad_scale, scale_shape)
+ err_grad_grad_y_3 = gradient_checker.compute_gradient_error(
+ grad_y, x_shape, grad_offset, scale_shape)
+ # In freeze mode, grad_x is not a function of x.
+ if is_training:
+ err_grad_x_1 = gradient_checker.compute_gradient_error(
+ x, x_shape, grad_x, x_shape)
+ err_grad_x_2 = gradient_checker.compute_gradient_error(
+ x, x_shape, grad_scale, scale_shape)
+
+ err_grad_scale = gradient_checker.compute_gradient_error(
+ scale, scale_shape, grad_x, x_shape)
+ else:
+ x32 = constant_op.constant(x_val, dtype=dtypes.float32, name='x32')
+ grad_y32 = constant_op.constant(
+ grad_y_val, dtype=dtypes.float32, name='grad_y32')
+ y32, _, _ = nn_impl.fused_batch_norm(
+ x32,
+ scale,
+ offset,
+ mean=pop_mean,
+ variance=pop_var,
+ data_format=data_format,
+ is_training=is_training)
+ grad_x32, grad_scale32, grad_offset32 = gradients_impl.gradients(
+ y32, [x32, scale, offset], grad_y32)
+ err_grad_grad_y_1 = self._compute_gradient_error_float16(
+ grad_y, grad_y32, x_shape, grad_x, grad_x32, x_shape)
+ err_grad_grad_y_2 = self._compute_gradient_error_float16(
+ grad_y, grad_y32, x_shape, grad_scale, grad_scale32, scale_shape)
+ err_grad_grad_y_3 = self._compute_gradient_error_float16(
+ grad_y, grad_y32, x_shape, grad_offset, grad_offset32, scale_shape)
+ # In freeze mode, grad_x is not a function of x.
+ if is_training:
+ err_grad_x_1 = self._compute_gradient_error_float16(
+ x, x32, x_shape, grad_x, grad_x32, x_shape)
+ err_grad_x_2 = self._compute_gradient_error_float16(
+ x, x32, x_shape, grad_scale, grad_scale32, scale_shape)
+
+ err_grad_scale = self._compute_gradient_error_float16(
+ scale, scale, scale_shape, grad_x, grad_x32, x_shape)
self.assertLess(err_grad_grad_y_1, err_tolerance)
self.assertLess(err_grad_grad_y_2, err_tolerance)
@@ -230,102 +336,150 @@ class BatchNormalizationTest(test.TestCase):
def testInference(self):
x_shape = [1, 1, 6, 1]
if test.is_gpu_available(cuda_only=True):
- self._test_inference(x_shape, [1], use_gpu=True, data_format='NHWC')
- self._test_inference(x_shape, [1], use_gpu=True, data_format='NCHW')
- self._test_inference(x_shape, [1], use_gpu=False, data_format='NHWC')
+ for dtype in [np.float16, np.float32]:
+ self._test_inference(
+ x_shape, dtype, [1], np.float32, use_gpu=True, data_format='NHWC')
+ self._test_inference(
+ x_shape, dtype, [1], np.float32, use_gpu=True, data_format='NCHW')
+ self._test_inference(
+ x_shape, np.float32, [1], np.float32, use_gpu=False, data_format='NHWC')
x_shape = [1, 1, 6, 2]
if test.is_gpu_available(cuda_only=True):
- self._test_inference(x_shape, [2], use_gpu=True, data_format='NHWC')
- self._test_inference(x_shape, [2], use_gpu=False, data_format='NHWC')
+ for dtype in [np.float16, np.float32]:
+ self._test_inference(
+ x_shape, dtype, [2], np.float32, use_gpu=True, data_format='NHWC')
+ self._test_inference(
+ x_shape, np.float32, [2], np.float32, use_gpu=False, data_format='NHWC')
x_shape = [1, 2, 1, 6]
if test.is_gpu_available(cuda_only=True):
- self._test_inference(x_shape, [2], use_gpu=True, data_format='NCHW')
+ for dtype in [np.float16, np.float32]:
+ self._test_inference(
+ x_shape, dtype, [2], np.float32, use_gpu=True, data_format='NCHW')
x_shape = [27, 131, 127, 6]
if test.is_gpu_available(cuda_only=True):
- self._test_inference(x_shape, [131], use_gpu=True, data_format='NCHW')
- self._test_inference(x_shape, [6], use_gpu=True, data_format='NHWC')
- self._test_inference(x_shape, [6], use_gpu=False, data_format='NHWC')
+ for dtype in [np.float16, np.float32]:
+ self._test_inference(
+ x_shape, dtype, [131], np.float32, use_gpu=True, data_format='NCHW')
+ self._test_inference(
+ x_shape, dtype, [6], np.float32, use_gpu=True, data_format='NHWC')
+ self._test_inference(
+ x_shape, np.float32, [6], np.float32, use_gpu=False, data_format='NHWC')
def testTraining(self):
x_shape = [1, 1, 6, 1]
if test.is_gpu_available(cuda_only=True):
- self._test_training(x_shape, [1], use_gpu=True, data_format='NHWC')
- self._test_training(x_shape, [1], use_gpu=True, data_format='NCHW')
- self._test_training(x_shape, [1], use_gpu=False, data_format='NHWC')
+ for dtype in [np.float16, np.float32]:
+ self._test_training(
+ x_shape, dtype, [1], np.float32, use_gpu=True, data_format='NHWC')
+ self._test_training(
+ x_shape, dtype, [1], np.float32, use_gpu=True, data_format='NCHW')
+ self._test_training(
+ x_shape, np.float32, [1], np.float32, use_gpu=False, data_format='NHWC')
x_shape = [1, 1, 6, 2]
if test.is_gpu_available(cuda_only=True):
- self._test_training(x_shape, [2], use_gpu=True, data_format='NHWC')
- self._test_training(x_shape, [2], use_gpu=False, data_format='NHWC')
+ for dtype in [np.float16, np.float32]:
+ self._test_training(
+ x_shape, dtype, [2], np.float32, use_gpu=True, data_format='NHWC')
+ self._test_training(
+ x_shape, np.float32, [2], np.float32, use_gpu=False, data_format='NHWC')
x_shape = [1, 2, 1, 6]
if test.is_gpu_available(cuda_only=True):
- self._test_training(x_shape, [2], use_gpu=True, data_format='NCHW')
+ for dtype in [np.float16, np.float32]:
+ self._test_training(
+ x_shape, dtype, [2], np.float32, use_gpu=True, data_format='NCHW')
x_shape = [27, 131, 127, 6]
if test.is_gpu_available(cuda_only=True):
- self._test_training(x_shape, [131], use_gpu=True, data_format='NCHW')
- self._test_training(x_shape, [6], use_gpu=True, data_format='NHWC')
- self._test_training(x_shape, [6], use_gpu=False, data_format='NHWC')
+ for dtype in [np.float16, np.float32]:
+ self._test_training(
+ x_shape, dtype, [131], np.float32, use_gpu=True, data_format='NCHW')
+ self._test_training(
+ x_shape, dtype, [6], np.float32, use_gpu=True, data_format='NHWC')
+ self._test_training(
+ x_shape, np.float32, [6], np.float32, use_gpu=False, data_format='NHWC')
def testBatchNormGrad(self):
for is_training in [True, False]:
x_shape = [1, 1, 6, 1]
if test.is_gpu_available(cuda_only=True):
- self._test_gradient(
- x_shape, [1],
- use_gpu=True,
- data_format='NHWC',
- is_training=is_training)
- self._test_gradient(
- x_shape, [1],
- use_gpu=True,
- data_format='NCHW',
- is_training=is_training)
+ for dtype in [np.float16, np.float32]:
+ self._test_gradient(
+ x_shape,
+ dtype, [1],
+ np.float32,
+ use_gpu=True,
+ data_format='NHWC',
+ is_training=is_training)
+ self._test_gradient(
+ x_shape,
+ dtype, [1],
+ np.float32,
+ use_gpu=True,
+ data_format='NCHW',
+ is_training=is_training)
self._test_gradient(
- x_shape, [1],
+ x_shape,
+ np.float32, [1],
+ np.float32,
use_gpu=False,
data_format='NHWC',
is_training=is_training)
x_shape = [1, 1, 6, 2]
if test.is_gpu_available(cuda_only=True):
- self._test_gradient(
- x_shape, [2],
- use_gpu=True,
- data_format='NHWC',
- is_training=is_training)
+ for dtype in [np.float16, np.float32]:
+ self._test_gradient(
+ x_shape,
+ dtype, [2],
+ np.float32,
+ use_gpu=True,
+ data_format='NHWC',
+ is_training=is_training)
self._test_gradient(
- x_shape, [2],
+ x_shape,
+ np.float32, [2],
+ np.float32,
use_gpu=False,
data_format='NHWC',
is_training=is_training)
x_shape = [1, 2, 1, 6]
if test.is_gpu_available(cuda_only=True):
- self._test_gradient(
- x_shape, [2],
- use_gpu=True,
- data_format='NCHW',
- is_training=is_training)
-
- x_shape = [7, 9, 13, 6]
+ for dtype in [np.float16, np.float32]:
+ self._test_gradient(
+ x_shape,
+ dtype, [2],
+ np.float32,
+ use_gpu=True,
+ data_format='NCHW',
+ is_training=is_training)
+
+ x_shape = [5, 7, 11, 4]
if test.is_gpu_available(cuda_only=True):
- self._test_gradient(
- x_shape, [9],
- use_gpu=True,
- data_format='NCHW',
- is_training=is_training)
- self._test_gradient(
- x_shape, [6],
- use_gpu=True,
- data_format='NHWC',
- is_training=is_training)
+ for dtype in [np.float16, np.float32]:
+ self._test_gradient(
+ x_shape,
+ dtype, [7],
+ np.float32,
+ use_gpu=True,
+ data_format='NCHW',
+ is_training=is_training)
+ self._test_gradient(
+ x_shape,
+ dtype, [4],
+ np.float32,
+ use_gpu=True,
+ data_format='NHWC',
+ is_training=is_training)
self._test_gradient(
- x_shape, [6],
+ x_shape,
+ np.float32, [4],
+ np.float32,
use_gpu=False,
data_format='NHWC',
is_training=is_training)
@@ -333,34 +487,48 @@ class BatchNormalizationTest(test.TestCase):
def _testBatchNormGradGrad(self, config):
shape = config['shape']
err_tolerance = config['err_tolerance']
+ dtype = config['dtype']
for is_training in [True, False]:
if test.is_gpu_available(cuda_only=True):
self._test_grad_grad(
- shape, [shape[3]],
+ shape,
+ dtype, [shape[3]],
+ np.float32,
use_gpu=True,
data_format='NHWC',
is_training=is_training,
err_tolerance=err_tolerance)
self._test_grad_grad(
- shape, [shape[1]],
+ shape,
+ dtype, [shape[1]],
+ np.float32,
use_gpu=True,
data_format='NCHW',
is_training=is_training,
err_tolerance=err_tolerance)
- self._test_grad_grad(
- shape, [shape[3]],
- use_gpu=False,
- data_format='NHWC',
- is_training=is_training,
- err_tolerance=err_tolerance)
+ if dtype != np.float16:
+ self._test_grad_grad(
+ shape,
+ np.float32, [shape[3]],
+ np.float32,
+ use_gpu=False,
+ data_format='NHWC',
+ is_training=is_training,
+ err_tolerance=err_tolerance)
def testBatchNormGradGrad(self):
configs = [{
'shape': [2, 3, 4, 5],
- 'err_tolerance': 1e-2
+ 'err_tolerance': 1e-2,
+ 'dtype': np.float32,
+ }, {
+ 'shape': [2, 3, 2, 2],
+ 'err_tolerance': 1e-3,
+ 'dtype': np.float32,
}, {
'shape': [2, 3, 2, 2],
- 'err_tolerance': 1e-3
+ 'err_tolerance': 2e-3,
+ 'dtype': np.float16,
}]
for config in configs:
self._testBatchNormGradGrad(config)
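
The _compute_gradient_error_float16 docstring above describes the pattern this test relies on: take the theoretical Jacobian from the float16 graph and the numerical Jacobian from an equivalent float32 graph evaluated at the same points, since finite differences in float16 are too coarse. A minimal standalone sketch of that pattern (illustrative, not part of the patch; a simple elementwise op stands in for fused_batch_norm):

# Sketch only -- compares a float16 theoretical Jacobian against a float32 numerical one.
import numpy as np
import tensorflow as tf
from tensorflow.python.ops import gradient_checker

x_shape = [2, 3]
x_init = np.random.random_sample(x_shape).astype(np.float16)

with tf.Session():
  x16 = tf.constant(x_init)
  x32 = tf.constant(x_init.astype(np.float32))
  y16 = x16 * x16  # any op with a float16 kernel works here
  y32 = x32 * x32  # the same computation in float32

  # Theoretical (symbolic) Jacobian from the float16 graph.
  theoretical, _ = gradient_checker.compute_gradient(
      x16, x_shape, y16, x_shape, delta=1e-3, x_init_value=x_init)
  # Numerical (finite-difference) Jacobian from the float32 graph.
  _, numerical = gradient_checker.compute_gradient(
      x32, x_shape, y32, x_shape, delta=1e-3,
      x_init_value=x_init.astype(np.float32))
  print(np.fabs(theoretical - numerical).max())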