# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for fused_batch_norm related functionality in tensorflow.ops.nn."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np

from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import gradient_checker
from tensorflow.python.ops import gradients_impl
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_grad
from tensorflow.python.ops import nn_impl
from tensorflow.python.platform import test


class BatchNormalizationTest(test.TestCase):

  def _batch_norm(self, x, mean, var, offset, scale, epsilon):
    # We compute the batch norm manually in this function because
    # nn_impl.batch_normalization does not support float16 yet.
    # TODO(reedwm): Add float16 support to nn_impl.batch_normalization.
    inv = math_ops.rsqrt(var + epsilon) * scale
    y = math_ops.cast(x, scale.dtype) * inv + (offset - mean * inv)
    return math_ops.cast(y, x.dtype)

  def _inference_ref(self, x, scale, offset, mean, var, epsilon, data_format):
    if data_format not in ['NHWC', 'NCHW']:
      raise ValueError('data_format must be NCHW or NHWC, '
                       'got %s.' % data_format)
    if data_format == 'NCHW':
      x = array_ops.transpose(x, [0, 2, 3, 1])
    y = self._batch_norm(x, mean, var, offset, scale, epsilon)
    if data_format == 'NCHW':
      y = array_ops.transpose(y, [0, 3, 1, 2])
    return y.eval()

  def _test_inference(self,
                      x_shape,
                      x_dtype,
                      scale_shape,
                      scale_dtype,
                      use_gpu=True,
                      data_format='NHWC'):
    np.random.seed(1)
    x_val = np.random.random_sample(x_shape).astype(x_dtype)
    scale_val = np.random.random_sample(scale_shape).astype(scale_dtype)
    offset_val = np.random.random_sample(scale_shape).astype(scale_dtype)
    mean_val = np.random.random_sample(scale_shape).astype(scale_dtype)
    var_val = np.random.random_sample(scale_shape).astype(scale_dtype)

    with self.test_session(use_gpu=use_gpu) as sess:
      x = constant_op.constant(x_val, name='x')
      scale = constant_op.constant(scale_val, name='scale')
      offset = constant_op.constant(offset_val, name='offset')
      mean = constant_op.constant(mean_val, name='mean')
      var = constant_op.constant(var_val, name='variance')
      epsilon = 0.001
      y, _, _ = nn_impl.fused_batch_norm(
          x,
          scale,
          offset,
          mean=mean,
          variance=var,
          epsilon=epsilon,
          data_format=data_format,
          is_training=False)
      y_val = sess.run(y)
      y_ref = self._inference_ref(x, scale, offset, mean, var, epsilon,
                                  data_format)
    # An atol value of 1e-3 is too small for float16's, because some adjacent
    # float16 values that y_val can take are greater than 1e-3 apart, e.g.
    # 2.16602 and 2.16797.
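    # For reference: float16 carries 10 fraction bits, so adjacent
    # representable values in [2, 4) are 2**-9 ~= 0.00195 apart (for example,
    # np.spacing(np.float16(2.166)) evaluates to 0.001953125), which is why a
    # 1e-3 tolerance cannot be met for such outputs.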
    atol = 2e-3 if x_dtype == np.float16 else 1e-3
    self.assertAllClose(y_ref, y_val, atol=atol)

  def _training_ref(self, x, scale, offset, epsilon, data_format):
    if data_format not in ['NHWC', 'NCHW']:
      raise ValueError('data_format must be NCHW or NHWC, '
                       'got %s.' % data_format)
    if data_format == 'NCHW':
      x = array_ops.transpose(x, [0, 2, 3, 1])
    mean, var = nn_impl.moments(
        math_ops.cast(x, scale.dtype), [0, 1, 2], keep_dims=False)
    y = self._batch_norm(x, mean, var, offset, scale, epsilon)
    if data_format == 'NCHW':
      y = array_ops.transpose(y, [0, 3, 1, 2])
    return y.eval(), mean.eval(), var.eval()

  def _test_training(self,
                     x_shape,
                     x_dtype,
                     scale_shape,
                     scale_dtype,
                     use_gpu=True,
                     data_format='NHWC'):
    np.random.seed(1)
    x_val = np.random.random_sample(x_shape).astype(x_dtype)
    scale_val = np.random.random_sample(scale_shape).astype(scale_dtype)
    offset_val = np.random.random_sample(scale_shape).astype(scale_dtype)

    with self.test_session(use_gpu=use_gpu) as sess:
      x = constant_op.constant(x_val, name='x')
      scale = constant_op.constant(scale_val, name='scale')
      offset = constant_op.constant(offset_val, name='offset')
      epsilon = 0.001
      y, mean, var = nn_impl.fused_batch_norm(
          x,
          scale,
          offset,
          epsilon=epsilon,
          data_format=data_format,
          is_training=True)
      y_val, mean_val, var_val = sess.run([y, mean, var])
      y_ref, mean_ref, var_ref = self._training_ref(x, scale, offset, epsilon,
                                                    data_format)
    y_atol = 2e-3 if x_dtype == np.float16 else 1e-3
    self.assertAllClose(y_ref, y_val, atol=y_atol)
    self.assertAllClose(mean_ref, mean_val, atol=1e-3)
    # This is for Bessel's correction. tf.nn.moments uses n, instead of n-1, as
    # the denominator in the formula to calculate variance, while
    # tf.nn.fused_batch_norm has Bessel's correction built in.
    sample_size = x_val.size / scale_val.size
    var_ref = var_ref * sample_size / (max(sample_size - 1.0, 1.0))
    self.assertAllClose(var_ref, var_val, atol=1e-3)

  def _compute_gradient_error_float16(self, x, x32, x_shape, y, y32, y_shape):
    """Computes the gradient error for float16 inputs and/or outputs.

    This returns the same value as gradient_checker.compute_gradient_error. The
    difference is that gradient_checker.compute_gradient_error does not compute
    the numeric gradients in a numerically stable way for float16 tensors. To
    fix this, this function requires float32 versions of x and y to numerically
    compute the gradients, to compare with the float16 symbolically computed
    gradients.

    Args:
      x: The input tensor.
      x32: A float32 version of x.
      x_shape: The shape of x.
      y: The output tensor.
      y32: A float32 version of y. Must be calculated based on x32, not x.
      y_shape: The shape of y.

    Returns:
      The maximum error between the two Jacobians, as in
      gradient_checker.compute_gradient_error.
    """
    x_init_val = np.random.random_sample(x_shape).astype(np.float16)
    x32_init_val = x_init_val.astype(np.float32)

    # TODO(reedwm): Do not perform the unnecessary computations in
    # compute_gradient, since they double the computation time of this
    # function.
    theoretical_grad, _ = gradient_checker.compute_gradient(
        x, x_shape, y, y_shape, delta=1e-3, x_init_value=x_init_val)
    _, numerical_grad = gradient_checker.compute_gradient(
        x32, x_shape, y32, y_shape, delta=1e-3, x_init_value=x32_init_val)

    # If grad is empty, no error.
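    # (An empty Jacobian arises when the tensors have zero elements, e.g. the
    # *Shape5 tests below use a zero-sized batch dimension; there is then
    # nothing to compare, so the error is reported as 0.)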
    if theoretical_grad.size == 0 and numerical_grad.size == 0:
      return 0
    return np.fabs(theoretical_grad - numerical_grad).max()

  def _test_gradient(self,
                     x_shape,
                     x_dtype,
                     scale_shape,
                     scale_dtype,
                     use_gpu=True,
                     data_format='NHWC',
                     is_training=True):
    np.random.seed(1)
    x_val = np.random.random_sample(x_shape).astype(x_dtype)
    scale_val = np.random.random_sample(scale_shape).astype(scale_dtype)
    offset_val = np.random.random_sample(scale_shape).astype(scale_dtype)

    with self.test_session(use_gpu=use_gpu):
      x = constant_op.constant(x_val, name='x')
      scale = constant_op.constant(scale_val, name='scale')
      offset = constant_op.constant(offset_val, name='offset')
      if is_training:
        pop_mean = None
        pop_var = None
      else:
        pop_mean = np.random.random_sample(scale_shape).astype(scale_dtype)
        pop_var = np.random.random_sample(scale_shape).astype(scale_dtype)
      y, _, _ = nn_impl.fused_batch_norm(
          x,
          scale,
          offset,
          mean=pop_mean,
          variance=pop_var,
          data_format=data_format,
          is_training=is_training)
      if x_dtype != np.float16:
        err_x = gradient_checker.compute_gradient_error(x, x_shape, y, x_shape)
        err_scale = gradient_checker.compute_gradient_error(
            scale, scale_shape, y, x_shape)
        err_offset = gradient_checker.compute_gradient_error(
            offset, scale_shape, y, x_shape)
      else:
        x32 = constant_op.constant(x_val, name='x32', dtype=dtypes.float32)
        y32, _, _ = nn_impl.fused_batch_norm(
            x32,
            scale,
            offset,
            mean=pop_mean,
            variance=pop_var,
            data_format=data_format,
            is_training=is_training)
        err_x = self._compute_gradient_error_float16(x, x32, x_shape, y, y32,
                                                     x_shape)
        err_scale = self._compute_gradient_error_float16(
            scale, scale, scale_shape, y, y32, x_shape)
        err_offset = self._compute_gradient_error_float16(
            offset, offset, scale_shape, y, y32, x_shape)

    x_err_tolerance = 2e-3 if x_dtype == np.float16 else 1e-3
    scale_err_tolerance = 1e-3
    self.assertLess(err_x, x_err_tolerance)
    self.assertLess(err_scale, scale_err_tolerance)
    self.assertLess(err_offset, scale_err_tolerance)

  def _test_grad_grad(self,
                      x_shape,
                      x_dtype,
                      scale_shape,
                      scale_dtype,
                      use_gpu=True,
                      data_format='NHWC',
                      is_training=True,
                      err_tolerance=1e-3):
    np.random.seed(1)
    x_val = np.random.random_sample(x_shape).astype(x_dtype)
    grad_y_val = np.random.random_sample(x_shape).astype(x_dtype)
    scale_val = np.random.random_sample(scale_shape).astype(scale_dtype)
    offset_val = np.random.random_sample(scale_shape).astype(scale_dtype)

    with self.test_session(use_gpu=use_gpu) as sess:
      x = constant_op.constant(x_val, name='x')
      grad_y = constant_op.constant(grad_y_val, name='grad_y')
      scale = constant_op.constant(scale_val, name='scale')
      offset = constant_op.constant(offset_val, name='offset')
      if is_training:
        pop_mean = None
        pop_var = None
      else:
        pop_mean = np.random.random_sample(scale_shape).astype(scale_dtype)
        pop_var = np.random.random_sample(scale_shape).astype(scale_dtype)
      y, _, _ = nn_impl.fused_batch_norm(
          x,
          scale,
          offset,
          mean=pop_mean,
          variance=pop_var,
          data_format=data_format,
          is_training=is_training)
      grad_x, grad_scale, grad_offset = gradients_impl.gradients(
          y, [x, scale, offset], grad_y)

      if is_training:
        epsilon = y.op.get_attr('epsilon')
        data_format = y.op.get_attr('data_format')
        grad_vals = sess.run([grad_x, grad_scale, grad_offset])
        grad_internal = nn_grad._BatchNormGrad(grad_y, x, scale, pop_mean,
                                               pop_var, epsilon, data_format)
        grad_internal_vals = sess.run(list(grad_internal))
        for grad_val, grad_internal_val in zip(grad_vals, grad_internal_vals):
          self.assertAllClose(grad_val, grad_internal_val, atol=err_tolerance)

      if x_dtype != np.float16:
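        # The checks below differentiate the first-order gradients (grad_x,
        # grad_scale, grad_offset) with respect to grad_y and x, i.e. they
        # exercise the gradient of the FusedBatchNormGrad op itself.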
        err_grad_grad_y_1 = gradient_checker.compute_gradient_error(
            grad_y, x_shape, grad_x, x_shape)
        err_grad_grad_y_2 = gradient_checker.compute_gradient_error(
            grad_y, x_shape, grad_scale, scale_shape)
        err_grad_grad_y_3 = gradient_checker.compute_gradient_error(
            grad_y, x_shape, grad_offset, scale_shape)
        # In freeze mode, grad_x is not a function of x.
        if is_training:
          err_grad_x_1 = gradient_checker.compute_gradient_error(
              x, x_shape, grad_x, x_shape)
          err_grad_x_2 = gradient_checker.compute_gradient_error(
              x, x_shape, grad_scale, scale_shape)
          err_grad_scale = gradient_checker.compute_gradient_error(
              scale, scale_shape, grad_x, x_shape)
      else:
        x32 = constant_op.constant(x_val, dtype=dtypes.float32, name='x32')
        grad_y32 = constant_op.constant(
            grad_y_val, dtype=dtypes.float32, name='grad_y32')
        y32, _, _ = nn_impl.fused_batch_norm(
            x32,
            scale,
            offset,
            mean=pop_mean,
            variance=pop_var,
            data_format=data_format,
            is_training=is_training)
        grad_x32, grad_scale32, grad_offset32 = gradients_impl.gradients(
            y32, [x32, scale, offset], grad_y32)
        err_grad_grad_y_1 = self._compute_gradient_error_float16(
            grad_y, grad_y32, x_shape, grad_x, grad_x32, x_shape)
        err_grad_grad_y_2 = self._compute_gradient_error_float16(
            grad_y, grad_y32, x_shape, grad_scale, grad_scale32, scale_shape)
        err_grad_grad_y_3 = self._compute_gradient_error_float16(
            grad_y, grad_y32, x_shape, grad_offset, grad_offset32, scale_shape)
        # In freeze mode, grad_x is not a function of x.
        if is_training:
          err_grad_x_1 = self._compute_gradient_error_float16(
              x, x32, x_shape, grad_x, grad_x32, x_shape)
          err_grad_x_2 = self._compute_gradient_error_float16(
              x, x32, x_shape, grad_scale, grad_scale32, scale_shape)
          err_grad_scale = self._compute_gradient_error_float16(
              scale, scale, scale_shape, grad_x, grad_x32, x_shape)

    self.assertLess(err_grad_grad_y_1, err_tolerance)
    self.assertLess(err_grad_grad_y_2, err_tolerance)
    self.assertLess(err_grad_grad_y_3, err_tolerance)
    if is_training:
      self.assertLess(err_grad_x_1, err_tolerance)
      self.assertLess(err_grad_x_2, err_tolerance)
      self.assertLess(err_grad_scale, err_tolerance)

  def testInferenceShape1(self):
    x_shape = [1, 1, 6, 1]
    for dtype in [np.float16, np.float32]:
      if test.is_gpu_available(cuda_only=True):
        self._test_inference(
            x_shape, dtype, [1], np.float32, use_gpu=True, data_format='NHWC')
        self._test_inference(
            x_shape, dtype, [1], np.float32, use_gpu=True, data_format='NCHW')
      self._test_inference(
          x_shape, dtype, [1], np.float32, use_gpu=False, data_format='NHWC')

  def testInferenceShape2(self):
    x_shape = [1, 1, 6, 2]
    if test.is_gpu_available(cuda_only=True):
      for dtype in [np.float16, np.float32]:
        self._test_inference(
            x_shape, dtype, [2], np.float32, use_gpu=True, data_format='NHWC')
        self._test_inference(
            x_shape, dtype, [2], np.float32, use_gpu=False, data_format='NHWC')

  def testInferenceShape3(self):
    x_shape = [1, 2, 1, 6]
    if test.is_gpu_available(cuda_only=True):
      for dtype in [np.float16, np.float32]:
        self._test_inference(
            x_shape, dtype, [2], np.float32, use_gpu=True, data_format='NCHW')

  def testInferenceShape4(self):
    x_shape = [27, 131, 127, 6]
    for dtype in [np.float16, np.float32]:
      if test.is_gpu_available(cuda_only=True):
        self._test_inference(
            x_shape, dtype, [131], np.float32, use_gpu=True,
            data_format='NCHW')
        self._test_inference(
            x_shape, dtype, [6], np.float32, use_gpu=True, data_format='NHWC')
      self._test_inference(
          x_shape, dtype, [6], np.float32, use_gpu=False, data_format='NHWC')

  def testInferenceShape5(self):
    x_shape = [0, 131, 127, 6]
    for dtype in [np.float16, np.float32]:
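      # The zero-sized batch dimension above deliberately produces empty
      # tensors, exercising the empty-input path of fused_batch_norm.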
      if test.is_gpu_available(cuda_only=True):
        self._test_inference(
            x_shape, dtype, [131], np.float32, use_gpu=True,
            data_format='NCHW')
        self._test_inference(
            x_shape, dtype, [6], np.float32, use_gpu=True, data_format='NHWC')
      self._test_inference(
          x_shape, dtype, [6], np.float32, use_gpu=False, data_format='NHWC')

  def testTrainingShape1(self):
    x_shape = [1, 1, 6, 1]
    for dtype in [np.float16, np.float32]:
      if test.is_gpu_available(cuda_only=True):
        self._test_training(
            x_shape, dtype, [1], np.float32, use_gpu=True, data_format='NHWC')
        self._test_training(
            x_shape, dtype, [1], np.float32, use_gpu=True, data_format='NCHW')
      self._test_training(
          x_shape, dtype, [1], np.float32, use_gpu=False, data_format='NHWC')

  def testTrainingShape2(self):
    x_shape = [1, 1, 6, 2]
    for dtype in [np.float16, np.float32]:
      if test.is_gpu_available(cuda_only=True):
        self._test_training(
            x_shape, dtype, [2], np.float32, use_gpu=True, data_format='NHWC')
      self._test_training(
          x_shape, dtype, [2], np.float32, use_gpu=False, data_format='NHWC')

  def testTrainingShape3(self):
    x_shape = [1, 2, 1, 6]
    if test.is_gpu_available(cuda_only=True):
      for dtype in [np.float16, np.float32]:
        self._test_training(
            x_shape, dtype, [2], np.float32, use_gpu=True, data_format='NCHW')

  def testTrainingShape4(self):
    x_shape = [27, 131, 127, 6]
    for dtype in [np.float16, np.float32]:
      if test.is_gpu_available(cuda_only=True):
        self._test_training(
            x_shape, dtype, [131], np.float32, use_gpu=True,
            data_format='NCHW')
        self._test_training(
            x_shape, dtype, [6], np.float32, use_gpu=True, data_format='NHWC')
      self._test_training(
          x_shape, dtype, [6], np.float32, use_gpu=False, data_format='NHWC')

  def testTrainingShape5(self):
    x_shape = [0, 131, 127, 6]
    for dtype in [np.float16, np.float32]:
      if test.is_gpu_available(cuda_only=True):
        self._test_training(
            x_shape, dtype, [131], np.float32, use_gpu=True,
            data_format='NCHW')
        self._test_training(
            x_shape, dtype, [6], np.float32, use_gpu=True, data_format='NHWC')
      self._test_training(
          x_shape, dtype, [6], np.float32, use_gpu=False, data_format='NHWC')

  def testBatchNormGradShape1(self):
    for is_training in [True, False]:
      x_shape = [1, 1, 6, 1]
      for dtype in [np.float16, np.float32]:
        if test.is_gpu_available(cuda_only=True):
          self._test_gradient(
              x_shape,
              dtype, [1],
              np.float32,
              use_gpu=True,
              data_format='NHWC',
              is_training=is_training)
          self._test_gradient(
              x_shape,
              dtype, [1],
              np.float32,
              use_gpu=True,
              data_format='NCHW',
              is_training=is_training)
        self._test_gradient(
            x_shape,
            dtype, [1],
            np.float32,
            use_gpu=False,
            data_format='NHWC',
            is_training=is_training)

  def testBatchNormGradShape2(self):
    for is_training in [True, False]:
      x_shape = [1, 1, 6, 2]
      for dtype in [np.float16, np.float32]:
        if test.is_gpu_available(cuda_only=True):
          self._test_gradient(
              x_shape,
              dtype, [2],
              np.float32,
              use_gpu=True,
              data_format='NHWC',
              is_training=is_training)
        self._test_gradient(
            x_shape,
            dtype, [2],
            np.float32,
            use_gpu=False,
            data_format='NHWC',
            is_training=is_training)

  def testBatchNormGradShape3(self):
    for is_training in [True, False]:
      x_shape = [1, 2, 1, 6]
      if test.is_gpu_available(cuda_only=True):
        for dtype in [np.float16, np.float32]:
          self._test_gradient(
              x_shape,
              dtype, [2],
              np.float32,
              use_gpu=True,
              data_format='NCHW',
              is_training=is_training)

  def testBatchNormGradShape4(self):
    for is_training in [True, False]:
      x_shape = [5, 7, 11, 4]
      for dtype in [np.float16, np.float32]:
        if test.is_gpu_available(cuda_only=True):
          self._test_gradient(
              x_shape,
              dtype, [7],
              np.float32,
              use_gpu=True,
              data_format='NCHW',
              is_training=is_training)
          self._test_gradient(
              x_shape,
              dtype, [4],
              np.float32,
              use_gpu=True,
              data_format='NHWC',
              is_training=is_training)
        self._test_gradient(
            x_shape,
            dtype, [4],
            np.float32,
            use_gpu=False,
            data_format='NHWC',
            is_training=is_training)

  def testBatchNormGradShape5(self):
    for is_training in [True, False]:
      x_shape = [0, 7, 11, 4]
      for dtype in [np.float16, np.float32]:
        if test.is_gpu_available(cuda_only=True):
          self._test_gradient(
              x_shape,
              dtype, [7],
              np.float32,
              use_gpu=True,
              data_format='NCHW',
              is_training=is_training)
          self._test_gradient(
              x_shape,
              dtype, [4],
              np.float32,
              use_gpu=True,
              data_format='NHWC',
              is_training=is_training)
        self._test_gradient(
            x_shape,
            dtype, [4],
            np.float32,
            use_gpu=False,
            data_format='NHWC',
            is_training=is_training)

  def _testBatchNormGradGrad(self, config):
    shape = config['shape']
    err_tolerance = config['err_tolerance']
    dtype = config['dtype']
    for is_training in [True, False]:
      if test.is_gpu_available(cuda_only=True):
        self._test_grad_grad(
            shape,
            dtype, [shape[3]],
            np.float32,
            use_gpu=True,
            data_format='NHWC',
            is_training=is_training,
            err_tolerance=err_tolerance)
        self._test_grad_grad(
            shape,
            dtype, [shape[1]],
            np.float32,
            use_gpu=True,
            data_format='NCHW',
            is_training=is_training,
            err_tolerance=err_tolerance)
      self._test_grad_grad(
          shape,
          dtype, [shape[3]],
          np.float32,
          use_gpu=False,
          data_format='NHWC',
          is_training=is_training,
          err_tolerance=err_tolerance)

  def testBatchNormGradGradConfig1(self):
    config = {
        'shape': [2, 3, 4, 5],
        'err_tolerance': 1e-2,
        'dtype': np.float32,
    }
    self._testBatchNormGradGrad(config)

  def testBatchNormGradGradConfig2(self):
    config = {
        'shape': [2, 3, 2, 2],
        'err_tolerance': 1e-3,
        'dtype': np.float32,
    }
    self._testBatchNormGradGrad(config)

  def testBatchNormGradGradConfig3(self):
    config = {
        'shape': [2, 3, 4, 5],
        'err_tolerance': 1e-2,
        'dtype': np.float16,
    }
    self._testBatchNormGradGrad(config)

  def testBatchNormGradGradConfig4(self):
    config = {
        'shape': [2, 3, 2, 2],
        'err_tolerance': 2e-3,
        'dtype': np.float16,
    }
    self._testBatchNormGradGrad(config)


if __name__ == '__main__':
  test.main()