Diffstat (limited to 'tensorflow/python/ops/nn_test.py')
-rw-r--r--  tensorflow/python/ops/nn_test.py  882
1 files changed, 882 insertions, 0 deletions
diff --git a/tensorflow/python/ops/nn_test.py b/tensorflow/python/ops/nn_test.py
new file mode 100644
index 0000000000..11ce56e359
--- /dev/null
+++ b/tensorflow/python/ops/nn_test.py
@@ -0,0 +1,882 @@
+"""Tests for tensorflow.ops.nn."""
+import math
+
+import tensorflow.python.platform
+
+import numpy as np
+
+from tensorflow.python.framework import test_util
+from tensorflow.python.framework import types
+from tensorflow.python.kernel_tests import gradient_checker as gc
+from tensorflow.python.ops import constant_op
+from tensorflow.python.ops import gen_nn_ops
+from tensorflow.python.ops import gradients
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import nn
+from tensorflow.python.ops import nn_grad
+from tensorflow.python.platform import googletest
+
+exp = math.exp
+log = math.log
+
+
+class SigmoidCrossEntropyWithLogitsTest(test_util.TensorFlowTestCase):
+
+ def _SigmoidCrossEntropyWithLogits(self, logits, targets):
+ assert len(logits) == len(targets)
+ pred = [1 / (1 + exp(-x)) for x in logits]
+ eps = 0.0001
+ pred = [min(max(p, eps), 1 - eps) for p in pred]
+ return [-z * log(y) - (1 - z) * log(1 - y) for y, z in zip(pred, targets)]
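+
+  # Illustrative sketch (hypothetical helper, not used by the tests): the
+  # same loss can be written in a numerically stable form that avoids
+  # overflowing exp() for large-magnitude logits. Ignoring the eps clamp
+  # above, max(x, 0) - x * z + log(1 + exp(-|x|)) equals
+  # -z * log(sigmoid(x)) - (1 - z) * log(1 - sigmoid(x)).
+  def _StableSigmoidCrossEntropyWithLogits(self, logits, targets):
+    return [max(x, 0) - x * z + log(1 + exp(-abs(x)))
+            for x, z in zip(logits, targets)]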
+
+ def _Inputs(self, x=None, y=None, dtype=types.float64, sizes=None):
+ x = [-100, -2, -2, 0, 2, 2, 2, 100] if x is None else x
+ y = [0, 0, 1, 0, 0, 1, 0.5, 1] if y is None else y
+ assert len(x) == len(y)
+ sizes = sizes if sizes else [len(x)]
+ logits = constant_op.constant(x, shape=sizes, dtype=dtype, name="logits")
+ targets = constant_op.constant(y, shape=sizes, dtype=dtype, name="targets")
+ losses = np.array(self._SigmoidCrossEntropyWithLogits(x, y)).reshape(*sizes)
+ return logits, targets, losses
+
+ def testConstructionNamed(self):
+ with self.test_session():
+ logits, targets, _ = self._Inputs()
+ loss = nn.sigmoid_cross_entropy_with_logits(logits, targets,
+ name="mylogistic")
+ self.assertEqual("mylogistic", loss.op.name)
+
+ def testLogisticOutput(self):
+ for use_gpu in [True, False]:
+ with self.test_session(use_gpu=use_gpu):
+ logits, targets, losses = self._Inputs(dtype=types.float32)
+ loss = nn.sigmoid_cross_entropy_with_logits(logits, targets)
+ np_loss = np.array(losses).astype(np.float32)
+ tf_loss = loss.eval()
+ self.assertAllClose(np_loss, tf_loss, atol=0.001)
+
+ def testLogisticOutputMultiDim(self):
+ for use_gpu in [True, False]:
+ with self.test_session(use_gpu=use_gpu):
+ logits, targets, losses = self._Inputs(dtype=types.float32,
+ sizes=[2, 2, 2])
+ loss = nn.sigmoid_cross_entropy_with_logits(logits, targets)
+ np_loss = np.array(losses).astype(np.float32)
+ tf_loss = loss.eval()
+ self.assertAllClose(np_loss, tf_loss, atol=0.001)
+
+ def testGradient(self):
+ sizes = [4, 2]
+ with self.test_session():
+ logits, targets, _ = self._Inputs(sizes=sizes)
+ loss = nn.sigmoid_cross_entropy_with_logits(logits, targets)
+ err = gc.ComputeGradientError(logits, sizes, loss, sizes)
+ print "logistic loss gradient err = ", err
+ self.assertLess(err, 1e-7)
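+
+  # Note: gc.ComputeGradientError, used above and in the gradient tests
+  # throughout this file, numerically estimates the Jacobian of the output
+  # with respect to the input, compares it against the analytically derived
+  # Jacobian, and returns the maximum absolute difference between the two.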
+
+
+class ZeroFractionTest(test_util.TensorFlowTestCase):
+
+ def _ZeroFraction(self, x):
+ assert x.shape
+ total_elements = float(np.prod(x.shape))
+ nonzeros = float(np.count_nonzero(x.flatten()))
+ return 1.0 - (nonzeros / total_elements)
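+
+  # A minimal worked example (hypothetical helper, not used by the tests):
+  # a 2x2 array with a single non-zero entry has a zero fraction of
+  # 1 - 1/4 = 0.75.
+  def _ZeroFractionExample(self):
+    x = np.array([[0.0, 0.0], [3.0, 0.0]])
+    return self._ZeroFraction(x)  # == 0.75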
+
+ def testZeroFraction(self):
+ x_shape = [5, 17]
+ x_np = np.random.randint(0, 2, size=x_shape).astype(np.float32)
+ y_np = self._ZeroFraction(x_np)
+ with self.test_session():
+ x_tf = constant_op.constant(x_np)
+ x_tf.set_shape(x_shape)
+ y_tf = nn.zero_fraction(x_tf)
+ y_tf_np = y_tf.eval()
+ eps = 1e-8
+ self.assertAllClose(y_tf_np, y_np, eps)
+
+ def testZeroFractionEmpty(self):
+ with self.test_session():
+ x = np.zeros(0)
+ y = nn.zero_fraction(x).eval()
+ self.assertTrue(np.isnan(y))
+
+
+class SoftmaxTest(test_util.TensorFlowTestCase):
+
+ def _softmax(self, x):
+ assert len(x.shape) == 2
+ m = x.max(1)[:, np.newaxis]
+ u = np.exp(x - m)
+ z = u.sum(1)[:, np.newaxis]
+ return u / z
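+
+  # Illustrative sketch (hypothetical helper, not used by the tests): the
+  # max subtraction above is purely for numerical stability, since
+  # exp(x - m) / sum(exp(x - m)) equals exp(x) / sum(exp(x)). The naive
+  # version below is mathematically equivalent but can overflow for large
+  # logits.
+  def _softmaxNaive(self, x):
+    u = np.exp(x)
+    return u / u.sum(1)[:, np.newaxis]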
+
+ def testSoftmax(self):
+ x_shape = [5, 10]
+ x_np = np.random.randn(*x_shape).astype(np.float32)
+ y_np = self._softmax(x_np)
+ with self.test_session():
+ x_tf = constant_op.constant(x_np)
+ y_tf = nn.softmax(x_tf)
+ y_tf_np = y_tf.eval()
+ eps = 1e-3
+ self.assertAllClose(y_tf_np, y_np, eps)
+
+ def testGradient(self):
+ x_shape = [5, 10]
+ x_np = np.random.randn(*x_shape).astype(np.float64)
+ with self.test_session():
+ x_tf = constant_op.constant(x_np)
+ y_tf = nn.softmax(x_tf)
+ err = gc.ComputeGradientError(x_tf, x_shape, y_tf, x_shape)
+ eps = 1e-8
+ self.assertLess(err, eps)
+
+
+class DeConv2DTest(test_util.TensorFlowTestCase):
+
+ def testDeConv2DSingleStride(self):
+ with self.test_session():
+ strides = [1, 1, 1, 1]
+
+ # Input, output: [batch, height, width, depth]
+ x_shape = [2, 6, 4, 3]
+ y_shape = [2, 6, 4, 2]
+
+ # Filter: [kernel_height, kernel_width, output_depth, input_depth]
+ f_shape = [3, 3, 2, 3]
+
+ x = constant_op.constant(1.0, shape=x_shape, name="x",
+ dtype=types.float32)
+ f = constant_op.constant(1.0, shape=f_shape, name="filter",
+ dtype=types.float32)
+ output = nn.deconv2d(x, f, y_shape, strides=strides, padding="SAME")
+ value = output.eval()
+
+      # We count the number of input cells that are summed into each output
+      # location.
+ # At the center, #cells=kernel_height * kernel_width
+ # At the corners, #cells=ceil(kernel_height/2) * ceil(kernel_width/2)
+ # At the borders, #cells=ceil(kernel_height/2)*kernel_width or
+ # kernel_height * ceil(kernel_width/2)
+
+ for n in xrange(x_shape[0]):
+ for k in xrange(f_shape[2]):
+ for w in xrange(y_shape[2]):
+ for h in xrange(y_shape[1]):
+ target = 4 * 3.0
+ h_in = h > 0 and h < y_shape[1] - 1
+ w_in = w > 0 and w < y_shape[2] - 1
+ if h_in and w_in:
+ target += 5 * 3.0
+ elif h_in or w_in:
+ target += 2 * 3.0
+ self.assertAllClose(target, value[n, h, w, k])
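+
+  # Illustrative sketch (hypothetical helper, not used by the tests): with
+  # all-ones input and filter, a 3x3 kernel, stride 1 and SAME padding, the
+  # output at (h, w) is just the number of kernel taps that land inside the
+  # image, times the input depth: 2*2*3 = 12 at corners, 2*3*3 = 18 on
+  # edges and 3*3*3 = 27 in the interior, matching the targets above.
+  def _ExpectedSingleStrideValue(self, h, w, height, width, in_depth):
+    taps_h = 3 - (1 if h == 0 else 0) - (1 if h == height - 1 else 0)
+    taps_w = 3 - (1 if w == 0 else 0) - (1 if w == width - 1 else 0)
+    return float(taps_h * taps_w * in_depth)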
+
+ def testDeConv2DSame(self):
+ with self.test_session():
+ strides = [1, 2, 2, 1]
+
+ # Input, output: [batch, height, width, depth]
+ x_shape = [2, 6, 4, 3]
+ y_shape = [2, 12, 8, 2]
+
+ # Filter: [kernel_height, kernel_width, output_depth, input_depth]
+ f_shape = [3, 3, 2, 3]
+
+ x = constant_op.constant(1.0, shape=x_shape, name="x",
+ dtype=types.float32)
+ f = constant_op.constant(1.0, shape=f_shape, name="filter",
+ dtype=types.float32)
+ output = nn.deconv2d(x, f, y_shape, strides=strides, padding="SAME")
+ value = output.eval()
+
+ for n in xrange(x_shape[0]):
+ for k in xrange(f_shape[2]):
+ for w in xrange(y_shape[2]):
+ for h in xrange(y_shape[1]):
+ target = 3.0
+ # We add a case for locations divisible by the stride.
+ h_in = h % strides[1] == 0 and h > 0 and h < y_shape[1] - 1
+ w_in = w % strides[2] == 0 and w > 0 and w < y_shape[2] - 1
+ if h_in and w_in:
+ target += 9.0
+ elif h_in or w_in:
+ target += 3.0
+ self.assertAllClose(target, value[n, h, w, k])
+
+ def testDeConv2DValid(self):
+ with self.test_session():
+ strides = [1, 2, 2, 1]
+
+ # Input, output: [batch, height, width, depth]
+ x_shape = [2, 6, 4, 3]
+ y_shape = [2, 13, 9, 2]
+
+ # Filter: [kernel_height, kernel_width, output_depth, input_depth]
+ f_shape = [3, 3, 2, 3]
+
+ x = constant_op.constant(1.0, shape=x_shape, name="x",
+ dtype=types.float32)
+ f = constant_op.constant(1.0, shape=f_shape, name="filter",
+ dtype=types.float32)
+ output = nn.deconv2d(x, f, y_shape, strides=strides, padding="VALID")
+ value = output.eval()
+
+ cache_values = np.zeros(y_shape, dtype=np.float32)
+
+ # The amount of padding added
+ pad = 1
+
+ for n in xrange(x_shape[0]):
+ for k in xrange(f_shape[2]):
+ for w in xrange(pad, y_shape[2] - pad):
+ for h in xrange(pad, y_shape[1] - pad):
+ target = 3.0
+ # We add a case for locations divisible by the stride.
+ h_in = h % strides[
+ 1] == 0 and h > pad and h < y_shape[1] - 1 - pad
+ w_in = w % strides[
+ 2] == 0 and w > pad and w < y_shape[2] - 1 - pad
+ if h_in and w_in:
+ target += 9.0
+ elif h_in or w_in:
+ target += 3.0
+ cache_values[n, h, w, k] = target
+
+ # copy values in the border
+ cache_values[n, :, 0, k] = cache_values[n, :, 1, k]
+ cache_values[n, :, -1, k] = cache_values[n, :, -2, k]
+ cache_values[n, 0, :, k] = cache_values[n, 1, :, k]
+ cache_values[n, -1, :, k] = cache_values[n, -2, :, k]
+
+ self.assertAllClose(cache_values, value)
+
+ def testGradient(self):
+ x_shape = [2, 6, 4, 3]
+ f_shape = [3, 3, 2, 3]
+ y_shape = [2, 12, 8, 2]
+ strides = [1, 2, 2, 1]
+ np.random.seed(1) # Make it reproducible.
+ x_val = np.random.random_sample(x_shape).astype(np.float64)
+ f_val = np.random.random_sample(f_shape).astype(np.float64)
+ with self.test_session():
+ x = constant_op.constant(x_val, name="x", dtype=types.float32)
+ f = constant_op.constant(f_val, name="f", dtype=types.float32)
+ output = nn.deconv2d(x, f, y_shape, strides=strides, padding="SAME")
+ err = gc.ComputeGradientError([x, f], [x_shape, f_shape], output, y_shape)
+ print "DeConv gradient err = %g " % err
+ err_tolerance = 0.0005
+ self.assertLess(err, err_tolerance)
+
+
+class L2LossTest(test_util.TensorFlowTestCase):
+
+ def testL2Loss(self):
+ with self.test_session():
+ x = constant_op.constant([1.0, 0.0, 3.0, 2.0], shape=[2, 2], name="x")
+ l2loss = nn.l2_loss(x)
+ value = l2loss.eval()
+ self.assertAllClose(7.0, value)
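+
+  # Illustrative check (hypothetical helper, not used by the tests): l2_loss
+  # computes sum(x ** 2) / 2, so for [1, 0, 3, 2] this is
+  # (1 + 0 + 9 + 4) / 2 = 7, the value expected above.
+  def _L2LossReference(self, x):
+    return np.sum(np.square(x)) / 2.0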
+
+ def testGradient(self):
+ x_shape = [20, 7, 3]
+ np.random.seed(1) # Make it reproducible.
+ x_val = np.random.random_sample(x_shape).astype(np.float64)
+ with self.test_session():
+ x = constant_op.constant(x_val, name="x")
+ output = nn.l2_loss(x)
+ err = gc.ComputeGradientError(x, x_shape, output, [1])
+ print "L2Loss gradient err = %g " % err
+ err_tolerance = 1e-11
+ self.assertLess(err, err_tolerance)
+
+
+class L2NormalizeTest(test_util.TensorFlowTestCase):
+
+ def _l2Normalize(self, x, dim):
+ norm = np.apply_along_axis(np.linalg.norm, dim, x)
+ return x / np.expand_dims(norm, dim)
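+
+  # Illustrative sketch (hypothetical helper, not used by the tests): after
+  # normalization, the slices along `dim` have unit Euclidean norm, up to
+  # floating point error.
+  def _HasUnitNorm(self, y, dim):
+    norms = np.apply_along_axis(np.linalg.norm, dim, y)
+    return np.allclose(norms, 1.0)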
+
+ def testL2Normalize(self):
+ x_shape = [20, 7, 3]
+ np.random.seed(1)
+ x_np = np.random.random_sample(x_shape).astype(np.float32)
+ for dim in range(len(x_shape)):
+ y_np = self._l2Normalize(x_np, dim)
+ with self.test_session():
+ x_tf = constant_op.constant(x_np, name="x")
+ y_tf = nn.l2_normalize(x_tf, dim)
+ self.assertAllClose(y_np, y_tf.eval())
+
+ def testL2NormalizeGradient(self):
+ x_shape = [20, 7, 3]
+ np.random.seed(1)
+ x_np = np.random.random_sample(x_shape).astype(np.float64)
+ for dim in range(len(x_shape)):
+ with self.test_session():
+ x_tf = constant_op.constant(x_np, name="x")
+ y_tf = nn.l2_normalize(x_tf, dim)
+ err = gc.ComputeGradientError(x_tf, x_shape, y_tf, x_shape)
+ print "L2Normalize gradient err = %g " % err
+ self.assertLess(err, 1e-4)
+
+
+class DropoutTest(test_util.TensorFlowTestCase):
+
+ def testDropout(self):
+    # Runs dropout on an all-ones tensor 10 times, sums the number of
+    # non-zero values, and validates that the count is approximately what
+    # the keep probability predicts over this many samples.
+ x_dim = 40
+ y_dim = 30
+ num_iter = 10
+ for keep_prob in [0.1, 0.5, 0.8]:
+ with self.test_session():
+ t = constant_op.constant(1.0,
+ shape=[x_dim, y_dim],
+ dtype=types.float32)
+ dropout = nn.dropout(t, keep_prob)
+ final_count = 0
+ self.assertEqual([x_dim, y_dim], dropout.get_shape())
+ for _ in xrange(0, num_iter):
+ value = dropout.eval()
+ final_count += np.count_nonzero(value)
+ # Verifies that there are only two values: 0 and 1/keep_prob.
+ sorted_value = np.unique(np.sort(value))
+ self.assertEqual(0, sorted_value[0])
+ self.assertAllClose(1 / keep_prob, sorted_value[1])
+ # Check that we are in the 15% error range
+ expected_count = x_dim * y_dim * keep_prob * num_iter
+ rel_error = math.fabs(final_count - expected_count) / expected_count
+ print rel_error
+ self.assertTrue(rel_error < 0.15)
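+
+  # Illustrative sketch (hypothetical helper, not used by the tests) of the
+  # same "inverted dropout" scheme in plain NumPy: each element is kept with
+  # probability keep_prob and the survivors are scaled by 1 / keep_prob, so
+  # the expected value of the output matches the input.
+  def _NumpyDropout(self, x, keep_prob):
+    mask = np.random.uniform(size=x.shape) < keep_prob
+    return x * mask / keep_prob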
+
+ def testShapedDropout(self):
+    # Same as testDropout, but with a shaped noise_shape: runs dropout on an
+    # all-ones tensor 10 times, sums the number of non-zero values, and
+    # validates that the count is approximately what the keep probability
+    # predicts over this many samples.
+ x_dim = 40 * 30
+ y_dim = 3
+ num_iter = 10
+ for keep_prob in [0.1, 0.5, 0.8]:
+ with self.test_session():
+ t = constant_op.constant(1.0,
+ shape=[x_dim, y_dim],
+ dtype=types.float32)
+ dropout = nn.dropout(t, keep_prob, noise_shape=[x_dim, 1])
+ self.assertEqual([x_dim, y_dim], dropout.get_shape())
+ final_count = 0
+ for _ in xrange(0, num_iter):
+ value = dropout.eval()
+ final_count += np.count_nonzero(value)
+ # Verifies that there are only two values: 0 and 1/keep_prob.
+ sorted_value = np.unique(np.sort(value))
+ self.assertEqual(0, sorted_value[0])
+ self.assertAllClose(1 / keep_prob, sorted_value[1])
+ # Check that we are in the 15% error range
+ expected_count = x_dim * y_dim * keep_prob * num_iter
+ rel_error = math.fabs(final_count - expected_count) / expected_count
+ print rel_error
+ self.assertTrue(rel_error < 0.15)
+
+ def testShapedDropoutCorrelation(self):
+    # Runs dropout with noise_shape=[x_dim, 1] and checks that the noise is
+    # fully correlated along each row: every row ends up either entirely
+    # zero or entirely 1/keep_prob.
+ x_dim = 40
+ y_dim = 30
+ num_iter = 10
+ for keep_prob in [0.1, 0.5, 0.8]:
+ with self.test_session():
+ t = constant_op.constant(1.0,
+ shape=[x_dim, y_dim],
+ dtype=types.float32)
+ dropout = nn.dropout(t, keep_prob, noise_shape=[x_dim, 1])
+ self.assertEqual([x_dim, y_dim], dropout.get_shape())
+ for _ in xrange(0, num_iter):
+ value = dropout.eval()
+          # Verifies that each row (along the y dimension) has only one
+          # distinct activation value.
+ for i in xrange(x_dim):
+ sorted_value = np.unique(np.sort(value[i, :]))
+ self.assertEqual(sorted_value.size, 1)
+
+ def testShapedDropoutShapeError(self):
+ # Runs shaped dropout and verifies an error is thrown on misshapen noise.
+ x_dim = 40
+ y_dim = 30
+ keep_prob = 0.5
+ with self.test_session():
+ t = constant_op.constant(1.0,
+ shape=[x_dim, y_dim],
+ dtype=types.float32)
+ with self.assertRaises(ValueError):
+ _ = nn.dropout(t, keep_prob, noise_shape=[x_dim, y_dim + 10])
+ with self.assertRaises(ValueError):
+ _ = nn.dropout(t, keep_prob, noise_shape=[x_dim, y_dim, 5])
+ with self.assertRaises(ValueError):
+ _ = nn.dropout(t, keep_prob, noise_shape=[x_dim + 3])
+ with self.assertRaises(ValueError):
+ _ = nn.dropout(t, keep_prob, noise_shape=[x_dim])
+      # Test that broadcastable noise shapes are accepted.
+ _ = nn.dropout(t, keep_prob, noise_shape=[y_dim])
+ _ = nn.dropout(t, keep_prob, noise_shape=[1, y_dim])
+ _ = nn.dropout(t, keep_prob, noise_shape=[x_dim, 1])
+ _ = nn.dropout(t, keep_prob, noise_shape=[1, 1])
+
+
+class BatchNormWithGlobalNormalizationTest(test_util.TensorFlowTestCase):
+
+ def _npBatchNorm(self, x, m, v, beta, gamma, epsilon,
+ scale_after_normalization):
+ y = (x - m) / np.sqrt(v + epsilon)
+ y = y * gamma if scale_after_normalization else y
+ y += beta
+ return y
+
+ def _opsBatchNorm(self, x, m, v, beta, gamma, epsilon,
+ scale_after_normalization):
+ y = (x - m) * math_ops.rsqrt(v + epsilon)
+ if scale_after_normalization:
+ y = gamma * y
+ y += beta
+ return y
+
+ def testBatchNorm(self):
+ x_shape = [3, 5, 4, 2]
+ param_shape = [2]
+ x_val = np.random.random_sample(x_shape).astype(np.float32)
+ m_val = np.random.random_sample(param_shape).astype(np.float32)
+ v_val = np.random.random_sample(param_shape).astype(np.float32)
+ beta_val = np.random.random_sample(param_shape).astype(np.float32)
+ gamma_val = np.random.random_sample(param_shape).astype(np.float32)
+ for use_gpu in [True, False]:
+ with self.test_session(use_gpu=use_gpu) as sess:
+ x = constant_op.constant(x_val, name="x")
+ m = constant_op.constant(m_val, name="m")
+ v = constant_op.constant(v_val, name="v")
+ beta = constant_op.constant(beta_val, name="beta")
+ gamma = constant_op.constant(gamma_val, name="gamma")
+ epsilon = 0.001
+ for scale_after_normalization in [True, False]:
+ bn = nn.batch_norm_with_global_normalization(
+ x, m, v, beta, gamma, epsilon, scale_after_normalization)
+ on = self._opsBatchNorm(
+ x, m, v, beta, gamma, epsilon, scale_after_normalization)
+ np_batch_norm = self._npBatchNorm(
+ x_val, m_val, v_val, beta_val, gamma_val, epsilon,
+ scale_after_normalization)
+ tf_batch_norm, ops_batch_norm = sess.run([bn, on])
+ self.assertAllClose(np_batch_norm, tf_batch_norm, atol=0.000001)
+ self.assertAllClose(np_batch_norm, ops_batch_norm, atol=0.000001)
+ self.assertAllClose(tf_batch_norm, ops_batch_norm, atol=0.000001)
+
+ def _testBatchNormGradient(self, param_index, tag, scale_after_normalization,
+ err_tolerance=1e-11):
+ x_shape = [3, 5, 4, 5]
+ param_shape = [5]
+ np.random.seed(1) # Make it reproducible.
+ x_val = np.random.random_sample(x_shape).astype(np.float64)
+ m_val = np.random.random_sample(param_shape).astype(np.float64)
+ v_val = np.random.random_sample(param_shape).astype(np.float64)
+ beta_val = np.random.random_sample(param_shape).astype(np.float64)
+ gamma_val = np.random.random_sample(param_shape).astype(np.float64)
+ with self.test_session():
+ x = constant_op.constant(x_val, name="x")
+ m = constant_op.constant(m_val, name="m")
+ v = constant_op.constant(v_val, name="v")
+ beta = constant_op.constant(beta_val, name="beta")
+ gamma = constant_op.constant(gamma_val, name="gamma")
+ epsilon = 0.001
+      # If scale_after_normalization is False, gamma is not used in the
+      # forward pass, so its backprop gradient is 0 and gamma is unchanged.
+ output = nn.batch_norm_with_global_normalization(
+ x, m, v, beta, gamma, epsilon, scale_after_normalization)
+ all_params = [x, m, v, beta, gamma]
+ all_shapes = [x_shape, param_shape, param_shape, param_shape, param_shape]
+ err = gc.ComputeGradientError(all_params[param_index],
+ all_shapes[param_index], output, x_shape)
+ print "Batch normalization %s gradient %s scale err = " % (
+ tag, "with" if scale_after_normalization else "without"
+ ), err
+ self.assertLess(err, err_tolerance)
+
+ def testBatchNormInputGradient(self):
+ for scale_after_normalization in [True, False]:
+ self._testBatchNormGradient(0, "x", scale_after_normalization)
+
+ def testBatchNormMeanGradient(self):
+ for scale_after_normalization in [True, False]:
+ self._testBatchNormGradient(1, "mean", scale_after_normalization)
+
+ def testBatchNormVarianceGradient(self):
+ for scale_after_normalization in [True, False]:
+ self._testBatchNormGradient(2, "variance", scale_after_normalization,
+ err_tolerance=1e-03)
+
+ def testBatchNormBetaGradient(self):
+ for scale_after_normalization in [True, False]:
+ self._testBatchNormGradient(3, "beta", scale_after_normalization)
+
+ def testBatchNormGammaGradient(self):
+ for scale_after_normalization in [True, False]:
+ self._testBatchNormGradient(4, "gamma", scale_after_normalization)
+
+ def testBatchNormGradImpl(self):
+ x_shape = [7, 5, 4, 6]
+ param_shape = [6]
+ np.random.seed(1) # Make it reproducible.
+ x_val = np.random.random_sample(x_shape).astype(np.float32)
+ m_val = np.random.random_sample(param_shape).astype(np.float32)
+ v_val = np.random.random_sample(param_shape).astype(np.float32)
+ beta_val = np.random.random_sample(param_shape).astype(np.float32)
+ gamma_val = np.random.random_sample(param_shape).astype(np.float32)
+ backprop_val = np.random.random_sample(x_shape).astype(np.float32)
+ for use_gpu in [False, True]:
+ with self.test_session(use_gpu=use_gpu) as sess:
+ x = constant_op.constant(x_val, name="x")
+ m = constant_op.constant(m_val, name="m")
+ v = constant_op.constant(v_val, name="v")
+ beta = constant_op.constant(beta_val, name="beta")
+ gamma = constant_op.constant(gamma_val, name="gamma")
+ backprop = constant_op.constant(backprop_val, name="backprop")
+ epsilon = 0.001
+ for scale_after_normalization in [True, False]:
+ dx, dm, dv, db, dg = (
+ gen_nn_ops._batch_norm_with_global_normalization_grad(
+ x, m, v, gamma, backprop, epsilon, scale_after_normalization))
+ on = self._opsBatchNorm(
+ x, m, v, beta, gamma, epsilon, scale_after_normalization)
+ odx, odm, odv, odb, odg = gradients.gradients(
+ [on], [x, m, v, beta, gamma], [backprop])
+ if scale_after_normalization:
+ all_grads = sess.run([dx, dm, dv, db, dg, odx, odm, odv, odb, odg])
+ to_check = ["dx", "dm", "dv", "db", "dg"]
+ else:
+ all_grads = sess.run([dx, dm, dv, db, odx, odm, odv, odb])
+ to_check = ["dx", "dm", "dv", "db"]
+ for i, n in enumerate(to_check):
+ print n
+ self.assertAllClose(
+ all_grads[i + len(to_check)], all_grads[i], atol=0.000001)
+
+
+class MomentsTest(test_util.TensorFlowTestCase):
+
+ def RunMomentTest(self, shape, global_norm):
+ with self.test_session():
+      # shape = [batch, height, width, depth]
+ assert len(shape) == 4
+
+ x_numpy = np.random.normal(size=shape).astype(np.float32)
+ x = constant_op.constant(x_numpy)
+ x.set_shape(shape)
+ axes = [0, 1, 2] if global_norm else [0]
+ mean, var = nn.moments(x, axes)
+
+ num_elements = np.prod([shape[i] for i in axes])
+
+      ax = (0, 1, 2) if global_norm else (0,)
+ expected_mean = np.sum(x_numpy, axis=ax) / num_elements
+ expected_mean_squared = np.multiply(expected_mean, expected_mean)
+ expected_x_squared = np.sum(
+ np.multiply(x_numpy, x_numpy), axis=ax) / num_elements
+ expected_variance = expected_x_squared - expected_mean_squared
+
+ # Check that the moments are correct.
+ self.assertAllClose(expected_mean, mean.eval())
+ self.assertAllClose(expected_variance, var.eval())
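+
+  # Illustrative sketch (hypothetical helper, not used by the tests): the
+  # expected moments above are simply the mean and the (biased) variance
+  # over the same axes, i.e. E[x^2] - (E[x])^2.
+  def _NumpyMoments(self, x, axes):
+    return np.mean(x, axis=tuple(axes)), np.var(x, axis=tuple(axes))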
+
+ def testBasic(self):
+ self.RunMomentTest(shape=[2, 3, 5, 4], global_norm=False)
+
+ def testGlobalNormalization(self):
+ self.RunMomentTest(shape=[2, 3, 5, 4], global_norm=True)
+
+ def _testGlobalGradient(self, from_y="mean"):
+ with self.test_session():
+ x_shape = [3, 5, 4, 2]
+ x_val = np.random.random_sample(x_shape).astype(np.float64)
+ x = constant_op.constant(x_val)
+ x.set_shape(x_shape)
+
+ axes = [0, 1, 2]
+ y_shape = [2] # Depth of x
+ out_mean, out_var = nn.moments(x, axes)
+ if from_y == "mean":
+ y = out_mean
+ elif from_y == "var":
+ y = out_var
+ err = gc.ComputeGradientError(x, x_shape, y, y_shape)
+ print "Moments %s gradient err = %g" % (from_y, err)
+ self.assertLess(err, 1e-11)
+
+ def testMeanGlobalGradient(self):
+ self._testGlobalGradient(from_y="mean")
+
+ def testVarGlobalGradient(self):
+ self._testGlobalGradient(from_y="var")
+
+
+class ComputeSampledLogitsTest(test_util.TensorFlowTestCase):
+
+ def setUp(self):
+ self._num_classes = 5
+ self._dim = 10
+ self._batch_size = 3
+
+ def _GenerateTestInputs(self):
+ np.random.seed(0)
+ weights = np.random.randn(self._num_classes, self._dim).astype(np.float32)
+ biases = np.random.randn(self._num_classes).astype(np.float32)
+ hidden_acts = np.random.randn(self._batch_size, self._dim).astype(
+ np.float32)
+
+ return weights, biases, hidden_acts
+
+ def _ComputeSampledLogitsNP(self, true_w, true_b, sampled_w, sampled_b,
+ hidden_acts,
+ num_true=1,
+ true_expected=None,
+ sampled_expected=None):
+
+ batch_size, dim = hidden_acts.shape
+ true_logits = np.sum(
+ hidden_acts.reshape((batch_size, 1, dim)) * true_w.reshape(
+ (batch_size, num_true, dim)),
+ axis=2)
+ true_b = true_b.reshape((batch_size, num_true))
+ true_logits += true_b
+ sampled_logits = np.dot(hidden_acts, sampled_w.T) + sampled_b
+
+ if true_expected is not None:
+ true_logits -= np.log(true_expected)
+ if sampled_expected is not None:
+ sampled_logits -= np.log(sampled_expected[np.newaxis, :])
+
+ out_logits = np.concatenate([true_logits, sampled_logits], axis=1)
+ out_labels = np.hstack((np.ones_like(true_logits) / num_true,
+ np.zeros_like(sampled_logits)))
+
+ return out_logits, out_labels
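+
+  # Illustrative sketch (hypothetical helper, not used by the tests): each
+  # row of out_labels gives the num_true target columns a total weight of 1
+  # and the sampled columns a weight of 0, e.g. [0.5, 0.5, 0.0, 0.0, 0.0, 0.0]
+  # for num_true=2 and four sampled classes.
+  def _ExampleLabelRow(self, num_true, num_sampled):
+    return [1.0 / num_true] * num_true + [0.0] * num_sampled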
+
+ def _ComputeSampledLogitsTF(self, weights, biases, hidden_acts, labels,
+ num_sampled, num_classes, num_true, sampled_vals,
+ subtract_log_q, remove_accidental_hits,
+ name="sampled_loss_TF"):
+ # Should be called from within a `with test_session():` block
+ weights_tf = constant_op.constant(weights)
+ biases_tf = constant_op.constant(biases)
+ hidden_acts_tf = constant_op.constant(hidden_acts,
+ shape=(self._batch_size, self._dim))
+ labels_tf = constant_op.constant(labels, dtype=types.int64,
+ shape=(self._batch_size, num_true))
+
+ pred_logits_tf, pred_labels_tf = nn._compute_sampled_logits(
+ weights_tf, biases_tf, hidden_acts_tf, labels_tf, num_sampled,
+ num_classes, num_true, sampled_vals,
+ subtract_log_q=subtract_log_q,
+ remove_accidental_hits=remove_accidental_hits,
+ name=name)
+ return pred_logits_tf, pred_labels_tf
+
+ def testComputeSampledLogitsShapes(self):
+ # We just check that the shapes of the returned values are correct.
+ weights, biases, hidden_acts = self._GenerateTestInputs()
+ sampled = [1, 0, 2, 3]
+ num_sampled = len(sampled)
+ true_exp = sampled_exp = [1., 1., 1., 1.]
+ test_sampled_vals = (sampled, true_exp, sampled_exp)
+ sampled_w, sampled_b = weights[sampled], biases[sampled]
+
+ with self.test_session() as sess:
+ for num_true_test in range(1, 5):
+ labels = np.random.randint(low=0, high=self._num_classes,
+ size=self._batch_size * num_true_test)
+ true_w, true_b = weights[labels], biases[labels]
+
+ logits_np, labels_np = self._ComputeSampledLogitsNP(
+ true_w, true_b, sampled_w, sampled_b, hidden_acts,
+ num_true=num_true_test)
+
+ logits_tf, labels_tf = self._ComputeSampledLogitsTF(
+ weights, biases, hidden_acts, labels, num_sampled,
+ self._num_classes,
+ num_true=num_true_test,
+ sampled_vals=test_sampled_vals,
+ remove_accidental_hits=True,
+ subtract_log_q=False)
+
+ logits_tf_val, labels_tf_val = sess.run([logits_tf, labels_tf])
+ self.assertEqual(logits_np.shape, logits_tf_val.shape)
+ self.assertEqual(labels_np.shape, labels_tf_val.shape)
+
+ def testComputeSampledLogitsValues(self):
+ # Here we check the actual numerics.
+ weights, biases, hidden_acts = self._GenerateTestInputs()
+ eps = 1e-3
+ sampled = [1, 0, 2, 3]
+ num_sampled = len(sampled)
+ true_exp = np.empty([self._batch_size, 1], dtype=np.float32)
+ true_exp.fill(0.5)
+ sampled_exp = np.empty([num_sampled], dtype=np.float32)
+ sampled_exp.fill(0.5)
+ sampled_w, sampled_b = weights[sampled], biases[sampled]
+ test_sampled_vals = (sampled, true_exp, sampled_exp)
+
+ with self.test_session() as sess:
+ for num_true_test in range(1, 5):
+ # Generate test data for this run
+ labels = np.random.randint(low=0, high=self._num_classes,
+ size=self._batch_size * num_true_test)
+ true_w, true_b = weights[labels], biases[labels]
+
+ # Test 1: Without accidental hit removal or subtract_log_q
+ logits_np, labels_np = self._ComputeSampledLogitsNP(
+ true_w, true_b, sampled_w, sampled_b, hidden_acts,
+ num_true=num_true_test)
+ logits_tf, labels_tf = self._ComputeSampledLogitsTF(
+ weights, biases, hidden_acts, labels, num_sampled,
+ self._num_classes,
+ num_true=num_true_test,
+ sampled_vals=test_sampled_vals,
+ subtract_log_q=False,
+ remove_accidental_hits=False,
+ name="sampled_loss_test1_num_true%d" % num_true_test)
+
+ logits_tf_val, labels_tf_val = sess.run([logits_tf, labels_tf])
+ self.assertAllClose(logits_np, logits_tf_val, eps)
+ self.assertAllClose(labels_np, labels_tf_val, eps)
+
+ # Test 2: With accidental hit removal, no subtract_log_q
+ logits_tf, labels_tf = self._ComputeSampledLogitsTF(
+ weights, biases, hidden_acts, labels, num_sampled,
+ self._num_classes,
+ num_true=num_true_test,
+ sampled_vals=test_sampled_vals,
+ subtract_log_q=False,
+ remove_accidental_hits=True,
+ name="sampled_loss_test2_num_true%d" % num_true_test)
+
+        # Test that the exponentiated logits of accidental hits are near 0:
+        # accidental hit removal pushes those logits to a large negative
+        # value, so exp(logit) is approximately 0. First we need to find the
+        # hits in this random test run:
+ labels_reshape = labels.reshape((self._batch_size, num_true_test))
+ logits_tf_np = logits_tf.eval()
+ for row in xrange(self._batch_size):
+ row_labels = labels_reshape[row, :]
+ for col in xrange(num_sampled):
+ if sampled[col] in row_labels:
+ # We need to add the num_true_test offset into logits_*
+ self.assertNear(
+ np.exp(logits_tf_np[row, col + num_true_test]), 0., eps)
+
+ # Test 3: With subtract_log_q, no accidental hit removal
+ logits_np, labels_np = self._ComputeSampledLogitsNP(
+ true_w, true_b, sampled_w, sampled_b, hidden_acts,
+ num_true=num_true_test,
+ true_expected=true_exp,
+ sampled_expected=sampled_exp)
+ logits_tf, labels_tf = self._ComputeSampledLogitsTF(
+ weights, biases, hidden_acts, labels, num_sampled,
+ self._num_classes,
+ num_true=num_true_test,
+ sampled_vals=test_sampled_vals,
+ subtract_log_q=True,
+ remove_accidental_hits=False,
+ name="sampled_loss_test3_num_true%d" % num_true_test)
+
+ logits_tf_val, labels_tf_val = sess.run([logits_tf, labels_tf])
+ self.assertAllClose(logits_np, logits_tf_val, eps)
+ self.assertAllClose(labels_np, labels_tf_val, eps)
+
+ def testNCELoss(self):
+ # A simple test to verify the numerics.
+
+ def _SigmoidCrossEntropyWithLogits(logits, targets):
+ # logits, targets: float arrays of the same shape.
+ assert logits.shape == targets.shape
+ pred = 1. / (1. + np.exp(-logits))
+ eps = 0.0001
+ pred = np.minimum(np.maximum(pred, eps), 1 - eps)
+ return -targets * np.log(pred) - (1. - targets) * np.log(1. - pred)
+
+ weights, biases, hidden_acts = self._GenerateTestInputs()
+ labels = [0, 1, 2]
+ true_w, true_b = weights[labels], biases[labels]
+ sampled = [1, 0, 2, 3]
+ num_sampled = len(sampled)
+ true_exp = np.empty([self._batch_size, 1], dtype=np.float32)
+ true_exp.fill(0.5)
+ sampled_exp = np.empty([num_sampled], dtype=np.float32)
+ sampled_exp.fill(0.5)
+ sampled_w, sampled_b = weights[sampled], biases[sampled]
+ test_sampled_vals = (sampled, true_exp, sampled_exp)
+
+ with self.test_session():
+ logits_np, labels_np = self._ComputeSampledLogitsNP(
+ true_w, true_b, sampled_w, sampled_b, hidden_acts,
+ true_expected=true_exp,
+ sampled_expected=sampled_exp)
+ nce_loss_np = np.sum(
+ _SigmoidCrossEntropyWithLogits(logits_np, labels_np), 1)
+
+ labels_tf = constant_op.constant(labels, shape=(self._batch_size, 1))
+ weights_tf = constant_op.constant(weights)
+ biases_tf = constant_op.constant(biases)
+ inputs_tf = constant_op.constant(hidden_acts)
+
+ nce_loss_tf = nn.nce_loss(
+ weights_tf, biases_tf, inputs_tf, labels_tf,
+ num_sampled=1,
+ num_classes=self._num_classes,
+ num_true=1,
+ sampled_values=test_sampled_vals)
+
+ self.assertAllClose(nce_loss_np, nce_loss_tf.eval(), 1e-4)
+
+ def testSampledSoftmaxLoss(self):
+ # A simple test to verify the numerics.
+
+ def _SoftmaxCrossEntropyWithLogits(logits, targets):
+ # logits, targets: float arrays of the same shape.
+ assert logits.shape == targets.shape
+ stable_exp_logits = np.exp(logits - np.amax(
+ logits, axis=1, keepdims=True))
+ pred = stable_exp_logits / np.sum(stable_exp_logits, 1, keepdims=True)
+ return -np.sum(targets * np.log(pred + 1.0e-20), axis=1)
+
+ weights, biases, hidden_acts = self._GenerateTestInputs()
+ labels = [0, 1, 2]
+ true_w, true_b = weights[labels], biases[labels]
+ sampled = [1, 0, 2, 3]
+ num_sampled = len(sampled)
+ true_exp = np.full([self._batch_size, 1], fill_value=0.5, dtype=np.float32)
+ sampled_exp = np.full([num_sampled], fill_value=0.5, dtype=np.float32)
+ sampled_w, sampled_b = weights[sampled], biases[sampled]
+ test_sampled_vals = (sampled, true_exp, sampled_exp)
+
+ with self.test_session():
+ logits_np, labels_np = self._ComputeSampledLogitsNP(
+ true_w, true_b, sampled_w, sampled_b, hidden_acts,
+ true_expected=true_exp,
+ sampled_expected=sampled_exp)
+ sampled_softmax_loss_np = _SoftmaxCrossEntropyWithLogits(logits_np,
+ labels_np)
+
+ labels_tf = constant_op.constant(labels, shape=(self._batch_size, 1))
+ weights_tf = constant_op.constant(weights)
+ biases_tf = constant_op.constant(biases)
+ inputs_tf = constant_op.constant(hidden_acts)
+
+ sampled_softmax_loss_tf = nn.sampled_softmax_loss(
+ weights_tf, biases_tf, inputs_tf, labels_tf,
+ num_sampled=1,
+ num_classes=self._num_classes,
+ num_true=1,
+ sampled_values=test_sampled_vals,
+ remove_accidental_hits=False)
+
+ self.assertAllClose(
+ sampled_softmax_loss_np, sampled_softmax_loss_tf.eval(), 1e-4)
+
+
+if __name__ == "__main__":
+ googletest.main()