# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for SparseSoftmaxCrossEntropyWithLogits op."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import sys
import time

import numpy as np

from tensorflow.core.protobuf import config_pb2
from tensorflow.python.client import session
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import errors_impl
from tensorflow.python.framework import ops as ops_lib
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import gen_nn_ops
from tensorflow.python.ops import gradient_checker
from tensorflow.python.ops import gradients_impl
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import sparse_ops
from tensorflow.python.ops import variables
import tensorflow.python.ops.nn_grad  # pylint: disable=unused-import
from tensorflow.python.platform import app
from tensorflow.python.platform import test


class SparseXentTest(test.TestCase):

  def _npXent(self, features, labels):
    features = np.reshape(features, [-1, features.shape[-1]])
    labels = np.reshape(labels, [-1])
    batch_dim = 0
    class_dim = 1
    batch_size = features.shape[batch_dim]
    e = np.exp(features - np.reshape(
        np.amax(features, axis=class_dim), [batch_size, 1]))
    probs = e / np.reshape(np.sum(e, axis=class_dim), [batch_size, 1])
    labels_mat = np.zeros_like(probs).astype(probs.dtype)
    labels_mat[np.arange(batch_size), labels] = 1.0
    bp = (probs - labels_mat)
    l = -np.sum(labels_mat * np.log(probs + 1.0e-20), axis=1)
    return l, bp

  def _testXent(self, np_features, np_labels):
    np_loss, np_backprop = self._npXent(np_features, np_labels)
    with self.test_session(use_gpu=True) as sess:
      loss, backprop = gen_nn_ops.sparse_softmax_cross_entropy_with_logits(
          np_features, np_labels)
      tf_loss, tf_backprop = sess.run([loss, backprop])
    self.assertAllCloseAccordingToType(np_loss, tf_loss)
    self.assertAllCloseAccordingToType(np_backprop, tf_backprop)

  def testSingleClass(self):
    for label_dtype in np.int32, np.int64:
      with self.test_session(use_gpu=True) as sess:
        loss, backprop = gen_nn_ops.sparse_softmax_cross_entropy_with_logits(
            np.array([[1.], [-1.], [0.]]).astype(np.float32),
            np.array([0, 0, 0]).astype(label_dtype))
        tf_loss, tf_backprop = sess.run([loss, backprop])
      self.assertAllClose([0.0, 0.0, 0.0], tf_loss)
      self.assertAllClose([[0.0], [0.0], [0.0]], tf_backprop)

  def testInvalidLabel(self):
    features = [[1., 1., 1., 1.], [1., 1., 1., 1.], [1., 2., 3., 4.],
                [1., 2., 3., 4.]]
    labels = [4, 3, 0, -1]

    if test.is_built_with_cuda() and test.is_gpu_available():
      with self.test_session(use_gpu=True) as sess:
        loss, backprop = (
            gen_nn_ops.sparse_softmax_cross_entropy_with_logits(
                features, labels))
        tf_loss, tf_backprop = sess.run([loss, backprop])
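        # The GPU kernel does not validate label values, so the
        # out-of-range labels (4 and -1 here) are expected to produce NaN
        # loss/backprop rather than an error; the CPU path below raises.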
        self.assertAllClose(
            [[np.nan] * 4, [0.25, 0.25, 0.25, -0.75],
             [-0.968, 0.087, 0.237, 0.6439], [np.nan] * 4],
            tf_backprop,
            rtol=1e-3,
            atol=1e-3)
        self.assertAllClose(
            [np.nan, 1.3862, 3.4420, np.nan], tf_loss, rtol=1e-3, atol=1e-3)

    with self.test_session(use_gpu=False) as sess:
      loss, backprop = (
          gen_nn_ops.sparse_softmax_cross_entropy_with_logits(
              features, labels))
      with self.assertRaisesOpError("Received a label value of"):
        sess.run([loss, backprop])

  def testNpXent(self):
    # We create 2 batches of logits for testing.
    # batch 0 is the boring uniform distribution: 1, 1, 1, 1, with target 3.
    # batch 1 has a bit of difference: 1, 2, 3, 4, with target 0.
    features = [[1., 1., 1., 1.], [1., 2., 3., 4.]]
    labels = [3, 0]

    # For batch 0, we expect the uniform distribution: 0.25, 0.25, 0.25, 0.25
    # With a hard target 3, the backprop is [0.25, 0.25, 0.25, -0.75]
    # The loss for this batch is -log(0.25) = 1.386
    #
    # For batch 1, we have:
    # exp(0) = 1
    # exp(1) = 2.718
    # exp(2) = 7.389
    # exp(3) = 20.085
    # SUM = 31.192
    # So we have as probabilities:
    # exp(0) / SUM = 0.032
    # exp(1) / SUM = 0.087
    # exp(2) / SUM = 0.237
    # exp(3) / SUM = 0.644
    # With a hard target 0, the backprop is
    # [0.032 - 1.0 = -0.968, 0.087, 0.237, 0.644]
    # and the loss is -log(0.032) = 3.4420.
    #
    # Stacking both batches, the losses are [1.3862, 3.4420].
    np_loss, np_backprop = self._npXent(np.array(features), np.array(labels))
    self.assertAllClose(
        np.array([[0.25, 0.25, 0.25, -0.75], [-0.968, 0.087, 0.237, 0.6439]]),
        np_backprop,
        rtol=1.e-3,
        atol=1.e-3)
    self.assertAllClose(
        np.array([1.3862, 3.4420]), np_loss, rtol=1.e-3, atol=1.e-3)

  def testShapeMismatch(self):
    with self.test_session(use_gpu=True):
      with self.assertRaisesRegexp(ValueError, ".*Rank mismatch:*"):
        nn_ops.sparse_softmax_cross_entropy_with_logits(
            labels=[[0, 2]], logits=[[0., 1.], [2., 3.], [2., 3.]])

  def testScalar(self):
    with self.test_session(use_gpu=True):
      with self.assertRaisesRegexp(ValueError, ".*Logits cannot be scalars*"):
        nn_ops.sparse_softmax_cross_entropy_with_logits(
            labels=constant_op.constant(0), logits=constant_op.constant(1.0))

  def testLabelsPlaceholderScalar(self):
    with self.test_session(use_gpu=True):
      labels = array_ops.placeholder(np.int32)
      y = nn_ops.sparse_softmax_cross_entropy_with_logits(
          labels=labels, logits=[[7.]])
      with self.assertRaisesOpError("labels must be 1-D"):
        y.eval(feed_dict={labels: 0})

  def testVector(self):
    with self.test_session(use_gpu=True):
      loss = nn_ops.sparse_softmax_cross_entropy_with_logits(
          labels=constant_op.constant(0), logits=constant_op.constant([1.0]))
      self.assertAllClose(0.0, loss.eval())

  def testFloat(self):
    for label_dtype in np.int32, np.int64:
      self._testXent(
          np.array([[1., 1., 1., 1.], [1., 2., 3., 4.]]).astype(np.float32),
          np.array([3, 0]).astype(label_dtype))

  def testDouble(self):
    for label_dtype in np.int32, np.int64:
      self._testXent(
          np.array([[1., 1., 1., 1.], [1., 2., 3., 4.]]).astype(np.float64),
          np.array([0, 3]).astype(label_dtype))

  def testHalf(self):
    for label_dtype in np.int32, np.int64:
      self._testXent(
          np.array([[1., 1., 1., 1.], [1., 2., 3., 4.]]).astype(np.float16),
          np.array([3, 0]).astype(label_dtype))

  def testEmpty(self):
    self._testXent(np.zeros((0, 3)), np.zeros((0,), dtype=np.int32))

  def testGradient(self):
    with self.test_session(use_gpu=True):
      l = constant_op.constant([3, 0, 1], name="l")
      f = constant_op.constant(
          [0.1, 0.2, 0.3, 0.4, 0.1, 0.4, 0.9, 1.6, 0.1, 0.8, 2.7, 6.4],
          shape=[3, 4],
          dtype=dtypes.float64,
          name="f")
      x = nn_ops.sparse_softmax_cross_entropy_with_logits(
          labels=l, logits=f, name="xent")
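      # compute_gradient_error numerically checks the analytic gradient of x
      # w.r.t. f against a finite-difference estimate and returns the maximum
      # elementwise error.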
      err = gradient_checker.compute_gradient_error(f, [3, 4], x, [3])
    print("cross entropy gradient err = ", err)
    self.assertLess(err, 5e-8)

  def testSecondGradient(self):
    images_placeholder = array_ops.placeholder(dtypes.float32, shape=(3, 2))
    labels_placeholder = array_ops.placeholder(dtypes.int32, shape=(3))
    weights = variables.Variable(random_ops.truncated_normal([2], stddev=1.0))
    weights_with_zeros = array_ops.stack([array_ops.zeros([2]), weights],
                                         axis=1)
    logits = math_ops.matmul(images_placeholder, weights_with_zeros)
    cross_entropy = nn_ops.sparse_softmax_cross_entropy_with_logits(
        labels=labels_placeholder, logits=logits)
    loss = math_ops.reduce_mean(cross_entropy)

    # Taking the second gradient should fail, since it is not
    # yet supported.
    with self.assertRaisesRegexp(LookupError, "explicitly disabled"):
      _ = gradients_impl.hessians(loss, [weights])

  def _testHighDim(self, features, labels):
    np_loss, np_backprop = self._npXent(np.array(features), np.array(labels))
    # manually reshape loss
    np_loss = np.reshape(np_loss, np.array(labels).shape)
    with self.test_session(use_gpu=True) as sess:
      loss = nn_ops.sparse_softmax_cross_entropy_with_logits(
          labels=labels, logits=features)
      backprop = loss.op.inputs[0].op.outputs[1]
      tf_loss, tf_backprop = sess.run([loss, backprop])
    self.assertAllCloseAccordingToType(np_loss, tf_loss)
    self.assertAllCloseAccordingToType(np_backprop, tf_backprop)

  def testHighDim(self):
    features = [[[1., 1., 1., 1.]], [[1., 2., 3., 4.]]]
    labels = [[3], [0]]
    self._testHighDim(features, labels)

  def testHighDim2(self):
    features = [[[1., 1., 1., 1.], [2., 2., 2., 2.]],
                [[1., 2., 3., 4.], [5., 6., 7., 8.]]]
    labels = [[3, 2], [0, 3]]
    self._testHighDim(features, labels)

  def testScalarHandling(self):
    with self.test_session(use_gpu=False) as sess:
      with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
                                   ".*labels must be 1-D.*"):
        labels = array_ops.placeholder(dtypes.int32, shape=[None, 1])
        logits = array_ops.placeholder(dtypes.float32, shape=[None, 3])
        ce = nn_ops.sparse_softmax_cross_entropy_with_logits(
            labels=array_ops.squeeze(labels), logits=logits)
        labels_v2 = np.zeros((1, 1), dtype=np.int32)
        logits_v2 = np.random.randn(1, 3)
        sess.run([ce], feed_dict={labels: labels_v2, logits: logits_v2})


def _sparse_vs_dense_xent_benchmark_dense(labels, logits):
  labels = array_ops.identity(labels)
  logits = array_ops.identity(logits)
  with ops_lib.device("/cpu:0"):  # Sparse-to-dense must be on CPU
    batch_size = array_ops.shape(logits)[0]
    num_entries = array_ops.shape(logits)[1]
    length = batch_size * num_entries
    labels += num_entries * math_ops.range(batch_size)
    target = sparse_ops.sparse_to_dense(labels, array_ops.stack([length]),
                                        1.0, 0.0)
  target = array_ops.reshape(target, array_ops.stack([-1, num_entries]))
  crossent = nn_ops.softmax_cross_entropy_with_logits(
      labels=target, logits=logits, name="SequenceLoss/CrossEntropy")
  crossent_sum = math_ops.reduce_sum(crossent)
  grads = gradients_impl.gradients([crossent_sum], [logits])[0]
  return (crossent_sum, grads)


def _sparse_vs_dense_xent_benchmark_sparse(labels, logits):
  # Using sparse_softmax_cross_entropy_with_logits
  labels = labels.astype(np.int64)
  labels = array_ops.identity(labels)
  logits = array_ops.identity(logits)
  crossent = nn_ops.sparse_softmax_cross_entropy_with_logits(
      labels=labels, logits=logits, name="SequenceLoss/CrossEntropy")
  crossent_sum = math_ops.reduce_sum(crossent)
  grads = gradients_impl.gradients([crossent_sum], [logits])[0]
  return (crossent_sum, grads)

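
# Benchmarks one forward+backward step of the dense pipeline (sparse_to_dense
# to build one-hot targets, then softmax_cross_entropy_with_logits) against
# the fused sparse op, printing per-iteration timings and their ratio.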
def sparse_vs_dense_xent_benchmark(batch_size, num_entries, use_gpu):
  config = config_pb2.ConfigProto()
  config.allow_soft_placement = True
  config.gpu_options.per_process_gpu_memory_fraction = 0.3
  labels = np.random.randint(num_entries, size=batch_size).astype(np.int32)
  logits = np.random.randn(batch_size, num_entries).astype(np.float32)

  def _timer(sess, ops):
    # Warm up
    for _ in range(20):
      sess.run(ops)

    # Timing run
    start = time.time()
    for _ in range(20):
      sess.run(ops)
    end = time.time()

    return (end - start) / 20.0  # Average runtime per iteration

  # Using sparse_to_dense and softmax_cross_entropy_with_logits
  with session.Session(config=config) as sess:
    if not use_gpu:
      with ops_lib.device("/cpu:0"):
        ops = _sparse_vs_dense_xent_benchmark_dense(labels, logits)
    else:
      ops = _sparse_vs_dense_xent_benchmark_dense(labels, logits)
    delta_dense = _timer(sess, ops)

  # Using sparse_softmax_cross_entropy_with_logits
  with session.Session(config=config) as sess:
    if not use_gpu:
      with ops_lib.device("/cpu:0"):
        ops = _sparse_vs_dense_xent_benchmark_sparse(labels, logits)
    else:
      ops = _sparse_vs_dense_xent_benchmark_sparse(labels, logits)
    delta_sparse = _timer(sess, ops)

  print("%d \t %d \t %s \t %f \t %f \t %f" % (batch_size, num_entries, use_gpu,
                                              delta_dense, delta_sparse,
                                              delta_sparse / delta_dense))


def main(_):
  print("Sparse Xent vs. SparseToDense + Xent")
  print("batch \t depth \t gpu \t dt(dense) \t dt(sparse) "
        "\t dt(sparse)/dt(dense)")
  for use_gpu in (False, True):
    for batch_size in (32, 64, 128):
      for num_entries in (100, 1000, 10000):
        sparse_vs_dense_xent_benchmark(batch_size, num_entries, use_gpu)
    sparse_vs_dense_xent_benchmark(32, 100000, use_gpu)
    sparse_vs_dense_xent_benchmark(8, 1000000, use_gpu)


if __name__ == "__main__":
  if "--benchmarks" in sys.argv:
    sys.argv.remove("--benchmarks")
    app.run()
  else:
    test.main()
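
# Note: passing --benchmarks on the command line runs the sparse-vs-dense
# timing sweep in main() above instead of the unit tests.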