diff options
Diffstat (limited to 'tensorflow/python/kernel_tests/gradient_checker.py')
-rw-r--r-- | tensorflow/python/kernel_tests/gradient_checker.py | 251 |
1 files changed, 251 insertions, 0 deletions
diff --git a/tensorflow/python/kernel_tests/gradient_checker.py b/tensorflow/python/kernel_tests/gradient_checker.py new file mode 100644 index 0000000000..fe74768986 --- /dev/null +++ b/tensorflow/python/kernel_tests/gradient_checker.py @@ -0,0 +1,251 @@ +"""Gradient checker for any ops, graphs. + +The gradient checker verifies numerically that an op/graph properly +computes the gradients +""" +import tensorflow.python.platform + +import numpy as np + +from tensorflow.python.framework import ops +from tensorflow.python.framework import types +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import constant_op +from tensorflow.python.ops import gradients +from tensorflow.python.platform import logging + + +def _Product(t): + if isinstance(t, int): + return t + else: + y = 1 + for x in t: + y *= x + return y + + +def _ComputeTheoricalJacobian(x, x_shape, x_data, dy, dy_shape, dx): + """Computes the theoretical Jacobian for dy/dx. + + Computes the theoretical Jacobian using the ops generated by + ComputeGradient(). + + Args: + x: the tensor "x". + x_shape: the dimensions of x as a tuple or an array of ints. + x_data: a numpy parray as the input data for x + dy: the tensor "dy". + dy_shape: the dimensions of dy as a tuple or an array of ints. + dx: Tensor or IndexedSlices representing dx + + Returns: + A 2-d numpy array representing the Jacobian for dy/dx. It has "x_size" rows + and "dy_size" columns where "x_size" is the number of elements in x and + "dy_size" is the number of elements in dy. + """ + # To compute the jacobian, we treat x and y are one-dimensional vectors + x_size = _Product(x_shape) + x_val_size = _Product(x_shape[1:]) # This is used for sparse gradients + dy_size = _Product(dy_shape) + + jacobian = np.zeros((x_size, dy_size), dtype=x_data.dtype) + # For each of the entry of dy, we set this to be 1 and + # everything else to be 0 and compute the backprop -- this will give us one + # one column of the Jacobian matrix. + for col in range(0, dy_size): + dy_data = np.zeros(dy_shape, dtype=x_data.dtype) + dy_data.flat[col] = 1 + sess = ops.get_default_session() + if isinstance(dx, ops.IndexedSlices): + backprop_indices, backprop_values = sess.run( + [dx.indices, dx.values], feed_dict={x: x_data, dy: dy_data}) + for i, v in zip(backprop_indices, backprop_values): + r_begin = i * x_val_size + r_end = r_begin + x_val_size + jacobian[r_begin:r_end, col] += v.flat + else: + assert isinstance(dx, ops.Tensor), "dx = " + str(dx) + backprop = sess.run(dx, feed_dict={x: x_data, dy: dy_data}) + jacobian[:, col] = backprop.reshape(x_size) + + logging.vlog(1, "Theoretical Jacobian =\n%s", jacobian) + return jacobian + + +def _ComputeNumericJacobian(x, x_shape, x_data, y, y_shape, delta): + """Computes the numeric Jacobian for dy/dx. + + Computes the numeric Japcobian by slightly perturbing the inputs and + measuring the differences on the output. + + Args: + x: the tensor "x". + x_shape: the dimensions of x as a tuple or an array of ints. + x_data: a numpy array as the input data for x + y: the tensor "y". + y_shape: the dimensions of y as a tuple or an array of ints. + delta: the amount of perturbation we give to the input + + Returns: + A 2-d numpy array representing the Jacobian for dy/dx. It has "x_size" rows + and "y_size" columns where "x_size" is the number of elements in x and + "y_size" is the number of elements in y. + """ + + # To compute the jacobian, we treat x and y are one-dimensional vectors + x_size = _Product(x_shape) + y_size = _Product(y_shape) + + jacobian = np.zeros((x_size, y_size), dtype=x_data.dtype) + # For each of the entry of x, we slightly perturbs this by adding and + # subtracting a delta and then compute difference between the outputs. This + # will give us one row of the Jacobian matrix. + for row in range(0, x_size): + x_pos = x_data.copy() + x_pos.flat[row] += delta + y_pos = y.eval(feed_dict={x: x_pos}) + x_neg = x_data.copy() + x_neg.flat[row] -= delta + y_neg = y.eval(feed_dict={x: x_neg}) + diff = (y_pos - y_neg) / (2 * delta) + jacobian[row, :] = diff.reshape(y_size) + + logging.vlog(1, "Numeric Jacobian =\n%s", jacobian) + return jacobian + + +def _ComputeDxAndDy(x, y, y_shape): + """Returns a node to compute gradient of x wrt y.""" + # We make up a dy so that we can compute the gradients. We don't really use + # the value of dy -- we will always feed it. We need to add an identity node + # so that we can always feed it properly. Otherwise, for the Add operation, + # dx is the same as dy and we cannot fetch the tensor that we are feeding. + with x.graph.as_default(): + dy_orig = constant_op.constant(1.0, shape=y_shape, dtype=y.dtype) + dy = array_ops.identity(dy_orig) + # We compute the gradients for x wrt. y + grads = gradients.gradients(y, x, dy) + assert len(grads) == 1 + return grads[0], dy_orig + + +def _ComputeGradient(x, x_shape, dx, y, y_shape, dy, + x_init_value=None, delta=1e-3): + """Computes the theoretical and numerical jacobian.""" + t = types.as_dtype(x.dtype) + allowed_types = [types.float32, types.float64] + assert t.base_dtype in allowed_types, "Don't support type %s for x" % t.name + t2 = types.as_dtype(y.dtype) + assert t2.base_dtype in allowed_types, "Don't support type %s for y" % t2.name + + if x_init_value is not None: + i_shape = list(x_init_value.shape) + assert(list(x_shape) == i_shape), "x_shape = %s, init_data shape = %s" % ( + x_shape, i_shape) + x_data = x_init_value + else: + if t == types.float32: + dtype = np.float32 + else: + dtype = np.float64 + x_data = np.asfarray(np.random.random_sample(x_shape), dtype=dtype) + + jacob_t = _ComputeTheoricalJacobian(x, x_shape, x_data, dy, y_shape, dx) + jacob_n = _ComputeNumericJacobian(x, x_shape, x_data, y, y_shape, delta) + return jacob_t, jacob_n + + +def _ComputeGradientList( + x, x_shape, y, y_shape, x_init_value=None, delta=1e-3, init_targets=None): + """Compute gradients for a list of x values.""" + assert isinstance(x, list) + dx, dy = zip(*[_ComputeDxAndDy(xi, y, y_shape) for xi in x]) + + if init_targets is not None: + assert isinstance(init_targets, (list, tuple)) + for init in init_targets: + init.run() + if x_init_value is None: + x_init_value = [None] * len(x) + ret = [_ComputeGradient(xi, x_shapei, dxi, y, y_shape, dyi, + x_init_valuei, delta) + for xi, x_shapei, dxi, dyi, x_init_valuei in + zip(x, x_shape, dx, dy, x_init_value)] + return ret + + +def ComputeGradient( + x, x_shape, y, y_shape, x_init_value=None, delta=1e-3, init_targets=None): + """Computes and returns the theoretical and numerical Jacobian. + + Args: + x: a tensor or list of tensors + x_shape: the dimensions of x as a tuple or an array of ints. If x is a list, + then this is the list of shapes. + y: a tensor + y_shape: the dimensions of y as a tuple or an array of ints. + x_init_value: (optional) a numpy array of the same shape as "x" + representing the initial value of x. If x is a list, this should be a list + of numpy arrays. If this is none, the function will pick a random tensor + as the initial value. + delta: (optional) the amount of perturbation. + init_targets: list of targets to run to initialize model params. + TODO(mrry): remove this argument. + + Returns: + Two 2-d numpy arrays representing the theoretical and numerical + Jacobian for dy/dx. Each has "x_size" rows and "y_size" columns + where "x_size" is the number of elements in x and "y_size" is the + number of elements in y. If x is a list, returns a list of two numpy arrays. + """ + if isinstance(x, list): + return _ComputeGradientList(x, x_shape, y, y_shape, x_init_value, + delta, init_targets) + else: + if init_targets is not None: + assert isinstance(init_targets, (list, tuple)) + for init in init_targets: + init.run() + dx, dy = _ComputeDxAndDy(x, y, y_shape) + ret = _ComputeGradient(x, x_shape, dx, y, y_shape, dy, x_init_value, delta) + return ret + + +def ComputeGradientError( + x, x_shape, y, y_shape, x_init_value=None, delta=1e-3, init_targets=None): + """Computes the gradient error. + + Computes the maximum error for dy/dx between the computed Jacobian and the + numerically estimated Jacobian. + + This function will modify the tensors passed in as it adds more operations + and hence changing the consumers of the operations of the input tensors. + + This function adds operations to the current session. To compute the error + using a particular device, such as a GPU, use the standard methods for + setting a device (e.g. using with sess.graph.device() or setting a device + function in the session constructor). + + Args: + x: a tensor or list of tensors + x_shape: the dimensions of x as a tuple or an array of ints. If x is a list, + then this is the list of shapes. + y: a tensor + y_shape: the dimensions of y as a tuple or an array of ints. + x_init_value: (optional) a numpy array of the same shape as "x" + representing the initial value of x. If x is a list, this should be a list + of numpy arrays. If this is none, the function will pick a random tensor + as the initial value. + delta: (optional) the amount of perturbation. + init_targets: list of targets to run to initialize model params. + TODO(mrry): Remove this argument. + + Returns: + The maximum error in between the two Jacobians. + """ + grad = ComputeGradient(x, x_shape, y, y_shape, x_init_value, + delta, init_targets) + if isinstance(grad, tuple): + grad = [grad] + return max(np.fabs(j_t - j_n).max() for j_t, j_n in grad) |