diff options
author | Francois Chollet <fchollet@google.com> | 2017-04-17 15:20:31 -0800 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2017-04-17 16:39:05 -0700 |
commit | fd561221d2fe782d320b97346dfffb41f38d2bcf (patch) | |
tree | 814fb813ff9b93e2535a65ccaba679f6c4b267e4 /tensorflow/contrib/keras | |
parent | ae84106edc892b60976b1635907009888150989f (diff) |
Refactor Keras initializers to rely on core TF initializers; add serialization methods to core TF initializers.
Change: 153403157
Diffstat (limited to 'tensorflow/contrib/keras')
5 files changed, 78 insertions, 335 deletions
diff --git a/tensorflow/contrib/keras/BUILD b/tensorflow/contrib/keras/BUILD index 438e2056c6..5166ba37a3 100644 --- a/tensorflow/contrib/keras/BUILD +++ b/tensorflow/contrib/keras/BUILD @@ -134,7 +134,7 @@ py_library( py_test( name = "integration_test", - size = "small", + size = "medium", srcs = ["python/keras/integration_test.py"], srcs_version = "PY2AND3", tags = ["notsan"], diff --git a/tensorflow/contrib/keras/python/keras/initializers.py b/tensorflow/contrib/keras/python/keras/initializers.py index f9cb35e171..b0b71e7cb4 100644 --- a/tensorflow/contrib/keras/python/keras/initializers.py +++ b/tensorflow/contrib/keras/python/keras/initializers.py @@ -18,247 +18,20 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import math - import numpy as np import six -from tensorflow.contrib.keras.python.keras import backend as K from tensorflow.contrib.keras.python.keras.utils.generic_utils import deserialize_keras_object from tensorflow.contrib.keras.python.keras.utils.generic_utils import serialize_keras_object -from tensorflow.python.framework import tensor_shape - - -class Initializer(object): - """Initializer base class: all initializers inherit from this class. - """ - - def __call__(self, shape, dtype=None): - raise NotImplementedError - - def get_config(self): - return {} - - @classmethod - def from_config(cls, config): - return cls(**config) - - -class Zeros(Initializer): - """Initializer that generates tensors initialized to 0. - """ - - def __call__(self, shape, dtype=None): - return K.constant(0, shape=shape, dtype=dtype) - - -class Ones(Initializer): - """Initializer that generates tensors initialized to 1. - """ - - def __call__(self, shape, dtype=None): - return K.constant(1, shape=shape, dtype=dtype) - - -class Constant(Initializer): - """Initializer that generates tensors initialized to a constant value. - - Arguments: - value: float; the value of the generator tensors. - """ - - def __init__(self, value=0): - self.value = value - - def __call__(self, shape, dtype=None): - return K.constant(self.value, shape=shape, dtype=dtype) - - def get_config(self): - return {'value': self.value} - - -class RandomNormal(Initializer): - """Initializer that generates tensors with a normal distribution. - - Arguments: - mean: a python scalar or a scalar tensor. Mean of the random values - to generate. - stddev: a python scalar or a scalar tensor. Standard deviation of the - random values to generate. - seed: A Python integer. Used to seed the random generator. - """ - - def __init__(self, mean=0., stddev=0.05, seed=None): - self.mean = mean - self.stddev = stddev - self.seed = seed - - def __call__(self, shape, dtype=None): - return K.random_normal( - shape, self.mean, self.stddev, dtype=dtype, seed=self.seed) - - def get_config(self): - return {'mean': self.mean, 'stddev': self.stddev, 'seed': self.seed} - - -class RandomUniform(Initializer): - """Initializer that generates tensors with a uniform distribution. - - Arguments: - minval: A python scalar or a scalar tensor. Lower bound of the range - of random values to generate. - maxval: A python scalar or a scalar tensor. Upper bound of the range - of random values to generate. Defaults to 1 for float types. - seed: A Python integer. Used to seed the random generator. - """ - - def __init__(self, minval=-0.05, maxval=0.05, seed=None): - self.minval = minval - self.maxval = maxval - self.seed = seed - - def __call__(self, shape, dtype=None): - return K.random_uniform( - shape, self.minval, self.maxval, dtype=dtype, seed=self.seed) - - def get_config(self): - return { - 'minval': self.minval, - 'maxval': self.maxval, - 'seed': self.seed, - } - - -class TruncatedNormal(Initializer): - """Initializer that generates a truncated normal distribution. - - These values are similar to values from a `RandomNormal` - except that values more than two standard deviations from the mean - are discarded and re-drawn. This is the recommended initializer for - neural network weights and filters. - - Arguments: - mean: a python scalar or a scalar tensor. Mean of the random values - to generate. - stddev: a python scalar or a scalar tensor. Standard deviation of the - random values to generate. - seed: A Python integer. Used to seed the random generator. - """ - - def __init__(self, mean=0., stddev=0.05, seed=None): - self.mean = mean - self.stddev = stddev - self.seed = seed - - def __call__(self, shape, dtype=None): - return K.truncated_normal( - shape, self.mean, self.stddev, dtype=dtype, seed=self.seed) - - def get_config(self): - return {'mean': self.mean, 'stddev': self.stddev, 'seed': self.seed} - - -class VarianceScaling(Initializer): - """Initializer capable of adapting its scale to the shape of weights. - - With `distribution="normal"`, samples are drawn from a truncated normal - distribution centered on zero, with `stddev = sqrt(scale / n)` where n is: - - - number of input units in the weight tensor, if mode = "fan_in" - - number of output units, if mode = "fan_out" - - average of the numbers of input and output units, if mode = "fan_avg" - - With `distribution="uniform"`, - samples are drawn from a uniform distribution - within [-limit, limit], with `limit = sqrt(3 * scale / n)`. - - Arguments: - scale: Scaling factor (positive float). - mode: One of "fan_in", "fan_out", "fan_avg". - distribution: Random distribution to use. One of "normal", "uniform". - seed: A Python integer. Used to seed the random generator. - - Raises: - ValueError: In case of an invalid value for the "scale", mode" or - "distribution" arguments. - """ - - def __init__(self, scale=1.0, mode='fan_in', distribution='normal', - seed=None): - if scale <= 0.: - raise ValueError('`scale` must be a positive float. Got:', scale) - mode = mode.lower() - if mode not in {'fan_in', 'fan_out', 'fan_avg'}: - raise ValueError('Invalid `mode` argument: ' - 'expected on of {"fan_in", "fan_out", "fan_avg"} ' - 'but got', mode) - distribution = distribution.lower() - if distribution not in {'normal', 'uniform'}: - raise ValueError('Invalid `distribution` argument: ' - 'expected one of {"normal", "uniform"} ' - 'but got', distribution) - self.scale = scale - self.mode = mode - self.distribution = distribution - self.seed = seed - - def __call__(self, shape, dtype=None): - fan_in, fan_out = _compute_fans(shape) - scale = self.scale - if self.mode == 'fan_in': - scale /= max(1., fan_in) - elif self.mode == 'fan_out': - scale /= max(1., fan_out) - else: - scale /= max(1., float(fan_in + fan_out) / 2) - if self.distribution == 'normal': - stddev = math.sqrt(scale) - return K.truncated_normal(shape, 0., stddev, dtype=dtype, seed=self.seed) - else: - limit = math.sqrt(3. * scale) - return K.random_uniform(shape, -limit, limit, dtype=dtype, seed=self.seed) - - def get_config(self): - return { - 'scale': self.scale, - 'mode': self.mode, - 'distribution': self.distribution, - 'seed': self.seed - } - - -class Orthogonal(Initializer): - """Initializer that generates a random orthogonal matrix. - - Arguments: - gain: Multiplicative factor to apply to the orthogonal matrix. - seed: A Python integer. Used to seed the random generator. - - References: - Saxe et al., http://arxiv.org/abs/1312.6120 - """ - - def __init__(self, gain=1., seed=None): - self.gain = gain - self.seed = seed - - def __call__(self, shape, dtype=None): - num_rows = 1 - for dim in shape[:-1]: - num_rows *= dim - num_cols = shape[-1] - flat_shape = (num_rows, num_cols) - if self.seed is not None: - np.random.seed(self.seed) - a = np.random.normal(0.0, 1.0, flat_shape) - u, _, v = np.linalg.svd(a, full_matrices=False) - # Pick the one with the correct shape. - q = u if u.shape == flat_shape else v - q = q.reshape(shape) - return self.gain * q[:shape[0], :shape[1]] - - def get_config(self): - return {'gain': self.gain, 'seed': self.seed} +from tensorflow.python.ops.init_ops import Constant +from tensorflow.python.ops.init_ops import Initializer +from tensorflow.python.ops.init_ops import Ones +from tensorflow.python.ops.init_ops import Orthogonal +from tensorflow.python.ops.init_ops import RandomNormal +from tensorflow.python.ops.init_ops import RandomUniform +from tensorflow.python.ops.init_ops import TruncatedNormal +from tensorflow.python.ops.init_ops import VarianceScaling +from tensorflow.python.ops.init_ops import Zeros class Identity(Initializer): @@ -406,47 +179,6 @@ orthogonal = Orthogonal # Utility functions -def _compute_fans(shape, data_format='channels_last'): - """Computes the number of input and output units for a weight shape. - - Arguments: - shape: Integer shape tuple. - data_format: Image data format to use for convolution kernels. - Note that all kernels in Keras are standardized on the - `channels_last` ordering (even when inputs are set - to `channels_first`). - - Returns: - A tuple of scalars, `(fan_in, fan_out)`. - - Raises: - ValueError: in case of invalid `data_format` argument. - """ - shape = tensor_shape.TensorShape(shape).as_list() - if len(shape) == 2: - fan_in = shape[0] - fan_out = shape[1] - elif len(shape) in {3, 4, 5}: - # Assuming convolution kernels (1D, 2D or 3D). - # TH kernel shape: (depth, input_depth, ...) - # TF kernel shape: (..., input_depth, depth) - if data_format == 'channels_first': - receptive_field_size = np.prod(shape[2:]) - fan_in = shape[1] * receptive_field_size - fan_out = shape[0] * receptive_field_size - elif data_format == 'channels_last': - receptive_field_size = np.prod(shape[:2]) - fan_in = shape[-2] * receptive_field_size - fan_out = shape[-1] * receptive_field_size - else: - raise ValueError('Invalid data_format: ' + data_format) - else: - # No specific assumptions. - fan_in = math.sqrt(np.prod(shape)) - fan_out = math.sqrt(np.prod(shape)) - return fan_in, fan_out - - def serialize(initializer): return serialize_keras_object(initializer) diff --git a/tensorflow/contrib/keras/python/keras/initializers_test.py b/tensorflow/contrib/keras/python/keras/initializers_test.py index 7436fbb390..c9f50c28ea 100644 --- a/tensorflow/contrib/keras/python/keras/initializers_test.py +++ b/tensorflow/contrib/keras/python/keras/initializers_test.py @@ -21,121 +21,132 @@ from __future__ import print_function import numpy as np from tensorflow.contrib.keras.python import keras +from tensorflow.python.ops import init_ops from tensorflow.python.platform import test -def _runner(init, shape, target_mean=None, target_std=None, - target_max=None, target_min=None): - variable = keras.backend.variable(init(shape)) - output = keras.backend.get_value(variable) - lim = 3e-2 - if target_std is not None: - assert abs(output.std() - target_std) < lim, output.std() - if target_mean is not None: - assert abs(output.mean() - target_mean) < lim, output.mean() - if target_max is not None: - assert abs(output.max() - target_max) < lim, output.max() - if target_min is not None: - assert abs(output.min() - target_min) < lim, output.min() - - class KerasInitializersTest(test.TestCase): + def _runner(self, init, shape, target_mean=None, target_std=None, + target_max=None, target_min=None): + variable = keras.backend.variable(init(shape)) + output = keras.backend.get_value(variable) + lim = 3e-2 + if target_std is not None: + self.assertGreater(lim, abs(output.std() - target_std)) + if target_mean is not None: + self.assertGreater(lim, abs(output.mean() - target_mean)) + if target_max is not None: + self.assertGreater(lim, abs(output.max() - target_max)) + if target_min is not None: + self.assertGreater(lim, abs(output.min() - target_min)) + + # Test serialization (assumes deterministic behavior). + config = init.get_config() + reconstructed_init = init.__class__.from_config(config) + variable = keras.backend.variable(reconstructed_init(shape)) + output_2 = keras.backend.get_value(variable) + self.assertAllClose(output, output_2, atol=1e-4) + def test_uniform(self): tensor_shape = (9, 6, 7) with self.test_session(): - _runner(keras.initializers.RandomUniform(minval=-1, maxval=1, seed=124), - tensor_shape, - target_mean=0., target_max=1, target_min=-1) + self._runner(keras.initializers.RandomUniform(minval=-1, + maxval=1, + seed=124), + tensor_shape, + target_mean=0., target_max=1, target_min=-1) def test_normal(self): tensor_shape = (8, 12, 99) with self.test_session(): - _runner(keras.initializers.RandomNormal(mean=0, stddev=1, seed=153), - tensor_shape, - target_mean=0., target_std=1) + self._runner(keras.initializers.RandomNormal(mean=0, stddev=1, seed=153), + tensor_shape, + target_mean=0., target_std=1) def test_truncated_normal(self): tensor_shape = (12, 99, 7) with self.test_session(): - _runner(keras.initializers.TruncatedNormal(mean=0, stddev=1, seed=126), - tensor_shape, - target_mean=0., target_std=None, target_max=2) + self._runner(keras.initializers.TruncatedNormal(mean=0, + stddev=1, + seed=126), + tensor_shape, + target_mean=0., target_std=None, target_max=2) def test_constant(self): tensor_shape = (5, 6, 4) with self.test_session(): - _runner(keras.initializers.Constant(2), tensor_shape, - target_mean=2, target_max=2, target_min=2) + self._runner(keras.initializers.Constant(2), tensor_shape, + target_mean=2, target_max=2, target_min=2) def test_lecun_uniform(self): tensor_shape = (5, 6, 4, 2) with self.test_session(): - fan_in, _ = keras.initializers._compute_fans(tensor_shape) + fan_in, _ = init_ops._compute_fans(tensor_shape) scale = np.sqrt(3. / fan_in) - _runner(keras.initializers.lecun_uniform(seed=123), tensor_shape, - target_mean=0., target_max=scale, target_min=-scale) + self._runner(keras.initializers.lecun_uniform(seed=123), tensor_shape, + target_mean=0., target_max=scale, target_min=-scale) def test_glorot_uniform(self): tensor_shape = (5, 6, 4, 2) with self.test_session(): - fan_in, fan_out = keras.initializers._compute_fans(tensor_shape) + fan_in, fan_out = init_ops._compute_fans(tensor_shape) scale = np.sqrt(6. / (fan_in + fan_out)) - _runner(keras.initializers.glorot_uniform(seed=123), tensor_shape, - target_mean=0., target_max=scale, target_min=-scale) + self._runner(keras.initializers.glorot_uniform(seed=123), tensor_shape, + target_mean=0., target_max=scale, target_min=-scale) def test_he_uniform(self): tensor_shape = (5, 6, 4, 2) with self.test_session(): - fan_in, _ = keras.initializers._compute_fans(tensor_shape) + fan_in, _ = init_ops._compute_fans(tensor_shape) scale = np.sqrt(6. / fan_in) - _runner(keras.initializers.he_uniform(seed=123), tensor_shape, - target_mean=0., target_max=scale, target_min=-scale) + self._runner(keras.initializers.he_uniform(seed=123), tensor_shape, + target_mean=0., target_max=scale, target_min=-scale) def test_glorot_normal(self): tensor_shape = (5, 6, 4, 2) with self.test_session(): - fan_in, fan_out = keras.initializers._compute_fans(tensor_shape) + fan_in, fan_out = init_ops._compute_fans(tensor_shape) scale = np.sqrt(2. / (fan_in + fan_out)) - _runner(keras.initializers.glorot_normal(seed=123), tensor_shape, - target_mean=0., target_std=None, target_max=2 * scale) + self._runner(keras.initializers.glorot_normal(seed=123), tensor_shape, + target_mean=0., target_std=None, target_max=2 * scale) def test_he_normal(self): tensor_shape = (5, 6, 4, 2) with self.test_session(): - fan_in, _ = keras.initializers._compute_fans(tensor_shape) + fan_in, _ = init_ops._compute_fans(tensor_shape) scale = np.sqrt(2. / fan_in) - _runner(keras.initializers.he_normal(seed=123), tensor_shape, - target_mean=0., target_std=None, target_max=2 * scale) + self._runner(keras.initializers.he_normal(seed=123), tensor_shape, + target_mean=0., target_std=None, target_max=2 * scale) def test_orthogonal(self): - tensor_shape = (7, 8) + tensor_shape = (10, 10) with self.test_session(): - _runner(keras.initializers.orthogonal(seed=123), tensor_shape, - target_mean=0.) + self._runner(keras.initializers.orthogonal(seed=123), tensor_shape, + target_mean=0.) def test_identity(self): with self.test_session(): tensor_shape = (3, 4, 5) with self.assertRaises(ValueError): - _runner(keras.initializers.identity(), tensor_shape, - target_mean=1. / tensor_shape[0], target_max=1.) + self._runner(keras.initializers.identity(), tensor_shape, + target_mean=1. / tensor_shape[0], target_max=1.) tensor_shape = (3, 3) - _runner(keras.initializers.identity(), tensor_shape, - target_mean=1. / tensor_shape[0], target_max=1.) + self._runner(keras.initializers.identity(), tensor_shape, + target_mean=1. / tensor_shape[0], target_max=1.) def test_zero(self): tensor_shape = (4, 5) with self.test_session(): - _runner(keras.initializers.zeros(), tensor_shape, - target_mean=0., target_max=0.) + self._runner(keras.initializers.zeros(), tensor_shape, + target_mean=0., target_max=0.) def test_one(self): tensor_shape = (4, 5) with self.test_session(): - _runner(keras.initializers.ones(), tensor_shape, - target_mean=1., target_max=1.) + self._runner(keras.initializers.ones(), tensor_shape, + target_mean=1., target_max=1.) if __name__ == '__main__': diff --git a/tensorflow/contrib/keras/python/keras/integration_test.py b/tensorflow/contrib/keras/python/keras/integration_test.py index 3a3d36ca1c..16d0713b31 100644 --- a/tensorflow/contrib/keras/python/keras/integration_test.py +++ b/tensorflow/contrib/keras/python/keras/integration_test.py @@ -33,13 +33,13 @@ class KerasIntegrationTest(test.TestCase): (x_train, y_train), (x_test, y_test) = testing_utils.get_test_data( train_samples=200, test_samples=100, - input_shape=(8,), + input_shape=(10,), num_classes=2) y_train = keras.utils.to_categorical(y_train) y_test = keras.utils.to_categorical(y_test) model = keras.models.Sequential([ - keras.layers.Dense(8, + keras.layers.Dense(16, activation='relu', input_shape=x_train.shape[1:]), keras.layers.Dropout(0.1), @@ -59,13 +59,13 @@ class KerasIntegrationTest(test.TestCase): (x_train, y_train), (x_test, y_test) = testing_utils.get_test_data( train_samples=200, test_samples=100, - input_shape=(8,), + input_shape=(10,), num_classes=2) y_train = keras.utils.to_categorical(y_train) y_test = keras.utils.to_categorical(y_test) inputs = keras.layers.Input(shape=x_train.shape[1:]) - x = keras.layers.Dense(8, activation='relu')(inputs) + x = keras.layers.Dense(16, activation='relu')(inputs) x = keras.layers.Dropout(0.1)(x) outputs = keras.layers.Dense(y_train.shape[-1], activation='softmax')(x) diff --git a/tensorflow/contrib/keras/python/keras/optimizers_test.py b/tensorflow/contrib/keras/python/keras/optimizers_test.py index b3aaddb7c0..af5e3c99b9 100644 --- a/tensorflow/contrib/keras/python/keras/optimizers_test.py +++ b/tensorflow/contrib/keras/python/keras/optimizers_test.py @@ -41,7 +41,7 @@ def _test_optimizer(optimizer, target=0.75): input_shape=(10,), num_classes=2) y_train = keras.utils.to_categorical(y_train) - model = _get_model(x_train.shape[1], 10, y_train.shape[1]) + model = _get_model(x_train.shape[1], 20, y_train.shape[1]) model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy']) |