diff options
author | Petros Mol <pmol@google.com> | 2017-03-16 10:02:12 -0800 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2017-03-16 11:26:32 -0700 |
commit | f5398a32b7a3e5d8249a1d03016eab2168dd324d (patch) | |
tree | 0a807b05a12d5d5ab5319a15ee2dc85dfd3e68c5 /tensorflow/contrib/linear_optimizer | |
parent | c15c8e766b280fcaa0ed09617842762116c8fe4d (diff) |
Adding SDCA-based tf.learn estimators.
Change: 150340985
Diffstat (limited to 'tensorflow/contrib/linear_optimizer')
4 files changed, 1121 insertions, 0 deletions
diff --git a/tensorflow/contrib/linear_optimizer/BUILD b/tensorflow/contrib/linear_optimizer/BUILD index fbd7959c39..d87066f6f6 100644 --- a/tensorflow/contrib/linear_optimizer/BUILD +++ b/tensorflow/contrib/linear_optimizer/BUILD @@ -104,6 +104,38 @@ py_test( ], ) +py_library( + name = "sdca_estimator_py", + srcs = ["python/sdca_estimator.py"], + srcs_version = "PY2AND3", + deps = [ + ":sdca_ops_py", + ":sparse_feature_column_py", + "//tensorflow/contrib/layers:layers_py", + "//tensorflow/contrib/learn", + "//tensorflow/python:array_ops", + "//tensorflow/python:framework", + "//tensorflow/python:math_ops", + "//tensorflow/python:variables", + ], +) + +py_test( + name = "sdca_estimator_test", + srcs = ["python/sdca_estimator_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":sdca_estimator_py", + "//tensorflow/contrib/layers:layers_py", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:platform_test", + "//third_party/py/numpy", + ], +) + filegroup( name = "all_files", srcs = glob( diff --git a/tensorflow/contrib/linear_optimizer/python/sdca_estimator.py b/tensorflow/contrib/linear_optimizer/python/sdca_estimator.py new file mode 100644 index 0000000000..b6074c856e --- /dev/null +++ b/tensorflow/contrib/linear_optimizer/python/sdca_estimator.py @@ -0,0 +1,567 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Linear Estimators.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib import layers +from tensorflow.contrib.framework.python.ops import variables as contrib_variables +from tensorflow.contrib.learn.python.learn.estimators import estimator +from tensorflow.contrib.learn.python.learn.estimators import head as head_lib +from tensorflow.contrib.learn.python.learn.estimators import prediction_key +from tensorflow.contrib.linear_optimizer.python import sdca_optimizer +from tensorflow.contrib.linear_optimizer.python.ops import sdca_ops +from tensorflow.contrib.linear_optimizer.python.ops.sparse_feature_column import SparseFeatureColumn +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import sparse_tensor +from tensorflow.python.framework import tensor_util +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import variable_scope +from tensorflow.python.training import session_run_hook + + +def _head_is_valid_for_sdca(head): + """Returns true if the provided head is supported by SDCAOptimizer.""" + # pylint: disable=protected-access + return isinstance(head, head_lib._BinaryLogisticHead) or isinstance( + head, head_lib._BinarySvmHead) or isinstance(head, + head_lib._RegressionHead) + # pylint: enable=protected-access + + +def _add_bias_column(feature_columns, columns_to_tensors, bias_variable, + columns_to_variables): + """Adds a fake bias feature column filled with all 1s.""" + # TODO(b/31008490): Move definition to a common constants place. + bias_column_name = "tf_virtual_bias_column" + if any(col.name is bias_column_name for col in feature_columns): + raise ValueError("%s is a reserved column name." % bias_column_name) + if not feature_columns: + raise ValueError("feature_columns can't be empty.") + + # Loop through input tensors until we can figure out batch_size. + batch_size = None + for column in columns_to_tensors.values(): + if isinstance(column, tuple): + column = column[0] + if isinstance(column, sparse_tensor.SparseTensor): + shape = tensor_util.constant_value(column.dense_shape) + if shape is not None: + batch_size = shape[0] + break + else: + batch_size = array_ops.shape(column)[0] + break + if batch_size is None: + raise ValueError("Could not infer batch size from input features.") + + bias_column = layers.real_valued_column(bias_column_name) + columns_to_tensors[bias_column] = array_ops.ones( + [batch_size, 1], dtype=dtypes.float32) + columns_to_variables[bias_column] = [bias_variable] + + +def _get_sdca_train_step(optimizer, columns_to_variables, weight_column_name, + loss_type, features, targets, global_step): + """Returns the training operation of an SdcaModel optimizer.""" + + def _dense_tensor_to_sparse_feature_column(dense_tensor): + """Returns SparseFeatureColumn for the input dense_tensor.""" + ignore_value = 0.0 + sparse_indices = array_ops.where( + math_ops.not_equal(dense_tensor, + math_ops.cast(ignore_value, dense_tensor.dtype))) + sparse_values = array_ops.gather_nd(dense_tensor, sparse_indices) + # TODO(sibyl-Aix6ihai, sibyl-vie3Poto): Makes this efficient, as now SDCA supports + # very sparse features with weights and not weights. + return SparseFeatureColumn( + array_ops.reshape( + array_ops.split(value=sparse_indices, num_or_size_splits=2, + axis=1)[0], [-1]), + array_ops.reshape( + array_ops.split(value=sparse_indices, num_or_size_splits=2, + axis=1)[1], [-1]), + array_ops.reshape(math_ops.to_float(sparse_values), [-1])) + + def _training_examples_and_variables(): + """Returns dictionaries for training examples and variables.""" + batch_size = targets.get_shape()[0] + + # Iterate over all feature columns and create appropriate lists for dense + # and sparse features as well as dense and sparse weights (variables) for + # SDCA. + # TODO(sibyl-vie3Poto): Reshape variables stored as values in column_to_variables + # dict as 1-dimensional tensors. + dense_features, sparse_features, sparse_feature_with_values = [], [], [] + dense_feature_weights = [] + sparse_feature_weights, sparse_feature_with_values_weights = [], [] + for column in sorted(columns_to_variables.keys(), key=lambda x: x.key): + transformed_tensor = features[column] + if isinstance(column, layers.feature_column._RealValuedColumn): # pylint: disable=protected-access + # A real-valued column corresponds to a dense feature in SDCA. A + # transformed tensor corresponding to a RealValuedColumn has rank 2 + # (its shape is typically [batch_size, column.dimension]) and so it + # can be passed to SDCA as is. + dense_features.append(transformed_tensor) + # For real valued columns, the variables list contains exactly one + # element. + dense_feature_weights.append(columns_to_variables[column][0]) + elif isinstance(column, layers.feature_column._BucketizedColumn): # pylint: disable=protected-access + # A bucketized column corresponds to a sparse feature in SDCA. The + # bucketized feature is "sparsified" for SDCA by converting it to a + # SparseFeatureColumn respresenting the one-hot encoding of the + # bucketized feature. + # + # TODO(sibyl-vie3Poto): Explore whether it is more efficient to translate a + # bucketized feature column to a dense feature in SDCA. This will likely + # depend on the number of buckets. + dense_bucket_tensor = column._to_dnn_input_layer(transformed_tensor) # pylint: disable=protected-access + sparse_feature_column = _dense_tensor_to_sparse_feature_column( + dense_bucket_tensor) + sparse_feature_with_values.append(sparse_feature_column) + # For bucketized columns, the variables list contains exactly one + # element. + sparse_feature_with_values_weights.append( + columns_to_variables[column][0]) + elif isinstance( + column, + ( + layers.feature_column._CrossedColumn, # pylint: disable=protected-access + layers.feature_column._SparseColumn)): # pylint: disable=protected-access + sparse_features.append( + SparseFeatureColumn( + array_ops.reshape( + array_ops.split( + value=transformed_tensor.indices, + num_or_size_splits=2, + axis=1)[0], [-1]), + array_ops.reshape(transformed_tensor.values, [-1]), None)) + sparse_feature_weights.append(columns_to_variables[column][0]) + elif isinstance(column, layers.feature_column._WeightedSparseColumn): # pylint: disable=protected-access + id_tensor = column.id_tensor(transformed_tensor) + weight_tensor = column.weight_tensor(transformed_tensor) + sparse_feature_with_values.append( + SparseFeatureColumn( + array_ops.reshape( + array_ops.split( + value=id_tensor.indices, num_or_size_splits=2, axis=1)[ + 0], [-1]), + array_ops.reshape(id_tensor.values, [-1]), + array_ops.reshape(weight_tensor.values, [-1]))) + sparse_feature_with_values_weights.append( + columns_to_variables[column][0]) + else: + raise ValueError("SDCAOptimizer does not support column type {}".format( + type(column).__name__)) + + example_weights = array_ops.reshape( + features[weight_column_name], + shape=[-1]) if weight_column_name else array_ops.ones([batch_size]) + example_ids = features[optimizer.example_id_column] + sparse_feature_with_values.extend(sparse_features) + sparse_feature_with_values_weights.extend(sparse_feature_weights) + examples = dict( + sparse_features=sparse_feature_with_values, + dense_features=dense_features, + example_labels=math_ops.to_float( + array_ops.reshape(targets, shape=[-1])), + example_weights=example_weights, + example_ids=example_ids) + sdca_variables = dict( + sparse_features_weights=sparse_feature_with_values_weights, + dense_features_weights=dense_feature_weights) + return examples, sdca_variables + + training_examples, training_variables = _training_examples_and_variables() + sdca_model = sdca_ops.SdcaModel( + examples=training_examples, + variables=training_variables, + options=dict( + symmetric_l1_regularization=optimizer.symmetric_l1_regularization, + symmetric_l2_regularization=optimizer.symmetric_l2_regularization, + num_loss_partitions=optimizer.num_loss_partitions, + num_table_shards=optimizer.num_table_shards, + loss_type=loss_type)) + train_op = sdca_model.minimize(global_step=global_step) + return sdca_model, train_op + + +def sdca_model_fn(features, labels, mode, params, config=None): + """A model_fn for linear models that use the SDCA optimizer. + + Args: + features: A dict of `Tensor` keyed by column name. + labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of + dtype `int32` or `int64` with values in the set {0, 1}. + mode: Defines whether this is training, evaluation or prediction. + See `ModeKeys`. + params: A dict of hyperparameters. + The following hyperparameters are expected: + * head: A `Head` instance. Type must be one of `_BinarySvmHead`, + `_RegressionHead` or `_BinaryLogisticHead`. + * feature_columns: An iterable containing all the feature columns used by + the model. + * optimizer: An `SDCAOptimizer` instance. + * weight_column_name: A string defining the weight feature column, or + None if there are no weights. + * update_weights_hook: A `SessionRunHook` object or None. Used to update + model weights. + config: `RunConfig` object to configure the runtime settings. + + Returns: + A `ModelFnOps` instance. + + Raises: + ValueError: If `optimizer` is not an `SDCAOptimizer` instance. + ValueError: If the type of head is neither `_BinarySvmHead`, nor + `_RegressionHead` nor `_MultiClassHead`. + ValueError: If mode is not any of the `ModeKeys`. + """ + head = params["head"] + feature_columns = params["feature_columns"] + example_id_column = params["example_id_column"] + l1_regularization = params["l1_regularization"] + l2_regularization = params["l2_regularization"] + num_loss_partitions = params["num_loss_partitions"] + weight_column_name = params["weight_column_name"] + update_weights_hook = params.get("update_weights_hook", None) + + loss_type = None + if isinstance(head, head_lib._BinarySvmHead): # pylint: disable=protected-access + loss_type = "hinge_loss" + elif isinstance(head, head_lib._BinaryLogisticHead): # pylint: disable=protected-access + loss_type = "logistic_loss" + elif isinstance(head, head_lib._RegressionHead): # pylint: disable=protected-access + loss_type = "squared_loss" + else: + raise ValueError("Unsupported head type: {}".format(type(head))) + + assert head.logits_dimension == 1, ( + "SDCA only applies to logits_dimension=1.") + + # Update num_loss_partitions based on number of workers. + n_loss_partitions = num_loss_partitions or max(1, config.num_worker_replicas) + optimizer = sdca_optimizer.SDCAOptimizer( + example_id_column=example_id_column, + num_loss_partitions=n_loss_partitions, + symmetric_l1_regularization=l1_regularization, + symmetric_l2_regularization=l2_regularization) + + parent_scope = "linear" + + with variable_scope.variable_op_scope(features.values(), + parent_scope) as scope: + logits, columns_to_variables, bias = ( + layers.weighted_sum_from_feature_columns( + columns_to_tensors=features, + feature_columns=feature_columns, + num_outputs=1, + scope=scope)) + + _add_bias_column(feature_columns, features, bias, columns_to_variables) + + def _train_op_fn(unused_loss): + global_step = contrib_variables.get_global_step() + sdca_model, train_op = _get_sdca_train_step(optimizer, columns_to_variables, + weight_column_name, loss_type, + features, labels, global_step) + if update_weights_hook is not None: + update_weights_hook.set_parameters(sdca_model, train_op) + return train_op + + model_fn_ops = head.create_model_fn_ops( + features=features, + labels=labels, + mode=mode, + train_op_fn=_train_op_fn, + logits=logits) + if update_weights_hook is not None: + return model_fn_ops._replace(training_chief_hooks=( + model_fn_ops.training_chief_hooks + [update_weights_hook])) + return model_fn_ops + + +class _SdcaUpdateWeightsHook(session_run_hook.SessionRunHook): + """SessionRunHook to update and shrink SDCA model weights.""" + + def __init__(self): + pass + + def set_parameters(self, sdca_model, train_op): + self._sdca_model = sdca_model + self._train_op = train_op + + def begin(self): + """Construct the update_weights op. + + The op is implicitly added to the default graph. + """ + self._update_op = self._sdca_model.update_weights(self._train_op) + + def before_run(self, run_context): + """Return the update_weights op so that it is executed during this run.""" + return session_run_hook.SessionRunArgs(self._update_op) + + +class _SDCAEstimator(estimator.Estimator): + """Base estimator class for linear models using the SDCA optimizer. + + This class should not be used directly. Rather, users should call one of the + derived estimators. + + The input_fn provided to `fit`, `evaluate` and predict_* methods should have + the following features, otherwise there will be a `KeyError`: + - a feature with `key=example_id_column` whose value is a `Tensor` of dtype + string. + - if `weight_column_name` is not `None`, a feature with + `key=weight_column_name` whose value is a `Tensor`. + - for each `column` in `feature_columns`: + - if `column` is a `SparseColumn`, a feature with `key=column.name` + whose `value` is a `SparseTensor`. + - if `column` is a `RealValuedColumn, a feature with `key=column.name` + whose `value` is a `Tensor`. + """ + + def __init__(self, + example_id_column, + feature_columns, + weight_column_name=None, + model_dir=None, + head=None, + l1_regularization=0.0, + l2_regularization=1.0, + num_loss_partitions=None, + config=None, + feature_engineering_fn=None): + """Construct a `_SDCAEstimator` estimator object. + + Args: + example_id_column: A string defining the feature column name representing + example ids. Used to initialize the underlying SDCA optimizer. + feature_columns: An iterable containing all the feature columns used by + the model. All items in the set should be instances of classes derived + from `FeatureColumn`. + weight_column_name: A string defining feature column name representing + weights. It is used to down weight or boost examples during training. It + will be multiplied by the loss of the example. + model_dir: Directory to save model parameters, graph etc. This can also be + used to load checkpoints from the directory into an estimator to + continue training a previously saved model. + head: type of head. Currently, _BinaryLogisticHead and _BinarySvmHead are + supported for classification and _RegressionHead for regression. It + should be a subclass of _SingleHead. + l1_regularization: L1-regularization parameter. Refers to global L1 + regularization (across all examples). + l2_regularization: L2-regularization parameter. Refers to global L2 + regularization (across all examples). + num_loss_partitions: number of partitions of the (global) loss function + optimized by the underlying optimizer (SDCAOptimizer). + config: `RunConfig` object to configure the runtime settings. + feature_engineering_fn: Feature engineering function. Takes features and + labels which are the output of `input_fn` and returns features and + labels which will be fed into the model. + + Returns: + A `_SDCAEstimator` estimator. + + Raises: + ValueError: if head is not supported by SDCA. + """ + self._feature_columns = tuple(feature_columns or []) + assert self._feature_columns + + if not _head_is_valid_for_sdca(head): + raise ValueError( + "head type: {} is not supported. Supported head types: " + "_BinaryLogisticHead, _BinarySvmHead and _RegressionHead.".format( + type(head))) + assert head.logits_dimension == 1 + + params = { + "head": head, + "feature_columns": feature_columns, + "example_id_column": example_id_column, + "num_loss_partitions": num_loss_partitions, + "l1_regularization": l1_regularization, + "l2_regularization": l2_regularization, + "weight_column_name": weight_column_name, + "update_weights_hook": _SdcaUpdateWeightsHook(), + } + + super(_SDCAEstimator, self).__init__( + model_fn=sdca_model_fn, + model_dir=model_dir, + config=config, + params=params, + feature_engineering_fn=feature_engineering_fn) + + +class SDCALogisticClassifier(_SDCAEstimator): + """Logistic regression binary classifier using the SDCA optimizer. + + Example usage: + + ```python + sparse_column_a = sparse_column_with_hash_bucket(...) + sparse_column_b = sparse_column_with_hash_bucket(...) + + sparse_feature_a_x_sparse_feature_b = crossed_column(...) + + estimator = SDCALogisticClassifier( + example_id_column='example_id', + feature_columns=[sparse_column_a, sparse_feature_a_x_sparse_feature_b]), + weight_column_name=..., + l2_regularization=..., + num_loss_partitions=..., + ) + + # Input builders + # returns x, y (where y is the label Tensor (with 0/1 values) + def input_fn_{train, eval}: + + # returns x (features dict) + def input_fn_test: + ... + estimator.fit(input_fn=input_fn_train) + estimator.evaluate(input_fn=input_fn_eval) + estimator.predict_classes(input_fn=input_fn_test) # returns predicted classes. + estimator.predict_proba(input_fn=input_fn_test) # returns predicted prob/ties. + ``` + """ + + def __init__(self, + example_id_column, + feature_columns, + weight_column_name=None, + model_dir=None, + l1_regularization=0.0, + l2_regularization=1.0, + num_loss_partitions=None, + config=None, + feature_engineering_fn=None): + """Construct a `SDCALogisticClassifier` object. See _SDCAEstimator.""" + super(SDCALogisticClassifier, self).__init__( + example_id_column=example_id_column, + feature_columns=feature_columns, + weight_column_name=weight_column_name, + model_dir=model_dir, + head=head_lib.multi_class_head( + n_classes=2, weight_column_name=weight_column_name), + l1_regularization=l1_regularization, + l2_regularization=l2_regularization, + num_loss_partitions=num_loss_partitions, + config=config, + feature_engineering_fn=None) + + def predict_classes(self, input_fn=None): + """Runs inference to determine the predicted class. + + Args: + input_fn: The input function providing features. + + Returns: + A generator of predicted classes for the features provided by input_fn. + """ + key = prediction_key.PredictionKey.CLASSES + predictions = super(SDCALogisticClassifier, self).predict( + input_fn=input_fn, outputs=[key]) + return (pred[key] for pred in predictions) + + def predict_proba(self, input_fn=None): + """Runs inference to determine the class probability predictions. + + Args: + input_fn: The input function providing features. + + Returns: + A generator of predicted class probabilities for the features provided by + input_fn. + """ + key = prediction_key.PredictionKey.PROBABILITIES + predictions = super(SDCALogisticClassifier, self).predict( + input_fn=input_fn, outputs=[key]) + return (pred[key] for pred in predictions) + + +class SDCARegressor(_SDCAEstimator): + """Linear regressor model using SDCA to solve the underlying optimization. + + Example usage: + + ```python + sparse_column_a = sparse_column_with_hash_bucket(...) + sparse_column_b = sparse_column_with_hash_bucket(...) + + sparse_feature_a_x_sparse_feature_b = crossed_column(...) + + estimator = SDCARegressor( + example_id_column='example_id', + feature_columns=[sparse_column_a, sparse_feature_a_x_sparse_feature_b]), + weight_column_name=..., + l2_regularization=..., + num_loss_partitions=..., + ) + + # Input builders + # returns x, y (where y is the label Tensor (with 0/1 values) + def input_fn_{train, eval}: + + # returns x (features dict) + def input_fn_test: + ... + estimator.fit(input_fn=input_fn_train) + estimator.evaluate(input_fn=input_fn_eval) + estimator.predict_scores(input_fn=input_fn_test) # returns predicted scores. + """ + + def __init__(self, + example_id_column, + feature_columns, + weight_column_name=None, + model_dir=None, + l1_regularization=0.0, + l2_regularization=1.0, + num_loss_partitions=None, + config=None, + feature_engineering_fn=None): + """Construct a `SDCARegressor` estimator object. See _SDCAEstimator.""" + super(SDCARegressor, self).__init__( + example_id_column=example_id_column, + feature_columns=feature_columns, + weight_column_name=weight_column_name, + model_dir=model_dir, + head=head_lib.regression_head(weight_column_name=weight_column_name), + l1_regularization=l1_regularization, + l2_regularization=l2_regularization, + num_loss_partitions=num_loss_partitions, + config=config, + feature_engineering_fn=None) + + def predict_scores(self, input_fn): + """Returns predicted scores for given features. + + Args: + input_fn: The input function providing features. + + Returns: + A generator of predicted scores for the features provided by input_fn. + """ + key = prediction_key.PredictionKey.SCORES + predictions = super(SDCARegressor, self).predict( + input_fn=input_fn, outputs=[key]) + return (pred[key] for pred in predictions) diff --git a/tensorflow/contrib/linear_optimizer/python/sdca_estimator_test.py b/tensorflow/contrib/linear_optimizer/python/sdca_estimator_test.py new file mode 100644 index 0000000000..081651df8d --- /dev/null +++ b/tensorflow/contrib/linear_optimizer/python/sdca_estimator_test.py @@ -0,0 +1,502 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for linear_optimizer.sdca_estimator.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.layers.python.layers import feature_column as feature_column_lib +from tensorflow.contrib.linear_optimizer.python.sdca_estimator import SDCALogisticClassifier +from tensorflow.contrib.linear_optimizer.python.sdca_estimator import SDCARegressor +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import sparse_tensor +from tensorflow.python.platform import test + + +class SDCALogisticClassifierTest(test.TestCase): + + def testRealValuedFeatures(self): + """Tests SDCALogisticClassifier works with real valued features.""" + + def input_fn(): + return { + 'example_id': constant_op.constant(['1', '2']), + 'maintenance_cost': constant_op.constant([[500.0], [200.0]]), + 'sq_footage': constant_op.constant([[800.0], [600.0]]), + 'weights': constant_op.constant([[1.0], [1.0]]) + }, constant_op.constant([[0], [1]]) + + maintenance_cost = feature_column_lib.real_valued_column('maintenance_cost') + sq_footage = feature_column_lib.real_valued_column('sq_footage') + classifier = SDCALogisticClassifier( + example_id_column='example_id', + feature_columns=[maintenance_cost, sq_footage], + weight_column_name='weights') + classifier.fit(input_fn=input_fn, steps=100) + loss = classifier.evaluate(input_fn=input_fn, steps=1)['loss'] + self.assertLess(loss, 0.05) + + def testRealValuedFeatureWithHigherDimension(self): + """Tests SDCALogisticClassifier with high-dimension real valued features.""" + + # input_fn is identical to the one in testRealValuedFeatures where 2 + # 1-dimensional dense features are replaced by a 2-dimensional feature. + def input_fn(): + return { + 'example_id': + constant_op.constant(['1', '2']), + 'dense_feature': + constant_op.constant([[500.0, 800.0], [200.0, 600.0]]) + }, constant_op.constant([[0], [1]]) + + dense_feature = feature_column_lib.real_valued_column( + 'dense_feature', dimension=2) + classifier = SDCALogisticClassifier( + example_id_column='example_id', feature_columns=[dense_feature]) + classifier.fit(input_fn=input_fn, steps=100) + loss = classifier.evaluate(input_fn=input_fn, steps=1)['loss'] + self.assertLess(loss, 0.05) + + def testBucketizedFeatures(self): + """Tests SDCALogisticClassifier with bucketized features.""" + + def input_fn(): + return { + 'example_id': constant_op.constant(['1', '2', '3']), + 'price': constant_op.constant([[600.0], [1000.0], [400.0]]), + 'sq_footage': constant_op.constant([[1000.0], [600.0], [700.0]]), + 'weights': constant_op.constant([[1.0], [1.0], [1.0]]) + }, constant_op.constant([[1], [0], [1]]) + + price_bucket = feature_column_lib.bucketized_column( + feature_column_lib.real_valued_column('price'), + boundaries=[500.0, 700.0]) + sq_footage_bucket = feature_column_lib.bucketized_column( + feature_column_lib.real_valued_column('sq_footage'), boundaries=[650.0]) + classifier = SDCALogisticClassifier( + example_id_column='example_id', + feature_columns=[price_bucket, sq_footage_bucket], + weight_column_name='weights', + l2_regularization=1.0) + classifier.fit(input_fn=input_fn, steps=50) + metrics = classifier.evaluate(input_fn=input_fn, steps=1) + self.assertGreater(metrics['accuracy'], 0.9) + + def testSparseFeatures(self): + """Tests SDCALogisticClassifier with sparse features.""" + + def input_fn(): + return { + 'example_id': + constant_op.constant(['1', '2', '3']), + 'price': + constant_op.constant([[0.4], [0.6], [0.3]]), + 'country': + sparse_tensor.SparseTensor( + values=['IT', 'US', 'GB'], + indices=[[0, 0], [1, 3], [2, 1]], + dense_shape=[3, 5]), + 'weights': + constant_op.constant([[1.0], [1.0], [1.0]]) + }, constant_op.constant([[1], [0], [1]]) + + price = feature_column_lib.real_valued_column('price') + country = feature_column_lib.sparse_column_with_hash_bucket( + 'country', hash_bucket_size=5) + classifier = SDCALogisticClassifier( + example_id_column='example_id', + feature_columns=[price, country], + weight_column_name='weights') + classifier.fit(input_fn=input_fn, steps=50) + metrics = classifier.evaluate(input_fn=input_fn, steps=1) + self.assertGreater(metrics['accuracy'], 0.9) + + def testWeightedSparseFeatures(self): + """Tests SDCALogisticClassifier with weighted sparse features.""" + + def input_fn(): + return { + 'example_id': + constant_op.constant(['1', '2', '3']), + 'price': + sparse_tensor.SparseTensor( + values=[2., 3., 1.], + indices=[[0, 0], [1, 0], [2, 0]], + dense_shape=[3, 5]), + 'country': + sparse_tensor.SparseTensor( + values=['IT', 'US', 'GB'], + indices=[[0, 0], [1, 0], [2, 0]], + dense_shape=[3, 5]) + }, constant_op.constant([[1], [0], [1]]) + + country = feature_column_lib.sparse_column_with_hash_bucket( + 'country', hash_bucket_size=5) + country_weighted_by_price = feature_column_lib.weighted_sparse_column( + country, 'price') + classifier = SDCALogisticClassifier( + example_id_column='example_id', + feature_columns=[country_weighted_by_price]) + classifier.fit(input_fn=input_fn, steps=50) + metrics = classifier.evaluate(input_fn=input_fn, steps=1) + self.assertGreater(metrics['accuracy'], 0.9) + + def testCrossedFeatures(self): + """Tests SDCALogisticClassifier with crossed features.""" + + def input_fn(): + return { + 'example_id': + constant_op.constant(['1', '2', '3']), + 'language': + sparse_tensor.SparseTensor( + values=['english', 'italian', 'spanish'], + indices=[[0, 0], [1, 0], [2, 0]], + dense_shape=[3, 1]), + 'country': + sparse_tensor.SparseTensor( + values=['US', 'IT', 'MX'], + indices=[[0, 0], [1, 0], [2, 0]], + dense_shape=[3, 1]) + }, constant_op.constant([[0], [0], [1]]) + + language = feature_column_lib.sparse_column_with_hash_bucket( + 'language', hash_bucket_size=5) + country = feature_column_lib.sparse_column_with_hash_bucket( + 'country', hash_bucket_size=5) + country_language = feature_column_lib.crossed_column( + [language, country], hash_bucket_size=10) + classifier = SDCALogisticClassifier( + example_id_column='example_id', feature_columns=[country_language]) + classifier.fit(input_fn=input_fn, steps=10) + metrics = classifier.evaluate(input_fn=input_fn, steps=1) + self.assertGreater(metrics['accuracy'], 0.9) + + def testMixedFeatures(self): + """Tests SDCALogisticClassifier with a mix of features.""" + + def input_fn(): + return { + 'example_id': + constant_op.constant(['1', '2', '3']), + 'price': + constant_op.constant([[0.6], [0.8], [0.3]]), + 'sq_footage': + constant_op.constant([[900.0], [700.0], [600.0]]), + 'country': + sparse_tensor.SparseTensor( + values=['IT', 'US', 'GB'], + indices=[[0, 0], [1, 3], [2, 1]], + dense_shape=[3, 5]), + 'weights': + constant_op.constant([[3.0], [1.0], [1.0]]) + }, constant_op.constant([[1], [0], [1]]) + + price = feature_column_lib.real_valued_column('price') + sq_footage_bucket = feature_column_lib.bucketized_column( + feature_column_lib.real_valued_column('sq_footage'), + boundaries=[650.0, 800.0]) + country = feature_column_lib.sparse_column_with_hash_bucket( + 'country', hash_bucket_size=5) + sq_footage_country = feature_column_lib.crossed_column( + [sq_footage_bucket, country], hash_bucket_size=10) + classifier = SDCALogisticClassifier( + example_id_column='example_id', + feature_columns=[price, sq_footage_bucket, country, sq_footage_country], + weight_column_name='weights') + classifier.fit(input_fn=input_fn, steps=50) + metrics = classifier.evaluate(input_fn=input_fn, steps=1) + self.assertGreater(metrics['accuracy'], 0.9) + + +class SDCARegressorTest(test.TestCase): + + def testRealValuedLinearFeatures(self): + """Tests SDCARegressor works with real valued features.""" + x = [[1.2, 2.0, -1.5], [-2.0, 3.0, -0.5], [1.0, -0.5, 4.0]] + weights = [[3.0], [-1.2], [0.5]] + y = np.dot(x, weights) + + def input_fn(): + return { + 'example_id': constant_op.constant(['1', '2', '3']), + 'x': constant_op.constant(x), + 'weights': constant_op.constant([[10.0], [10.0], [10.0]]) + }, constant_op.constant(y) + + x_column = feature_column_lib.real_valued_column('x', dimension=3) + regressor = SDCARegressor( + example_id_column='example_id', + feature_columns=[x_column], + weight_column_name='weights') + regressor.fit(input_fn=input_fn, steps=20) + loss = regressor.evaluate(input_fn=input_fn, steps=1)['loss'] + self.assertLess(loss, 0.01) + self.assertIn('linear/x/weight', regressor.get_variable_names()) + regressor_weights = regressor.get_variable_value('linear/x/weight') + self.assertAllClose( + [w[0] for w in weights], regressor_weights.flatten(), rtol=0.1) + + def testMixedFeaturesArbitraryWeights(self): + """Tests SDCARegressor works with a mix of features.""" + + def input_fn(): + return { + 'example_id': + constant_op.constant(['1', '2', '3']), + 'price': + constant_op.constant([[0.6], [0.8], [0.3]]), + 'sq_footage': + constant_op.constant([[900.0], [700.0], [600.0]]), + 'country': + sparse_tensor.SparseTensor( + values=['IT', 'US', 'GB'], + indices=[[0, 0], [1, 3], [2, 1]], + dense_shape=[3, 5]), + 'weights': + constant_op.constant([[3.0], [5.0], [7.0]]) + }, constant_op.constant([[1.55], [-1.25], [-3.0]]) + + price = feature_column_lib.real_valued_column('price') + sq_footage_bucket = feature_column_lib.bucketized_column( + feature_column_lib.real_valued_column('sq_footage'), + boundaries=[650.0, 800.0]) + country = feature_column_lib.sparse_column_with_hash_bucket( + 'country', hash_bucket_size=5) + sq_footage_country = feature_column_lib.crossed_column( + [sq_footage_bucket, country], hash_bucket_size=10) + regressor = SDCARegressor( + example_id_column='example_id', + feature_columns=[price, sq_footage_bucket, country, sq_footage_country], + l2_regularization=1.0, + weight_column_name='weights') + regressor.fit(input_fn=input_fn, steps=20) + loss = regressor.evaluate(input_fn=input_fn, steps=1)['loss'] + self.assertLess(loss, 0.05) + + def testSdcaOptimizerSparseFeaturesWithL1Reg(self): + """Tests SDCARegressor works with sparse features and L1 regularization.""" + + def input_fn(): + return { + 'example_id': + constant_op.constant(['1', '2', '3']), + 'price': + constant_op.constant([[0.4], [0.6], [0.3]]), + 'country': + sparse_tensor.SparseTensor( + values=['IT', 'US', 'GB'], + indices=[[0, 0], [1, 3], [2, 1]], + dense_shape=[3, 5]), + 'weights': + constant_op.constant([[10.0], [10.0], [10.0]]) + }, constant_op.constant([[1.4], [-0.8], [2.6]]) + + price = feature_column_lib.real_valued_column('price') + country = feature_column_lib.sparse_column_with_hash_bucket( + 'country', hash_bucket_size=5) + # Regressor with no L1 regularization. + regressor = SDCARegressor( + example_id_column='example_id', + feature_columns=[price, country], + weight_column_name='weights') + regressor.fit(input_fn=input_fn, steps=20) + no_l1_reg_loss = regressor.evaluate(input_fn=input_fn, steps=1)['loss'] + variable_names = regressor.get_variable_names() + self.assertIn('linear/price/weight', variable_names) + self.assertIn('linear/country/weights', variable_names) + no_l1_reg_weights = { + 'linear/price/weight': + regressor.get_variable_value('linear/price/weight'), + 'linear/country/weights': + regressor.get_variable_value('linear/country/weights'), + } + + # Regressor with L1 regularization. + regressor = SDCARegressor( + example_id_column='example_id', + feature_columns=[price, country], + l1_regularization=1.0, + weight_column_name='weights') + regressor.fit(input_fn=input_fn, steps=20) + l1_reg_loss = regressor.evaluate(input_fn=input_fn, steps=1)['loss'] + l1_reg_weights = { + 'linear/price/weight': + regressor.get_variable_value('linear/price/weight'), + 'linear/country/weights': + regressor.get_variable_value('linear/country/weights'), + } + + # Unregularized loss is lower when there is no L1 regularization. + self.assertLess(no_l1_reg_loss, l1_reg_loss) + self.assertLess(no_l1_reg_loss, 0.05) + + # But weights returned by the regressor with L1 regularization have smaller + # L1 norm. + l1_reg_weights_norm, no_l1_reg_weights_norm = 0.0, 0.0 + for var_name in sorted(l1_reg_weights): + l1_reg_weights_norm += sum( + np.absolute(l1_reg_weights[var_name].flatten())) + no_l1_reg_weights_norm += sum( + np.absolute(no_l1_reg_weights[var_name].flatten())) + print('Var name: %s, value: %s' % (var_name, + no_l1_reg_weights[var_name].flatten())) + self.assertLess(l1_reg_weights_norm, no_l1_reg_weights_norm) + + def testBiasOnly(self): + """Tests SDCARegressor has a valid bias weight.""" + + def input_fn(): + """Testing the bias weight when it's the only feature present. + + All of the instances in this input only have the bias feature, and a + 1/4 of the labels are positive. This means that the expected weight for + the bias should be close to the average prediction, i.e 0.25. + Returns: + Training data for the test. + """ + num_examples = 40 + return { + 'example_id': + constant_op.constant([str(x + 1) for x in range(num_examples)]), + # place_holder is an empty column which is always 0 (absent), because + # LinearClassifier requires at least one column. + 'place_holder': + constant_op.constant([[0.0]] * num_examples), + }, constant_op.constant([[1 if i % 4 is 0 else 0] + for i in range(num_examples)]) + + place_holder = feature_column_lib.real_valued_column('place_holder') + regressor = SDCARegressor( + example_id_column='example_id', feature_columns=[place_holder]) + regressor.fit(input_fn=input_fn, steps=100) + self.assertNear( + regressor.get_variable_value('linear/bias_weight')[0], 0.25, err=0.1) + + def testBiasAndOtherColumns(self): + """SDCARegressor has valid bias weight when other columns are present.""" + + def input_fn(): + """Testing the bias weight when there are other features present. + + 1/2 of the instances in this input have feature 'a', the rest have + feature 'b', and we expect the bias to be added to each instance as well. + 0.4 of all instances that have feature 'a' are positive, and 0.2 of all + instances that have feature 'b' are positive. The labels in the dataset + are ordered to appear shuffled since SDCA expects shuffled data, and + converges faster with this pseudo-random ordering. + If the bias was centered we would expect the weights to be: + bias: 0.3 + a: 0.1 + b: -0.1 + Until b/29339026 is resolved, the bias gets regularized with the same + global value for the other columns, and so the expected weights get + shifted and are: + bias: 0.2 + a: 0.2 + b: 0.0 + Returns: + The test dataset. + """ + num_examples = 200 + half = int(num_examples / 2) + return { + 'example_id': + constant_op.constant([str(x + 1) for x in range(num_examples)]), + 'a': + constant_op.constant([[1]] * int(half) + [[0]] * int(half)), + 'b': + constant_op.constant([[0]] * int(half) + [[1]] * int(half)), + }, constant_op.constant( + [[x] + for x in [1, 0, 0, 1, 1, 0, 0, 0, 1, 0] * int(half / 10) + + [0, 1, 0, 0, 0, 0, 0, 0, 1, 0] * int(half / 10)]) + + regressor = SDCARegressor( + example_id_column='example_id', + feature_columns=[ + feature_column_lib.real_valued_column('a'), + feature_column_lib.real_valued_column('b') + ]) + + regressor.fit(input_fn=input_fn, steps=200) + + variable_names = regressor.get_variable_names() + self.assertIn('linear/bias_weight', variable_names) + self.assertIn('linear/a/weight', variable_names) + self.assertIn('linear/b/weight', variable_names) + # TODO(b/29339026): Change the expected results to expect a centered bias. + self.assertNear( + regressor.get_variable_value('linear/bias_weight')[0], 0.2, err=0.05) + self.assertNear( + regressor.get_variable_value('linear/a/weight')[0], 0.2, err=0.05) + self.assertNear( + regressor.get_variable_value('linear/b/weight')[0], 0.0, err=0.05) + + def testBiasAndOtherColumnsFabricatedCentered(self): + """SDCARegressor has valid bias weight when instances are centered.""" + + def input_fn(): + """Testing the bias weight when there are other features present. + + 1/2 of the instances in this input have feature 'a', the rest have + feature 'b', and we expect the bias to be added to each instance as well. + 0.1 of all instances that have feature 'a' have a label of 1, and 0.1 of + all instances that have feature 'b' have a label of -1. + We can expect the weights to be: + bias: 0.0 + a: 0.1 + b: -0.1 + Returns: + The test dataset. + """ + num_examples = 200 + half = int(num_examples / 2) + return { + 'example_id': + constant_op.constant([str(x + 1) for x in range(num_examples)]), + 'a': + constant_op.constant([[1]] * int(half) + [[0]] * int(half)), + 'b': + constant_op.constant([[0]] * int(half) + [[1]] * int(half)), + }, constant_op.constant([[1 if x % 10 == 0 else 0] for x in range(half)] + + [[-1 if x % 10 == 0 else 0] for x in range(half)]) + + regressor = SDCARegressor( + example_id_column='example_id', + feature_columns=[ + feature_column_lib.real_valued_column('a'), + feature_column_lib.real_valued_column('b') + ]) + + regressor.fit(input_fn=input_fn, steps=100) + + variable_names = regressor.get_variable_names() + self.assertIn('linear/bias_weight', variable_names) + self.assertIn('linear/a/weight', variable_names) + self.assertIn('linear/b/weight', variable_names) + self.assertNear( + regressor.get_variable_value('linear/bias_weight')[0], 0.0, err=0.05) + self.assertNear( + regressor.get_variable_value('linear/a/weight')[0], 0.1, err=0.05) + self.assertNear( + regressor.get_variable_value('linear/b/weight')[0], -0.1, err=0.05) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/contrib/linear_optimizer/python/sdca_optimizer.py b/tensorflow/contrib/linear_optimizer/python/sdca_optimizer.py index 9edb00e7b0..afa0b3b833 100644 --- a/tensorflow/contrib/linear_optimizer/python/sdca_optimizer.py +++ b/tensorflow/contrib/linear_optimizer/python/sdca_optimizer.py @@ -74,6 +74,26 @@ class SDCAOptimizer(object): def get_name(self): return 'SDCAOptimizer' + @property + def example_id_column(self): + return self._example_id_column + + @property + def num_loss_partitions(self): + return self._num_loss_partitions + + @property + def num_table_shards(self): + return self._num_table_shards + + @property + def symmetric_l1_regularization(self): + return self._symmetric_l1_regularization + + @property + def symmetric_l2_regularization(self): + return self._symmetric_l2_regularization + def get_train_step(self, columns_to_variables, weight_column_name, loss_type, features, targets, global_step): |