diff options
author | A. Unique TensorFlower <gardener@tensorflow.org> | 2018-09-11 15:02:21 -0700 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2018-09-11 15:10:06 -0700 |
commit | f3242baaf10842ff4753b5974f426cf963fa8eef (patch) | |
tree | 58adc3b49a0eceeb806b3ba56fbe73c366be9af2 /tensorflow/python/feature_column | |
parent | ba650a5c989106330519dbde0de368f580435a8b (diff) |
Add support for populating a feature-columns-to-output-tensors dictionary in input_layer.
PiperOrigin-RevId: 212528172
Diffstat (limited to 'tensorflow/python/feature_column')
-rw-r--r-- | tensorflow/python/feature_column/feature_column.py | 25 | ||||
-rw-r--r-- | tensorflow/python/feature_column/feature_column_test.py | 34 |
2 files changed, 53 insertions, 6 deletions
diff --git a/tensorflow/python/feature_column/feature_column.py b/tensorflow/python/feature_column/feature_column.py index 2246d2f3e9..9984379e9d 100644 --- a/tensorflow/python/feature_column/feature_column.py +++ b/tensorflow/python/feature_column/feature_column.py @@ -169,7 +169,8 @@ def _internal_input_layer(features, weight_collections=None, trainable=True, cols_to_vars=None, - scope=None): + scope=None, + cols_to_output_tensors=None): """See input_layer. `scope` is a name or variable scope to use.""" feature_columns = _normalize_feature_columns(feature_columns) @@ -202,14 +203,17 @@ def _internal_input_layer(features, trainable=trainable) num_elements = column._variable_shape.num_elements() # pylint: disable=protected-access batch_size = array_ops.shape(tensor)[0] - output_tensors.append( - array_ops.reshape(tensor, shape=(batch_size, num_elements))) + output_tensor = array_ops.reshape( + tensor, shape=(batch_size, num_elements)) + output_tensors.append(output_tensor) if cols_to_vars is not None: # Retrieve any variables created (some _DenseColumn's don't create # variables, in which case an empty list is returned). cols_to_vars[column] = ops.get_collection( ops.GraphKeys.GLOBAL_VARIABLES, scope=variable_scope.get_variable_scope().name) + if cols_to_output_tensors is not None: + cols_to_output_tensors[column] = output_tensor _verify_static_batch_size_equality(output_tensors, ordered_columns) return array_ops.concat(output_tensors, 1) @@ -219,7 +223,8 @@ def input_layer(features, feature_columns, weight_collections=None, trainable=True, - cols_to_vars=None): + cols_to_vars=None, + cols_to_output_tensors=None): """Returns a dense `Tensor` as input layer based on given `feature_columns`. Generally a single example in training data is described with FeatureColumns. 
@@ -264,6 +269,9 @@ def input_layer(features, dimension=10): [<tf.Variable 'some_variable:0' shape=(5, 10), <tf.Variable 'some_variable:1' shape=(5, 10)]} If a column creates no variables, its value will be an empty list. + cols_to_output_tensors: If not `None`, must be a dictionary that will be + filled with a mapping from '_FeatureColumn' to the associated + output `Tensor`s. Returns: A `Tensor` which represents input layer of a model. Its shape @@ -273,8 +281,13 @@ def input_layer(features, Raises: ValueError: if an item in `feature_columns` is not a `_DenseColumn`. """ - return _internal_input_layer(features, feature_columns, weight_collections, - trainable, cols_to_vars) + return _internal_input_layer( + features, + feature_columns, + weight_collections=weight_collections, + trainable=trainable, + cols_to_vars=cols_to_vars, + cols_to_output_tensors=cols_to_output_tensors) # TODO(akshayka): InputLayer should be a subclass of Layer, and it diff --git a/tensorflow/python/feature_column/feature_column_test.py b/tensorflow/python/feature_column/feature_column_test.py index 9b482237ab..abb79efa68 100644 --- a/tensorflow/python/feature_column/feature_column_test.py +++ b/tensorflow/python/feature_column/feature_column_test.py @@ -1637,6 +1637,40 @@ class LinearModelTest(test.TestCase): self.assertAllEqual([[0.], [0.]], cols_to_vars[price2][0].eval()) self.assertAllEqual([[0.]], cols_to_vars[price2][1].eval()) + def test_fills_cols_to_output_tensors(self): + # Provide three _DenseColumn's to input_layer: a _NumericColumn, a + # _BucketizedColumn, and an _EmbeddingColumn. Only the _EmbeddingColumn + # creates a Variable. 
+ apple_numeric_column = fc.numeric_column('apple_numeric_column') + banana_dense_feature = fc.numeric_column('banana_dense_feature') + banana_dense_feature_bucketized = fc.bucketized_column( + banana_dense_feature, boundaries=[0.]) + cherry_sparse_column = fc.categorical_column_with_hash_bucket( + 'cherry_sparse_feature', hash_bucket_size=5) + dragonfruit_embedding_column = fc.embedding_column( + cherry_sparse_column, dimension=10) + with ops.Graph().as_default(): + features = { + 'apple_numeric_column': [[3.], [4.]], + 'banana_dense_feature': [[-1.], [4.]], + 'cherry_sparse_feature': [['a'], ['x']], + } + cols_to_output_tensors = {} + all_cols = [ + apple_numeric_column, banana_dense_feature_bucketized, + dragonfruit_embedding_column + ] + input_layer = fc.input_layer( + features, all_cols, cols_to_output_tensors=cols_to_output_tensors) + + # We check the mapping by checking that we have the right keys, + # and that the values (output_tensors) were indeed the ones used to + # form the input layer. + self.assertItemsEqual(all_cols, cols_to_output_tensors.keys()) + input_layer_inputs = [tensor for tensor in input_layer.op.inputs[:-1]] + output_tensors = [tensor for tensor in cols_to_output_tensors.values()] + self.assertItemsEqual(input_layer_inputs, output_tensors) + def test_dense_collection(self): price = fc.numeric_column('price') with ops.Graph().as_default() as g: |