aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/python/feature_column
diff options
context:
space:
mode:
authorGravatar Rohan Jain <rohanj@google.com>2018-08-23 14:13:11 -0700
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2018-08-23 14:17:16 -0700
commitb91f904112914c7ca89f4d3c2839bed258776e78 (patch)
treeba63bc12964dc245f8579ed00e444ae3f07754b6 /tensorflow/python/feature_column
parenteb0c8596b1fd6dc85446ee559a4919f599a55da4 (diff)
Implementing InputLayer using the new feature columns.
Creates a SharedEmbeddingStateManager object to manage the state of shared embedding columns. PiperOrigin-RevId: 209997417
Diffstat (limited to 'tensorflow/python/feature_column')
-rw-r--r--tensorflow/python/feature_column/feature_column_test.py74
-rw-r--r--tensorflow/python/feature_column/feature_column_v2.py550
-rw-r--r--tensorflow/python/feature_column/feature_column_v2_test.py764
3 files changed, 725 insertions, 663 deletions
diff --git a/tensorflow/python/feature_column/feature_column_test.py b/tensorflow/python/feature_column/feature_column_test.py
index 9d2babc6e0..9b482237ab 100644
--- a/tensorflow/python/feature_column/feature_column_test.py
+++ b/tensorflow/python/feature_column/feature_column_test.py
@@ -2747,6 +2747,62 @@ class FunctionalInputLayerTest(test.TestCase):
variables_lib.Variable)
self.assertAllEqual(cols_to_vars[some_embedding_column][0].shape, [5, 10])
+ def test_fills_cols_to_vars_shared_embedding(self):
+ # Provide 5 DenseColumn's to input_layer: a NumericColumn, a
+ # BucketizedColumn, an EmbeddingColumn, two SharedEmbeddingColumns. The
+ # EmbeddingColumn creates a Variable and the two SharedEmbeddingColumns
+ share one variable.
+ price1 = fc.numeric_column('price1')
+ dense_feature = fc.numeric_column('dense_feature')
+ dense_feature_bucketized = fc.bucketized_column(
+ dense_feature, boundaries=[0.])
+ some_sparse_column = fc.categorical_column_with_hash_bucket(
+ 'sparse_feature', hash_bucket_size=5)
+ some_embedding_column = fc.embedding_column(
+ some_sparse_column, dimension=10)
+ categorical_column_a = fc.categorical_column_with_identity(
+ key='aaa', num_buckets=3)
+ categorical_column_b = fc.categorical_column_with_identity(
+ key='bbb', num_buckets=3)
+ shared_embedding_a, shared_embedding_b = fc.shared_embedding_columns(
+ [categorical_column_a, categorical_column_b], dimension=2)
+ with ops.Graph().as_default():
+ features = {
+ 'price1': [[3.], [4.]],
+ 'dense_feature': [[-1.], [4.]],
+ 'sparse_feature': [['a'], ['x']],
+ 'aaa':
+ sparse_tensor.SparseTensor(
+ indices=((0, 0), (1, 0), (1, 1)),
+ values=(0, 1, 0),
+ dense_shape=(2, 2)),
+ 'bbb':
+ sparse_tensor.SparseTensor(
+ indices=((0, 0), (1, 0), (1, 1)),
+ values=(1, 2, 1),
+ dense_shape=(2, 2)),
+ }
+ cols_to_vars = {}
+ all_cols = [
+ price1, dense_feature_bucketized, some_embedding_column,
+ shared_embedding_a, shared_embedding_b
+ ]
+ fc.input_layer(features, all_cols, cols_to_vars=cols_to_vars)
+ self.assertItemsEqual(list(cols_to_vars.keys()), all_cols)
+ self.assertEqual(0, len(cols_to_vars[price1]))
+ self.assertEqual(0, len(cols_to_vars[dense_feature_bucketized]))
+ self.assertEqual(1, len(cols_to_vars[some_embedding_column]))
+ self.assertEqual(1, len(cols_to_vars[shared_embedding_a]))
+ # This is a bug in the current implementation and should be fixed in the
+ # new one.
+ self.assertEqual(0, len(cols_to_vars[shared_embedding_b]))
+ self.assertIsInstance(cols_to_vars[some_embedding_column][0],
+ variables_lib.Variable)
+ self.assertAllEqual(cols_to_vars[some_embedding_column][0].shape, [5, 10])
+ self.assertIsInstance(cols_to_vars[shared_embedding_a][0],
+ variables_lib.Variable)
+ self.assertAllEqual(cols_to_vars[shared_embedding_a][0].shape, [3, 2])
+
def test_fills_cols_to_vars_partitioned_variables(self):
price1 = fc.numeric_column('price1')
dense_feature = fc.numeric_column('dense_feature')
@@ -2772,6 +2828,10 @@ class FunctionalInputLayerTest(test.TestCase):
self.assertEqual(0, len(cols_to_vars[price1]))
self.assertEqual(0, len(cols_to_vars[dense_feature_bucketized]))
self.assertEqual(3, len(cols_to_vars[some_embedding_column]))
+ self.assertEqual(
+ 'input_from_feature_columns/input_layer/sparse_feature_embedding/'
+ 'embedding_weights/part_0:0',
+ cols_to_vars[some_embedding_column][0].name)
self.assertAllEqual(cols_to_vars[some_embedding_column][0].shape, [2, 10])
self.assertAllEqual(cols_to_vars[some_embedding_column][1].shape, [2, 10])
self.assertAllEqual(cols_to_vars[some_embedding_column][2].shape, [1, 10])
@@ -5544,20 +5604,6 @@ class SharedEmbeddingColumnTest(test.TestCase):
self.assertIsNone(partition_info)
return embedding_values
- # Expected lookup result, using combiner='mean'.
- expected_lookups_a = (
- # example 0:
- (7., 11.), # ids [2], embedding = [7, 11]
- # example 1:
- (2., 3.5), # ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
- )
- expected_lookups_b = (
- # example 0:
- (1., 2.), # ids [0], embedding = [1, 2]
- # example 1:
- (0., 0.), # ids [], embedding = [0, 0]
- )
-
# Build columns.
categorical_column_a = fc.categorical_column_with_identity(
key='aaa', num_buckets=vocabulary_size)
diff --git a/tensorflow/python/feature_column/feature_column_v2.py b/tensorflow/python/feature_column/feature_column_v2.py
index b6bf516286..aa66ed77e9 100644
--- a/tensorflow/python/feature_column/feature_column_v2.py
+++ b/tensorflow/python/feature_column/feature_column_v2.py
@@ -142,6 +142,7 @@ from tensorflow.python.framework import ops
from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib
from tensorflow.python.framework import tensor_shape
from tensorflow.python.keras.engine import training
+from tensorflow.python.keras.engine.base_layer import Layer
from tensorflow.python.layers import base
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import check_ops
@@ -155,7 +156,6 @@ from tensorflow.python.ops import parsing_ops
from tensorflow.python.ops import resource_variable_ops
from tensorflow.python.ops import sparse_ops
from tensorflow.python.ops import string_ops
-from tensorflow.python.ops import template
from tensorflow.python.ops import variable_scope
from tensorflow.python.ops import variables
from tensorflow.python.platform import gfile
@@ -164,67 +164,148 @@ from tensorflow.python.training import checkpoint_utils
from tensorflow.python.util import nest
-def _internal_input_layer(features,
- feature_columns,
- weight_collections=None,
- trainable=True,
- cols_to_vars=None,
- scope=None):
- """See input_layer. `scope` is a name or variable scope to use."""
+class StateManager(object):
+ """Manages the state associated with FeatureColumns.
- feature_columns = fc_old._normalize_feature_columns(feature_columns) # pylint: disable=protected-access
- for column in feature_columns:
- if not isinstance(column, fc_old._DenseColumn): # pylint: disable=protected-access
- raise ValueError(
- 'Items of feature_columns must be a _DenseColumn. '
- 'You can wrap a categorical column with an '
- 'embedding_column or indicator_column. Given: {}'.format(column))
- weight_collections = list(weight_collections or [])
- if ops.GraphKeys.GLOBAL_VARIABLES not in weight_collections:
- weight_collections.append(ops.GraphKeys.GLOBAL_VARIABLES)
- if ops.GraphKeys.MODEL_VARIABLES not in weight_collections:
- weight_collections.append(ops.GraphKeys.MODEL_VARIABLES)
-
- # a non-None `scope` can allow for variable reuse, when, e.g., this function
- # is wrapped by a `make_template`.
- with variable_scope.variable_scope(
- scope, default_name='input_layer', values=features.values()):
- builder = fc_old._LazyBuilder(features) # pylint: disable=protected-access
- output_tensors = []
- ordered_columns = []
- for column in sorted(feature_columns, key=lambda x: x.name):
- ordered_columns.append(column)
- with variable_scope.variable_scope(
- None, default_name=column._var_scope_name): # pylint: disable=protected-access
- tensor = column._get_dense_tensor( # pylint: disable=protected-access
- builder,
- weight_collections=weight_collections,
- trainable=trainable)
- num_elements = column._variable_shape.num_elements() # pylint: disable=protected-access
- batch_size = array_ops.shape(tensor)[0]
- output_tensors.append(
- array_ops.reshape(tensor, shape=(batch_size, num_elements)))
- if cols_to_vars is not None:
- # Retrieve any variables created (some _DenseColumn's don't create
- # variables, in which case an empty list is returned).
- cols_to_vars[column] = ops.get_collection(
- ops.GraphKeys.GLOBAL_VARIABLES,
- scope=variable_scope.get_variable_scope().name)
- _verify_static_batch_size_equality(output_tensors, ordered_columns)
- return array_ops.concat(output_tensors, 1)
+ Some `FeatureColumn`s create variables or resources to assist their
+ computation. The `StateManager` is responsible for creating and storing these
+ objects since `FeatureColumn`s are supposed to be stateless configuration
+ only.
+ """
+
+ def create_variable(self,
+ feature_column,
+ name,
+ shape,
+ dtype=None,
+ trainable=True,
+ initializer=None):
+ """Creates a new variable.
+
+ Args:
+ feature_column: A `FeatureColumn` object this variable corresponds to.
+ name: variable name.
+ shape: variable shape.
+ dtype: The type of the variable. Defaults to `self.dtype` or `float32`.
+ trainable: Whether this variable is trainable or not.
+ initializer: initializer instance (callable).
+
+ Returns:
+ The created variable.
+ """
+ del feature_column, name, shape, dtype, trainable, initializer
+ raise NotImplementedError('StateManager.create_variable')
+
+ def add_variable(self, feature_column, var):
+ """Adds an existing variable to the state.
+
+ Args:
+ feature_column: A `FeatureColumn` object to associate this variable with.
+ var: The variable.
+ """
+ del feature_column, var
+ raise NotImplementedError('StateManager.add_variable')
+
+ def get_variable(self, feature_column, name):
+ """Returns an existing variable.
+
+ Args:
+ feature_column: A `FeatureColumn` object this variable corresponds to.
+ name: variable name.
+ """
+ del feature_column, name
+ raise NotImplementedError('StateManager.get_var')
+
+ def add_resource(self, feature_column, name, resource):
+ """Creates a new resource.
+
+ Resources can be things such as tables etc.
+
+ Args:
+ feature_column: A `FeatureColumn` object this resource corresponds to.
+ name: Name of the resource.
+ resource: The resource.
+
+ Returns:
+ The created resource.
+ """
+ del feature_column, name, resource
+ raise NotImplementedError('StateManager.add_resource')
+ def get_resource(self, feature_column, name):
+ """Returns an already created resource.
-def input_layer(features,
- feature_columns,
- weight_collections=None,
- trainable=True,
- cols_to_vars=None):
- """Returns a dense `Tensor` as input layer based on given `feature_columns`.
+ Resources can be things such as tables etc.
+
+ Args:
+ feature_column: A `FeatureColumn` object this variable corresponds to.
+ name: Name of the resource.
+ """
+ del feature_column, name
+ raise NotImplementedError('StateManager.get_resource')
+
+
+class _InputLayerStateManager(StateManager):
+ """Manages the state of InputLayer."""
+
+ def __init__(self, layer, feature_columns, trainable):
+ """Creates an _InputLayerStateManager object.
+
+ Args:
+ layer: The input layer this state manager is associated with.
+ feature_columns: List of feature columns for the input layer
+ trainable: Whether by default, variables created are trainable or not.
+ """
+ self._trainable = trainable
+ self._layer = layer
+ self._cols_to_vars_map = {}
+ self._cols_to_names_map = {}
+ for column in sorted(feature_columns, key=lambda x: x.name):
+ self._cols_to_vars_map[column] = {}
+ base_name = column.name
+ if isinstance(column, SharedEmbeddingColumn):
+ base_name = column.shared_collection_name
+ with variable_scope.variable_scope(base_name) as vs:
+ self._cols_to_names_map[column] = _strip_leading_slashes(vs.name)
+
+ def create_variable(self,
+ feature_column,
+ name,
+ shape,
+ dtype=None,
+ trainable=True,
+ initializer=None):
+ if name in self._cols_to_vars_map[feature_column]:
+ raise ValueError('Variable already exists.')
+ with variable_scope.variable_scope(self._cols_to_names_map[feature_column]):
+ var = self._layer.add_variable(
+ name=name,
+ shape=shape,
+ dtype=dtype,
+ initializer=initializer,
+ trainable=self._trainable and trainable,
+ # TODO(rohanj): Get rid of this hack once we have a mechanism for
+ # specifying a default partitioner for an entire layer. In that case,
+ # the default getter for Layers should work.
+ getter=variable_scope.get_variable)
+ self._cols_to_vars_map[feature_column][name] = var
+ return var
+
+ def get_variable(self, feature_column, name):
+ if name in self._cols_to_vars_map[feature_column]:
+ return self._cols_to_vars_map[feature_column][name]
+ raise ValueError('Variable does not exist.')
+
+
+class FeatureLayer(Layer):
+ """A layer that produces a dense `Tensor` based on given `feature_columns`.
Generally a single example in training data is described with FeatureColumns.
At the first layer of the model, this column oriented data should be converted
to a single `Tensor`.
+ This layer can be called multiple times with different features.
+
Example:
```python
@@ -233,105 +314,122 @@ def input_layer(features,
categorical_column_with_hash_bucket("keywords", 10K), dimensions=16)
columns = [price, keywords_embedded, ...]
features = tf.parse_example(..., features=make_parse_example_spec(columns))
- dense_tensor = input_layer(features, columns)
+ feature_layer = FeatureLayer(columns)
+ dense_tensor = feature_layer(features)
for units in [128, 64, 32]:
dense_tensor = tf.layers.dense(dense_tensor, units, tf.nn.relu)
- prediction = tf.layers.dense(dense_tensor, 1)
- ```
-
- Args:
- features: A mapping from key to tensors. `_FeatureColumn`s look up via these
- keys. For example `numeric_column('price')` will look at 'price' key in
- this dict. Values can be a `SparseTensor` or a `Tensor` depends on
- corresponding `_FeatureColumn`.
- feature_columns: An iterable containing the FeatureColumns to use as inputs
- to your model. All items should be instances of classes derived from
- `_DenseColumn` such as `numeric_column`, `embedding_column`,
- `bucketized_column`, `indicator_column`. If you have categorical features,
- you can wrap them with an `embedding_column` or `indicator_column`.
- weight_collections: A list of collection names to which the Variable will be
- added. Note that variables will also be added to collections
- `tf.GraphKeys.GLOBAL_VARIABLES` and `ops.GraphKeys.MODEL_VARIABLES`.
- trainable: If `True` also add the variable to the graph collection
- `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
- cols_to_vars: If not `None`, must be a dictionary that will be filled with a
- mapping from `_FeatureColumn` to list of `Variable`s. For example, after
- the call, we might have cols_to_vars =
- {_EmbeddingColumn(
- categorical_column=_HashedCategoricalColumn(
- key='sparse_feature', hash_bucket_size=5, dtype=tf.string),
- dimension=10): [<tf.Variable 'some_variable:0' shape=(5, 10),
- <tf.Variable 'some_variable:1' shape=(5, 10)]}
- If a column creates no variables, its value will be an empty list.
-
- Returns:
- A `Tensor` which represents input layer of a model. Its shape
- is (batch_size, first_layer_dimension) and its dtype is `float32`.
- first_layer_dimension is determined based on given `feature_columns`.
-
- Raises:
- ValueError: if an item in `feature_columns` is not a `_DenseColumn`.
- """
- return _internal_input_layer(features, feature_columns, weight_collections,
- trainable, cols_to_vars)
-
-
-# TODO(akshayka): InputLayer should be a subclass of Layer, and it
-# should implement the logic in input_layer using Layer's build-and-call
-# paradigm; input_layer should create an instance of InputLayer and
-# return the result of invoking its apply method, just as functional layers do.
-class InputLayer(object):
- """An object-oriented version of `input_layer` that reuses variables."""
+ prediction = tf.layers.dense(dense_tensor, 1)."""
def __init__(self,
feature_columns,
- weight_collections=None,
trainable=True,
- cols_to_vars=None):
- """See `input_layer`."""
+ name=None,
+ shared_state_manager=None,
+ **kwargs):
+ """Constructs a FeatureLayer.
- self._feature_columns = feature_columns
- self._weight_collections = weight_collections
- self._trainable = trainable
- self._cols_to_vars = cols_to_vars
- self._input_layer_template = template.make_template(
- 'feature_column_input_layer',
- _internal_input_layer,
- create_scope_now_=True)
- self._scope = self._input_layer_template.variable_scope
-
- def __call__(self, features):
- return self._input_layer_template(
- features=features,
- feature_columns=self._feature_columns,
- weight_collections=self._weight_collections,
- trainable=self._trainable,
- cols_to_vars=None,
- scope=self._scope)
+ Args:
+ feature_columns: An iterable containing the FeatureColumns to use as
+ inputs to your model. All items should be instances of classes derived
+ from `DenseColumn` such as `numeric_column`, `embedding_column`,
+ `bucketized_column`, `indicator_column`. If you have categorical
+ features, you can wrap them with an `embedding_column` or
+ `indicator_column`.
+ trainable: If `True` also add the variable to the graph collection
+ `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
+ name: Name to give to the FeatureLayer.
+ shared_state_manager: SharedEmbeddingStateManager that manages the state
+ of SharedEmbeddingColumns. The state of SharedEmbeddingColumns, unlike
+ regular embedding columns, cannot be owned by the FeatureLayer itself since
+ SharedEmbeddingColumns can be shared across different FeatureLayers. As a
+ result users are expected to create a SharedEmbeddingStateManager object
+ which would be responsible for managing the shared state and can be
+ passed into different FeatureLayer objects to share state. For example,
+
+ ```python
+ sc_1, sc_2 = shared_embedding_column_v2(...)
+ sc_3, sc_4 = shared_embedding_column_v2(...)
+ ssm = SharedEmbeddingStateManager()
+ feature_layer1 = FeatureLayer([sc_1, sc_3], ...,
+ shared_state_manager=ssm)
+ feature_layer2 = FeatureLayer([sc_2, sc_4], ...,
+ shared_state_manager=ssm)
+ ```
+ now feature_layer1 and feature_layer2 will share variables across. If
+ sharing is not desired, one can create 2 separate
+ SharedEmbeddingStateManager objects
+
+ ```python
+ ssm1 = SharedEmbeddingStateManager()
+ ssm2 = SharedEmbeddingStateManager()
+ feature_layer1 = FeatureLayer([sc_1, sc_3], ...,
+ shared_state_manager=ssm1)
+ feature_layer2 = FeatureLayer([sc_2, sc_4], ...,
+ shared_state_manager=ssm2)
+ ```
+ **kwargs: Keyword arguments to construct a layer.
- @property
- def non_trainable_variables(self):
- return self._input_layer_template.non_trainable_variables
+ Raises:
+ ValueError: if an item in `feature_columns` is not a `DenseColumn`.
+ """
+ super(FeatureLayer, self).__init__(name=name, trainable=trainable, **kwargs)
- @property
- def non_trainable_weights(self):
- return self._input_layer_template.non_trainable_weights
+ self._feature_columns = _normalize_feature_columns(feature_columns)
+ self._state_manager = _InputLayerStateManager(self, self._feature_columns,
+ self.trainable)
+ self._shared_state_manager = shared_state_manager
+ for column in sorted(self._feature_columns, key=lambda x: x.name):
+ if not isinstance(column, DenseColumn):
+ raise ValueError(
+ 'Items of feature_columns must be a DenseColumn. '
+ 'You can wrap a categorical column with an '
+ 'embedding_column or indicator_column. Given: {}'.format(column))
- @property
- def trainable_variables(self):
- return self._input_layer_template.trainable_variables
+ def build(self, _):
+ for column in sorted(self._feature_columns, key=lambda x: x.name):
+ if isinstance(column, SharedEmbeddingColumn):
+ column.create_state(self._shared_state_manager)
+ else:
+ with variable_scope.variable_scope(None, default_name=self.name):
+ column.create_state(self._state_manager)
+ super(FeatureLayer, self).build(None)
- @property
- def trainable_weights(self):
- return self._input_layer_template.trainable_weights
+ def call(self, features, cols_to_output_tensors=None):
+ """Returns a dense tensor corresponding to the `feature_columns`.
- @property
- def variables(self):
- return self._input_layer_template.variables
+ Args:
+ features: A mapping from key to tensors. `FeatureColumn`s look up via
+ these keys. For example `numeric_column('price')` will look at 'price'
+ key in this dict. Values can be a `SparseTensor` or a `Tensor` depends
+ on corresponding `FeatureColumn`.
+ cols_to_output_tensors: If not `None`, this will be filled with a dict
+ mapping feature columns to output tensors created.
- @property
- def weights(self):
- return self._input_layer_template.weights
+ Returns:
+ A `Tensor` which represents input layer of a model. Its shape
+ is (batch_size, first_layer_dimension) and its dtype is `float32`.
+ first_layer_dimension is determined based on given `feature_columns`.
+ """
+ transformation_cache = FeatureTransformationCache(features)
+ output_tensors = []
+ ordered_columns = []
+ for column in sorted(self._feature_columns, key=lambda x: x.name):
+ ordered_columns.append(column)
+ if isinstance(column, SharedEmbeddingColumn):
+ tensor = column.get_dense_tensor(transformation_cache,
+ self._shared_state_manager)
+ else:
+ tensor = column.get_dense_tensor(transformation_cache,
+ self._state_manager)
+ num_elements = column.variable_shape.num_elements()
+ batch_size = array_ops.shape(tensor)[0]
+ tensor = array_ops.reshape(tensor, shape=(batch_size, num_elements))
+ output_tensors.append(tensor)
+ if cols_to_output_tensors is not None:
+ cols_to_output_tensors[column] = tensor
+
+ _verify_static_batch_size_equality(output_tensors, ordered_columns)
+ return array_ops.concat(output_tensors, 1)
def linear_model(features,
@@ -565,12 +663,15 @@ class _BiasLayer(base.Layer):
return self._bias_variable
-def _get_expanded_variable_list(variable):
- if (isinstance(variable, variables.Variable) or
- resource_variable_ops.is_resource_variable(variable)):
- return [variable] # Single variable case.
- else: # Must be a PartitionedVariable, so convert into a list.
- return list(variable)
+def _get_expanded_variable_list(var_list):
+ returned_list = []
+ for variable in var_list:
+ if (isinstance(variable, variables.Variable) or
+ resource_variable_ops.is_resource_variable(variable)):
+ returned_list.append(variable) # Single variable case.
+ else: # Must be a PartitionedVariable, so convert into a list.
+ returned_list.extend(list(variable))
+ return returned_list
def _strip_leading_slashes(name):
@@ -661,7 +762,7 @@ class _LinearModel(training.Model):
scope=variable_scope.get_variable_scope()), # pylint: disable=not-callable
name='weighted_sum')
bias = self._bias_layer.variables[0]
- self._cols_to_vars['bias'] = _get_expanded_variable_list(bias)
+ self._cols_to_vars['bias'] = _get_expanded_variable_list([bias])
return predictions
def _add_layers(self, layers):
@@ -877,10 +978,15 @@ def embedding_column(
trainable=trainable)
-def shared_embedding_columns(
- categorical_columns, dimension, combiner='mean', initializer=None,
- shared_embedding_collection_name=None, ckpt_to_load_from=None,
- tensor_name_in_ckpt=None, max_norm=None, trainable=True):
+def shared_embedding_columns_v2(categorical_columns,
+ dimension,
+ combiner='mean',
+ initializer=None,
+ shared_embedding_collection_name=None,
+ ckpt_to_load_from=None,
+ tensor_name_in_ckpt=None,
+ max_norm=None,
+ trainable=True):
"""List of dense columns that convert from sparse, categorical input.
This is similar to `embedding_column`, except that it produces a list of
@@ -1803,51 +1909,6 @@ def crossed_column(keys, hash_bucket_size, hash_key=None):
keys=tuple(keys), hash_bucket_size=hash_bucket_size, hash_key=hash_key)
-class StateManager(object):
- """Manages the state associated with FeatureColumns.
-
- Some `FeatureColumn`s create variables or resources to assist their
- computation. The `StateManager` is responsible for creating and storing these
- objects since `FeatureColumn`s are supposed to be stateless configuration
- only.
- """
-
- def get_variable(self,
- feature_column,
- name,
- shape,
- dtype=None,
- initializer=None):
- """Creates a new variable or returns an existing one.
-
- Args:
- feature_column: A `FeatureColumn` object this variable corresponds to.
- name: variable name.
- shape: variable shape.
- dtype: The type of the variable. Defaults to `self.dtype` or `float32`.
- initializer: initializer instance (callable).
-
- Returns:
- The variable.
- """
- raise NotImplementedError('StateManager.get_variable')
-
- def get_resource(self, feature_column, name, resource_creator):
- """Creates a new resource or returns an existing one.
-
- Resources can be things such as tables etc.
-
- Args:
- feature_column: A `FeatureColumn` object this variable corresponds to.
- name: Name of the resource.
- resource_creator: A callable that can create the resource.
-
- Returns:
- The resource.
- """
- raise NotImplementedError('StateManager.get_resource')
-
-
class FeatureColumn(object):
"""Represents a feature column abstraction.
@@ -2550,6 +2611,17 @@ class EmbeddingColumn(
"""See `DenseColumn` base class."""
return tensor_shape.vector(self.dimension)
+ def create_state(self, state_manager):
+ """Creates the embedding lookup variable."""
+ embedding_shape = (self.categorical_column.num_buckets, self.dimension)
+ state_manager.create_variable(
+ self,
+ name='embedding_weights',
+ shape=embedding_shape,
+ dtype=dtypes.float32,
+ trainable=self.trainable,
+ initializer=self.initializer)
+
def _get_dense_tensor_internal(self, transformation_cache, state_manager):
"""Private method that follows the signature of _get_dense_tensor."""
# Get sparse IDs and weights.
@@ -2558,13 +2630,8 @@ class EmbeddingColumn(
sparse_ids = sparse_tensors.id_tensor
sparse_weights = sparse_tensors.weight_tensor
- embedding_shape = (self.categorical_column.num_buckets, self.dimension)
embedding_weights = state_manager.get_variable(
- self,
- name='embedding_weights',
- shape=embedding_shape,
- dtype=dtypes.float32,
- initializer=self.initializer)
+ self, name='embedding_weights')
if self.ckpt_to_load_from is not None:
to_restore = embedding_weights
@@ -2637,6 +2704,68 @@ def _get_graph_for_variable(var):
return var.graph
+class SharedEmbeddingStateManager(Layer):
+ """A state manager that handle the state of shared embedding columns.
+
+ This can handle multiple sets of columns that share variables."""
+
+ def __init__(self, trainable=True, name=None, **kwargs):
+ """Constructs a `SharedEmbeddingStateManager`.
+
+ Args:
+ trainable: If true, variables created are trainable.
+ name: Name of the State Manager.
+ **kwargs: Keyword arguments.
+ """
+ super(SharedEmbeddingStateManager, self).__init__(
+ name=name, trainable=trainable, **kwargs)
+ self._var_dict = {}
+
+ def create_variable(self,
+ name,
+ shape,
+ dtype=None,
+ trainable=True,
+ initializer=None):
+ """Creates a variable.
+
+ Makes sure only one var is created per `shared_collection_name`. `name` is
+ ignored here as the variable is named `shared_collection_name` instead.
+
+ Args:
+ name: Name of the variable. Not used.
+ shape: Variable shape.
+ dtype: Variable type.
+ trainable: If variable created should be trainable or not.
+ initializer: Variable initializer.
+
+ Returns:
+ A variable or partitioned variable.
+ """
+ if name in self._var_dict:
+ var = self._var_dict[name]
+ return var
+ with variable_scope.variable_scope(
+ self.name, reuse=variable_scope.AUTO_REUSE):
+ var = self.add_variable(
+ name=name,
+ shape=shape,
+ dtype=dtype,
+ trainable=self.trainable and trainable,
+ initializer=initializer,
+ # TODO(rohanj): Get rid of this hack once we have a mechanism for
+ # specifying a default partitioner for an entire layer. In that case,
+ # the default getter for Layers should work.
+ getter=variable_scope.get_variable)
+ self._var_dict[name] = var
+ return var
+
+ def get_variable(self, feature_column, name):
+ if name not in self._var_dict:
+ raise ValueError('Variable name: {} not recognized.'.format(name))
+ return self._var_dict[name]
+
+
class SharedEmbeddingColumn(
DenseColumn, SequenceDenseColumn,
collections.namedtuple(
@@ -2675,6 +2804,16 @@ class SharedEmbeddingColumn(
"""See `DenseColumn` base class."""
return tensor_shape.vector(self.dimension)
+ def create_state(self, state_manager):
+ """Creates the shared embedding lookup variable."""
+ embedding_shape = (self.categorical_column.num_buckets, self.dimension)
+ state_manager.create_variable(
+ name=self.shared_collection_name,
+ shape=embedding_shape,
+ dtype=dtypes.float32,
+ trainable=self.trainable,
+ initializer=self.initializer)
+
def _get_dense_tensor_internal(self, transformation_cache, state_manager):
"""Private method that follows the signature of _get_dense_tensor."""
# This method is called from a variable_scope with name _var_scope_name,
@@ -2687,13 +2826,8 @@ class SharedEmbeddingColumn(
sparse_ids = sparse_tensors.id_tensor
sparse_weights = sparse_tensors.weight_tensor
- embedding_shape = (self.categorical_column.num_buckets, self.dimension)
embedding_weights = state_manager.get_variable(
- self,
- name='embedding_weights',
- shape=embedding_shape,
- dtype=dtypes.float32,
- initializer=self.initializer)
+ self, name=self.shared_collection_name)
if self.ckpt_to_load_from is not None:
to_restore = embedding_weights
diff --git a/tensorflow/python/feature_column/feature_column_v2_test.py b/tensorflow/python/feature_column/feature_column_v2_test.py
index ad578d287a..6b343ecf3e 100644
--- a/tensorflow/python/feature_column/feature_column_v2_test.py
+++ b/tensorflow/python/feature_column/feature_column_v2_test.py
@@ -33,12 +33,12 @@ from tensorflow.python.eager import context
from tensorflow.python.estimator.inputs import numpy_io
from tensorflow.python.feature_column import feature_column as fc_old
from tensorflow.python.feature_column import feature_column_v2 as fc
+from tensorflow.python.feature_column.feature_column_v2 import _LinearModel
+from tensorflow.python.feature_column.feature_column_v2 import _transform_features
from tensorflow.python.feature_column.feature_column_v2 import FeatureColumn
+from tensorflow.python.feature_column.feature_column_v2 import FeatureLayer
from tensorflow.python.feature_column.feature_column_v2 import FeatureTransformationCache
-from tensorflow.python.feature_column.feature_column_v2 import InputLayer
from tensorflow.python.feature_column.feature_column_v2 import StateManager
-from tensorflow.python.feature_column.feature_column_v2 import _LinearModel
-from tensorflow.python.feature_column.feature_column_v2 import _transform_features
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import errors
@@ -824,22 +824,6 @@ class HashedCategoricalColumnTest(test.TestCase):
self.assertEqual(
transformation_cache.get(hashed_sparse, None), id_weight_pair.id_tensor)
- def DISABLED_test_get_sparse_tensors_weight_collections(self):
- column = fc.categorical_column_with_hash_bucket('aaa', 10)
- inputs = sparse_tensor.SparseTensor(
- values=['omar', 'stringer', 'marlo'],
- indices=[[0, 0], [1, 0], [1, 1]],
- dense_shape=[2, 2])
- column._get_sparse_tensors(
- FeatureTransformationCache({
- 'aaa': inputs
- }),
- weight_collections=('my_weights',))
-
- self.assertItemsEqual(
- [], ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES))
- self.assertItemsEqual([], ops.get_collection('my_weights'))
-
def test_get_sparse_tensors_dense_input(self):
hashed_sparse = fc.categorical_column_with_hash_bucket('wire', 10)
transformation_cache = FeatureTransformationCache({
@@ -2640,13 +2624,13 @@ class _LinearModelTest(test.TestCase):
sess.run(net, feed_dict={features['price']: np.array(1)})
-class InputLayerTest(test.TestCase):
+class FeatureLayerTest(test.TestCase):
@test_util.run_in_graph_and_eager_modes()
def test_retrieving_input(self):
features = {'a': [0.]}
- input_layer = InputLayer(fc_old.numeric_column('a'))
- inputs = self.evaluate(input_layer(features))
+ feature_layer = FeatureLayer(fc.numeric_column('a'))
+ inputs = self.evaluate(feature_layer(features))
self.assertAllClose([[0.]], inputs)
def test_reuses_variables(self):
@@ -2657,7 +2641,7 @@ class InputLayerTest(test.TestCase):
dense_shape=(3, 3))
# Create feature columns (categorical and embedding).
- categorical_column = fc_old.categorical_column_with_identity(
+ categorical_column = fc.categorical_column_with_identity(
key='a', num_buckets=3)
embedding_dimension = 2
def _embedding_column_initializer(shape, dtype, partition_info):
@@ -2670,16 +2654,16 @@ class InputLayerTest(test.TestCase):
(1, 1)) # id 2
return embedding_values
- embedding_column = fc_old.embedding_column(
+ embedding_column = fc.embedding_column(
categorical_column,
dimension=embedding_dimension,
initializer=_embedding_column_initializer)
- input_layer = InputLayer([embedding_column])
+ feature_layer = FeatureLayer([embedding_column])
features = {'a': sparse_input}
- inputs = input_layer(features)
- variables = input_layer.variables
+ inputs = feature_layer(features)
+ variables = feature_layer.variables
# Sanity check: test that the inputs are correct.
self.assertAllEqual([[1, 0], [0, 1], [1, 1]], inputs)
@@ -2687,13 +2671,13 @@ class InputLayerTest(test.TestCase):
# Check that only one variable was created.
self.assertEqual(1, len(variables))
- # Check that invoking input_layer on the same features does not create
+ # Check that invoking feature_layer on the same features does not create
# additional variables
- _ = input_layer(features)
+ _ = feature_layer(features)
self.assertEqual(1, len(variables))
- self.assertEqual(variables[0], input_layer.variables[0])
+ self.assertEqual(variables[0], feature_layer.variables[0])
- def test_feature_column_input_layer_gradient(self):
+ def test_feature_column_feature_layer_gradient(self):
with context.eager_mode():
sparse_input = sparse_tensor.SparseTensor(
indices=((0, 0), (1, 0), (2, 0)),
@@ -2701,7 +2685,7 @@ class InputLayerTest(test.TestCase):
dense_shape=(3, 3))
# Create feature columns (categorical and embedding).
- categorical_column = fc_old.categorical_column_with_identity(
+ categorical_column = fc.categorical_column_with_identity(
key='a', num_buckets=3)
embedding_dimension = 2
@@ -2715,16 +2699,16 @@ class InputLayerTest(test.TestCase):
(1, 1)) # id 2
return embedding_values
- embedding_column = fc_old.embedding_column(
+ embedding_column = fc.embedding_column(
categorical_column,
dimension=embedding_dimension,
initializer=_embedding_column_initializer)
- input_layer = InputLayer([embedding_column])
+ feature_layer = FeatureLayer([embedding_column])
features = {'a': sparse_input}
def scale_matrix():
- matrix = input_layer(features)
+ matrix = feature_layer(features)
return 2 * matrix
# Sanity check: Verify that scale_matrix returns the correct output.
@@ -2739,185 +2723,139 @@ class InputLayerTest(test.TestCase):
self.assertAllEqual([0, 1, 2], indexed_slice.indices)
self.assertAllEqual([[2, 2], [2, 2], [2, 2]], gradient)
-
-class FunctionalInputLayerTest(test.TestCase):
-
def test_raises_if_empty_feature_columns(self):
with self.assertRaisesRegexp(ValueError,
'feature_columns must not be empty'):
- fc.input_layer(features={}, feature_columns=[])
+ FeatureLayer(feature_columns=[])(features={})
def test_should_be_dense_column(self):
- with self.assertRaisesRegexp(ValueError, 'must be a _DenseColumn'):
- fc.input_layer(
- features={'a': [[0]]},
- feature_columns=[
- fc_old.categorical_column_with_hash_bucket('wire_cast', 4)
- ])
+ with self.assertRaisesRegexp(ValueError, 'must be a DenseColumn'):
+ FeatureLayer(feature_columns=[
+ fc.categorical_column_with_hash_bucket('wire_cast', 4)
+ ])(
+ features={
+ 'a': [[0]]
+ })
def test_does_not_support_dict_columns(self):
with self.assertRaisesRegexp(
ValueError, 'Expected feature_columns to be iterable, found dict.'):
- fc.input_layer(
- features={'a': [[0]]},
- feature_columns={'a': fc_old.numeric_column('a')})
+ FeatureLayer(feature_columns={'a': fc.numeric_column('a')})(
+ features={
+ 'a': [[0]]
+ })
def test_bare_column(self):
with ops.Graph().as_default():
features = features = {'a': [0.]}
- net = fc.input_layer(features, fc_old.numeric_column('a'))
+ net = FeatureLayer(fc.numeric_column('a'))(features)
with _initialized_session():
self.assertAllClose([[0.]], net.eval())
def test_column_generator(self):
with ops.Graph().as_default():
features = features = {'a': [0.], 'b': [1.]}
- columns = (fc_old.numeric_column(key) for key in features)
- net = fc.input_layer(features, columns)
+ columns = (fc.numeric_column(key) for key in features)
+ net = FeatureLayer(columns)(features)
with _initialized_session():
self.assertAllClose([[0., 1.]], net.eval())
def test_raises_if_duplicate_name(self):
with self.assertRaisesRegexp(
ValueError, 'Duplicate feature column name found for columns'):
- fc.input_layer(
- features={'a': [[0]]},
- feature_columns=[
- fc_old.numeric_column('a'),
- fc_old.numeric_column('a')
- ])
+ FeatureLayer(
+ feature_columns=[fc.numeric_column('a'),
+ fc.numeric_column('a')])(
+ features={
+ 'a': [[0]]
+ })
def test_one_column(self):
- price = fc_old.numeric_column('price')
+ price = fc.numeric_column('price')
with ops.Graph().as_default():
features = {'price': [[1.], [5.]]}
- net = fc.input_layer(features, [price])
+ net = FeatureLayer([price])(features)
with _initialized_session():
self.assertAllClose([[1.], [5.]], net.eval())
def test_multi_dimension(self):
- price = fc_old.numeric_column('price', shape=2)
+ price = fc.numeric_column('price', shape=2)
with ops.Graph().as_default():
features = {'price': [[1., 2.], [5., 6.]]}
- net = fc.input_layer(features, [price])
+ net = FeatureLayer([price])(features)
with _initialized_session():
self.assertAllClose([[1., 2.], [5., 6.]], net.eval())
def test_raises_if_shape_mismatch(self):
- price = fc_old.numeric_column('price', shape=2)
+ price = fc.numeric_column('price', shape=2)
with ops.Graph().as_default():
features = {'price': [[1.], [5.]]}
with self.assertRaisesRegexp(
Exception,
r'Cannot reshape a tensor with 2 elements to shape \[2,2\]'):
- fc.input_layer(features, [price])
+ FeatureLayer([price])(features)
def test_reshaping(self):
- price = fc_old.numeric_column('price', shape=[1, 2])
+ price = fc.numeric_column('price', shape=[1, 2])
with ops.Graph().as_default():
features = {'price': [[[1., 2.]], [[5., 6.]]]}
- net = fc.input_layer(features, [price])
+ net = FeatureLayer([price])(features)
with _initialized_session():
self.assertAllClose([[1., 2.], [5., 6.]], net.eval())
def test_multi_column(self):
- price1 = fc_old.numeric_column('price1', shape=2)
- price2 = fc_old.numeric_column('price2')
+ price1 = fc.numeric_column('price1', shape=2)
+ price2 = fc.numeric_column('price2')
with ops.Graph().as_default():
features = {
'price1': [[1., 2.], [5., 6.]],
'price2': [[3.], [4.]]
}
- net = fc.input_layer(features, [price1, price2])
+ net = FeatureLayer([price1, price2])(features)
with _initialized_session():
self.assertAllClose([[1., 2., 3.], [5., 6., 4.]], net.eval())
- def test_fills_cols_to_vars(self):
- # Provide three _DenseColumn's to input_layer: a _NumericColumn, a
- # _BucketizedColumn, and an _EmbeddingColumn. Only the _EmbeddingColumn
- # creates a Variable.
- price1 = fc_old.numeric_column('price1')
- dense_feature = fc_old.numeric_column('dense_feature')
- dense_feature_bucketized = fc_old.bucketized_column(
- dense_feature, boundaries=[0.])
- some_sparse_column = fc_old.categorical_column_with_hash_bucket(
- 'sparse_feature', hash_bucket_size=5)
- some_embedding_column = fc_old.embedding_column(
- some_sparse_column, dimension=10)
- with ops.Graph().as_default():
- features = {
- 'price1': [[3.], [4.]],
- 'dense_feature': [[-1.], [4.]],
- 'sparse_feature': [['a'], ['x']],
- }
- cols_to_vars = {}
- all_cols = [price1, dense_feature_bucketized, some_embedding_column]
- fc.input_layer(features, all_cols, cols_to_vars=cols_to_vars)
- self.assertItemsEqual(list(cols_to_vars.keys()), all_cols)
- self.assertEqual(0, len(cols_to_vars[price1]))
- self.assertEqual(0, len(cols_to_vars[dense_feature_bucketized]))
- self.assertEqual(1, len(cols_to_vars[some_embedding_column]))
- self.assertIsInstance(cols_to_vars[some_embedding_column][0],
- variables_lib.Variable)
- self.assertAllEqual(cols_to_vars[some_embedding_column][0].shape, [5, 10])
-
- def test_fills_cols_to_vars_partitioned_variables(self):
- price1 = fc_old.numeric_column('price1')
- dense_feature = fc_old.numeric_column('dense_feature')
- dense_feature_bucketized = fc_old.bucketized_column(
- dense_feature, boundaries=[0.])
- some_sparse_column = fc_old.categorical_column_with_hash_bucket(
- 'sparse_feature', hash_bucket_size=5)
- some_embedding_column = fc_old.embedding_column(
- some_sparse_column, dimension=10)
+ def test_cols_to_output_tensors(self):
+ price1 = fc.numeric_column('price1', shape=2)
+ price2 = fc.numeric_column('price2')
with ops.Graph().as_default():
- features = {
- 'price1': [[3.], [4.]],
- 'dense_feature': [[-1.], [4.]],
- 'sparse_feature': [['a'], ['x']],
- }
- cols_to_vars = {}
- all_cols = [price1, dense_feature_bucketized, some_embedding_column]
- with variable_scope.variable_scope(
- 'input_from_feature_columns',
- partitioner=partitioned_variables.fixed_size_partitioner(3, axis=0)):
- fc.input_layer(features, all_cols, cols_to_vars=cols_to_vars)
- self.assertItemsEqual(list(cols_to_vars.keys()), all_cols)
- self.assertEqual(0, len(cols_to_vars[price1]))
- self.assertEqual(0, len(cols_to_vars[dense_feature_bucketized]))
- self.assertEqual(3, len(cols_to_vars[some_embedding_column]))
- self.assertAllEqual(cols_to_vars[some_embedding_column][0].shape, [2, 10])
- self.assertAllEqual(cols_to_vars[some_embedding_column][1].shape, [2, 10])
- self.assertAllEqual(cols_to_vars[some_embedding_column][2].shape, [1, 10])
+ cols_dict = {}
+ features = {'price1': [[1., 2.], [5., 6.]], 'price2': [[3.], [4.]]}
+ feature_layer = FeatureLayer([price1, price2])
+ net = feature_layer(features, cols_dict)
+ with _initialized_session():
+ self.assertAllClose([[1., 2.], [5., 6.]], cols_dict[price1].eval())
+ self.assertAllClose([[3.], [4.]], cols_dict[price2].eval())
+ self.assertAllClose([[1., 2., 3.], [5., 6., 4.]], net.eval())
def test_column_order(self):
- price_a = fc_old.numeric_column('price_a')
- price_b = fc_old.numeric_column('price_b')
+ price_a = fc.numeric_column('price_a')
+ price_b = fc.numeric_column('price_b')
with ops.Graph().as_default():
features = {
'price_a': [[1.]],
'price_b': [[3.]],
}
- net1 = fc.input_layer(features, [price_a, price_b])
- net2 = fc.input_layer(features, [price_b, price_a])
+ net1 = FeatureLayer([price_a, price_b])(features)
+ net2 = FeatureLayer([price_b, price_a])(features)
with _initialized_session():
self.assertAllClose([[1., 3.]], net1.eval())
self.assertAllClose([[1., 3.]], net2.eval())
def test_fails_for_categorical_column(self):
- animal = fc_old.categorical_column_with_identity('animal', num_buckets=4)
+ animal = fc.categorical_column_with_identity('animal', num_buckets=4)
with ops.Graph().as_default():
features = {
'animal':
sparse_tensor.SparseTensor(
indices=[[0, 0], [0, 1]], values=[1, 2], dense_shape=[1, 2])
}
- with self.assertRaisesRegexp(Exception, 'must be a _DenseColumn'):
- fc.input_layer(features, [animal])
+ with self.assertRaisesRegexp(Exception, 'must be a DenseColumn'):
+ FeatureLayer([animal])(features)
def test_static_batch_size_mismatch(self):
- price1 = fc_old.numeric_column('price1')
- price2 = fc_old.numeric_column('price2')
+ price1 = fc.numeric_column('price1')
+ price2 = fc.numeric_column('price2')
with ops.Graph().as_default():
features = {
'price1': [[1.], [5.], [7.]], # batchsize = 3
@@ -2926,12 +2864,12 @@ class FunctionalInputLayerTest(test.TestCase):
with self.assertRaisesRegexp(
ValueError,
'Batch size \(first dimension\) of each feature must be same.'): # pylint: disable=anomalous-backslash-in-string
- fc.input_layer(features, [price1, price2])
+ FeatureLayer([price1, price2])(features)
def test_subset_of_static_batch_size_mismatch(self):
- price1 = fc_old.numeric_column('price1')
- price2 = fc_old.numeric_column('price2')
- price3 = fc_old.numeric_column('price3')
+ price1 = fc.numeric_column('price1')
+ price2 = fc.numeric_column('price2')
+ price3 = fc.numeric_column('price3')
with ops.Graph().as_default():
features = {
'price1': array_ops.placeholder(dtype=dtypes.int64), # batchsize = 3
@@ -2941,31 +2879,31 @@ class FunctionalInputLayerTest(test.TestCase):
with self.assertRaisesRegexp(
ValueError,
'Batch size \(first dimension\) of each feature must be same.'): # pylint: disable=anomalous-backslash-in-string
- fc.input_layer(features, [price1, price2, price3])
+ FeatureLayer([price1, price2, price3])(features)
def test_runtime_batch_size_mismatch(self):
- price1 = fc_old.numeric_column('price1')
- price2 = fc_old.numeric_column('price2')
+ price1 = fc.numeric_column('price1')
+ price2 = fc.numeric_column('price2')
with ops.Graph().as_default():
features = {
'price1': array_ops.placeholder(dtype=dtypes.int64), # batchsize = 3
'price2': [[3.], [4.]] # batchsize = 2
}
- net = fc.input_layer(features, [price1, price2])
+ net = FeatureLayer([price1, price2])(features)
with _initialized_session() as sess:
with self.assertRaisesRegexp(errors.OpError,
'Dimensions of inputs should match'):
sess.run(net, feed_dict={features['price1']: [[1.], [5.], [7.]]})
def test_runtime_batch_size_matches(self):
- price1 = fc_old.numeric_column('price1')
- price2 = fc_old.numeric_column('price2')
+ price1 = fc.numeric_column('price1')
+ price2 = fc.numeric_column('price2')
with ops.Graph().as_default():
features = {
'price1': array_ops.placeholder(dtype=dtypes.int64), # batchsize = 2
'price2': array_ops.placeholder(dtype=dtypes.int64), # batchsize = 2
}
- net = fc.input_layer(features, [price1, price2])
+ net = FeatureLayer([price1, price2])(features)
with _initialized_session() as sess:
sess.run(
net,
@@ -2975,9 +2913,9 @@ class FunctionalInputLayerTest(test.TestCase):
})
def test_multiple_layers_with_same_embedding_column(self):
- some_sparse_column = fc_old.categorical_column_with_hash_bucket(
+ some_sparse_column = fc.categorical_column_with_hash_bucket(
'sparse_feature', hash_bucket_size=5)
- some_embedding_column = fc_old.embedding_column(
+ some_embedding_column = fc.embedding_column(
some_sparse_column, dimension=10)
with ops.Graph().as_default():
@@ -2985,28 +2923,30 @@ class FunctionalInputLayerTest(test.TestCase):
'sparse_feature': [['a'], ['x']],
}
all_cols = [some_embedding_column]
- fc.input_layer(features, all_cols)
- fc.input_layer(features, all_cols)
+ FeatureLayer(all_cols)(features)
+ FeatureLayer(all_cols)(features)
# Make sure that 2 variables get created in this case.
self.assertEqual(2, len(
ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)))
expected_var_names = [
- 'input_layer/sparse_feature_embedding/embedding_weights:0',
- 'input_layer_1/sparse_feature_embedding/embedding_weights:0'
+ 'feature_layer/sparse_feature_embedding/embedding_weights:0',
+ 'feature_layer_1/sparse_feature_embedding/embedding_weights:0'
]
self.assertItemsEqual(
expected_var_names,
[v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)])
def test_multiple_layers_with_same_shared_embedding_column(self):
- categorical_column_a = fc_old.categorical_column_with_identity(
+ categorical_column_a = fc.categorical_column_with_identity(
key='aaa', num_buckets=3)
- categorical_column_b = fc_old.categorical_column_with_identity(
+ categorical_column_b = fc.categorical_column_with_identity(
key='bbb', num_buckets=3)
embedding_dimension = 2
- embedding_column_b, embedding_column_a = fc_old.shared_embedding_columns(
+ embedding_column_b, embedding_column_a = fc.shared_embedding_columns_v2(
[categorical_column_b, categorical_column_a],
dimension=embedding_dimension)
+ shared_state_manager = fc.SharedEmbeddingStateManager(
+ name='shared_feature_layer')
with ops.Graph().as_default():
features = {
@@ -3022,27 +2962,33 @@ class FunctionalInputLayerTest(test.TestCase):
dense_shape=(2, 2)),
}
all_cols = [embedding_column_a, embedding_column_b]
- fc.input_layer(features, all_cols)
- fc.input_layer(features, all_cols)
+ FeatureLayer(
+ all_cols, shared_state_manager=shared_state_manager)(
+ features)
+ FeatureLayer(
+ all_cols, shared_state_manager=shared_state_manager)(
+ features)
# Make sure that only 1 variable gets created in this case.
self.assertEqual(1, len(
ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)))
self.assertItemsEqual(
- ['input_layer/aaa_bbb_shared_embedding/embedding_weights:0'],
+ ['shared_feature_layer/aaa_bbb_shared_embedding:0'],
[v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)])
def test_multiple_layers_with_same_shared_embedding_column_diff_graphs(self):
- categorical_column_a = fc_old.categorical_column_with_identity(
+ categorical_column_a = fc.categorical_column_with_identity(
key='aaa', num_buckets=3)
- categorical_column_b = fc_old.categorical_column_with_identity(
+ categorical_column_b = fc.categorical_column_with_identity(
key='bbb', num_buckets=3)
embedding_dimension = 2
- embedding_column_b, embedding_column_a = fc_old.shared_embedding_columns(
+ embedding_column_b, embedding_column_a = fc.shared_embedding_columns_v2(
[categorical_column_b, categorical_column_a],
dimension=embedding_dimension)
all_cols = [embedding_column_a, embedding_column_b]
with ops.Graph().as_default():
+ shared_state_manager1 = fc.SharedEmbeddingStateManager(
+ name='shared_feature_layer')
features = {
'aaa':
sparse_tensor.SparseTensor(
@@ -3055,12 +3001,16 @@ class FunctionalInputLayerTest(test.TestCase):
values=(1, 2, 1),
dense_shape=(2, 2)),
}
- fc.input_layer(features, all_cols)
+ FeatureLayer(
+ all_cols, shared_state_manager=shared_state_manager1)(
+ features)
# Make sure that only 1 variable gets created in this case.
self.assertEqual(1, len(
ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)))
with ops.Graph().as_default():
+ shared_state_manager2 = fc.SharedEmbeddingStateManager(
+ name='shared_feature_layer')
features1 = {
'aaa':
sparse_tensor.SparseTensor(
@@ -3074,12 +3024,14 @@ class FunctionalInputLayerTest(test.TestCase):
dense_shape=(2, 2)),
}
- fc.input_layer(features1, all_cols)
+ FeatureLayer(
+ all_cols, shared_state_manager=shared_state_manager2)(
+ features1)
# Make sure that only 1 variable gets created in this case.
self.assertEqual(1, len(
ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)))
self.assertItemsEqual(
- ['input_layer/aaa_bbb_shared_embedding/embedding_weights:0'],
+ ['shared_feature_layer/aaa_bbb_shared_embedding:0'],
[v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)])
def test_with_numpy_input_fn(self):
@@ -3092,14 +3044,14 @@ class FunctionalInputLayerTest(test.TestCase):
del shape, dtype, partition_info
return embedding_values
- # price has 1 dimension in input_layer
- price = fc_old.numeric_column('price')
- body_style = fc_old.categorical_column_with_vocabulary_list(
+ # price has 1 dimension in feature_layer
+ price = fc.numeric_column('price')
+ body_style = fc.categorical_column_with_vocabulary_list(
'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
- # one_hot_body_style has 3 dims in input_layer.
- one_hot_body_style = fc_old.indicator_column(body_style)
- # embedded_body_style has 5 dims in input_layer.
- embedded_body_style = fc_old.embedding_column(
+ # one_hot_body_style has 3 dims in feature_layer.
+ one_hot_body_style = fc.indicator_column(body_style)
+ # embedded_body_style has 5 dims in feature_layer.
+ embedded_body_style = fc.embedding_column(
body_style, dimension=5, initializer=_initializer)
input_fn = numpy_io.numpy_input_fn(
@@ -3110,8 +3062,8 @@ class FunctionalInputLayerTest(test.TestCase):
batch_size=2,
shuffle=False)
features = input_fn()
- net = fc.input_layer(features,
- [price, one_hot_body_style, embedded_body_style])
+ net = FeatureLayer([price, one_hot_body_style, embedded_body_style])(
+ features)
self.assertEqual(1 + 3 + 5, net.shape[1])
with _initialized_session() as sess:
coord = coordinator.Coordinator()
@@ -3137,18 +3089,18 @@ class FunctionalInputLayerTest(test.TestCase):
del shape, dtype, partition_info
return embedding_values
- # price has 1 dimension in input_layer
- price = fc_old.numeric_column('price')
+ # price has 1 dimension in feature_layer
+ price = fc.numeric_column('price')
- # one_hot_body_style has 3 dims in input_layer.
- body_style = fc_old.categorical_column_with_vocabulary_list(
+ # one_hot_body_style has 3 dims in feature_layer.
+ body_style = fc.categorical_column_with_vocabulary_list(
'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
- one_hot_body_style = fc_old.indicator_column(body_style)
+ one_hot_body_style = fc.indicator_column(body_style)
- # embedded_body_style has 5 dims in input_layer.
- country = fc_old.categorical_column_with_vocabulary_list(
+    # embedded_country has 5 dims in feature_layer.
+ country = fc.categorical_column_with_vocabulary_list(
'country', vocabulary_list=['US', 'JP', 'CA'])
- embedded_country = fc_old.embedding_column(
+ embedded_country = fc.embedding_column(
country, dimension=5, initializer=_initializer)
# Provides 1-dim tensor and dense tensor.
@@ -3165,8 +3117,7 @@ class FunctionalInputLayerTest(test.TestCase):
self.assertEqual(1, features['body-style'].dense_shape.get_shape()[0])
self.assertEqual(1, features['country'].shape.ndims)
- net = fc.input_layer(features,
- [price, one_hot_body_style, embedded_country])
+ net = FeatureLayer([price, one_hot_body_style, embedded_country])(features)
self.assertEqual(1 + 3 + 5, net.shape[1])
with _initialized_session() as sess:
@@ -3187,18 +3138,18 @@ class FunctionalInputLayerTest(test.TestCase):
del shape, dtype, partition_info
return embedding_values
- # price has 1 dimension in input_layer
- price = fc_old.numeric_column('price')
+ # price has 1 dimension in feature_layer
+ price = fc.numeric_column('price')
- # one_hot_body_style has 3 dims in input_layer.
- body_style = fc_old.categorical_column_with_vocabulary_list(
+ # one_hot_body_style has 3 dims in feature_layer.
+ body_style = fc.categorical_column_with_vocabulary_list(
'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
- one_hot_body_style = fc_old.indicator_column(body_style)
+ one_hot_body_style = fc.indicator_column(body_style)
- # embedded_body_style has 5 dims in input_layer.
- country = fc_old.categorical_column_with_vocabulary_list(
+    # embedded_country has 2 dims in feature_layer.
+ country = fc.categorical_column_with_vocabulary_list(
'country', vocabulary_list=['US', 'JP', 'CA'])
- embedded_country = fc_old.embedding_column(
+ embedded_country = fc.embedding_column(
country, dimension=2, initializer=_initializer)
# Provides 1-dim tensor and dense tensor.
@@ -3219,8 +3170,7 @@ class FunctionalInputLayerTest(test.TestCase):
dense_shape=(2,))
country_data = np.array([['US'], ['CA']])
- net = fc.input_layer(features,
- [price, one_hot_body_style, embedded_country])
+ net = FeatureLayer([price, one_hot_body_style, embedded_country])(features)
self.assertEqual(1 + 3 + 2, net.shape[1])
with _initialized_session() as sess:
@@ -3237,8 +3187,8 @@ class FunctionalInputLayerTest(test.TestCase):
}))
def test_with_rank_0_feature(self):
- # price has 1 dimension in input_layer
- price = fc_old.numeric_column('price')
+ # price has 1 dimension in feature_layer
+ price = fc.numeric_column('price')
features = {
'price': constant_op.constant(0),
}
@@ -3246,13 +3196,13 @@ class FunctionalInputLayerTest(test.TestCase):
# Static rank 0 should fail
with self.assertRaisesRegexp(ValueError, 'Feature .* cannot have rank 0'):
- fc.input_layer(features, [price])
+ FeatureLayer([price])(features)
# Dynamic rank 0 should fail
features = {
'price': array_ops.placeholder(dtypes.float32),
}
- net = fc.input_layer(features, [price])
+ net = FeatureLayer([price])(features)
self.assertEqual(1, net.shape[1])
with _initialized_session() as sess:
with self.assertRaisesOpError('Feature .* cannot have rank 0'):
@@ -3267,7 +3217,7 @@ class MakeParseExampleSpecTest(test.TestCase):
@property
def name(self):
- return "_TestFeatureColumn"
+ return '_TestFeatureColumn'
def transform_feature(self, transformation_cache, state_manager):
pass
@@ -3593,25 +3543,6 @@ class VocabularyFileCategoricalColumnTest(test.TestCase):
dense_shape=inputs.dense_shape),
id_tensor.eval())
- def DISABLED_test_get_sparse_tensors_weight_collections(self):
- column = fc.categorical_column_with_vocabulary_file(
- key='aaa',
- vocabulary_file=self._wire_vocabulary_file_name,
- vocabulary_size=self._wire_vocabulary_size)
- inputs = sparse_tensor.SparseTensor(
- values=['omar', 'stringer', 'marlo'],
- indices=[[0, 0], [1, 0], [1, 1]],
- dense_shape=[2, 2])
- column.get_sparse_tensors(
- FeatureTransformationCache({
- 'aaa': inputs
- }),
- weight_collections=('my_weights',))
-
- self.assertItemsEqual(
- [], ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES))
- self.assertItemsEqual([], ops.get_collection('my_weights'))
-
def test_get_sparse_tensors_dense_input(self):
column = fc.categorical_column_with_vocabulary_file(
key='aaa',
@@ -4043,24 +3974,6 @@ class VocabularyListCategoricalColumnTest(test.TestCase):
dense_shape=inputs.dense_shape),
id_tensor.eval())
- def DISABLED_test_get_sparse_tensors_weight_collections(self):
- column = fc.categorical_column_with_vocabulary_list(
- key='aaa',
- vocabulary_list=('omar', 'stringer', 'marlo'))
- inputs = sparse_tensor.SparseTensor(
- values=['omar', 'stringer', 'marlo'],
- indices=[[0, 0], [1, 0], [1, 1]],
- dense_shape=[2, 2])
- column.get_sparse_tensors(
- FeatureTransformationCache({
- 'aaa': inputs
- }),
- weight_collections=('my_weights',))
-
- self.assertItemsEqual(
- [], ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES))
- self.assertItemsEqual([], ops.get_collection('my_weights'))
-
def test_get_sparse_tensors_dense_input(self):
column = fc.categorical_column_with_vocabulary_list(
key='aaa',
@@ -4356,22 +4269,6 @@ class IdentityCategoricalColumnTest(test.TestCase):
dense_shape=inputs.dense_shape),
id_tensor.eval())
- def DISABLED_test_get_sparse_tensors_weight_collections(self):
- column = fc.categorical_column_with_identity(key='aaa', num_buckets=3)
- inputs = sparse_tensor.SparseTensorValue(
- indices=((0, 0), (1, 0), (1, 1)),
- values=(0, 1, 0),
- dense_shape=(2, 2))
- column.get_sparse_tensors(
- FeatureTransformationCache({
- 'aaa': inputs
- }),
- weight_collections=('my_weights',))
-
- self.assertItemsEqual(
- [], ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES))
- self.assertItemsEqual([], ops.get_collection('my_weights'))
-
def test_get_sparse_tensors_dense_input(self):
column = fc.categorical_column_with_identity(key='aaa', num_buckets=3)
id_weight_pair = column.get_sparse_tensors(
@@ -4765,16 +4662,16 @@ class IndicatorColumnTest(test.TestCase):
weight_var.assign([[1.], [2.], [3.], [4.]]).eval()
self.assertAllClose([[2. + 3.]], predictions.eval())
- def test_input_layer(self):
- animal = fc_old.indicator_column(
- fc_old.categorical_column_with_identity('animal', num_buckets=4))
+ def test_feature_layer(self):
+ animal = fc.indicator_column(
+ fc.categorical_column_with_identity('animal', num_buckets=4))
with ops.Graph().as_default():
features = {
'animal':
sparse_tensor.SparseTensor(
indices=[[0, 0], [0, 1]], values=[1, 2], dense_shape=[1, 2])
}
- net = fc.input_layer(features, [animal])
+ net = FeatureLayer([animal])(features)
with _initialized_session():
self.assertAllClose([[0., 1., 1., 0.]], net.eval())
@@ -4786,12 +4683,13 @@ class _TestStateManager(StateManager):
self._all_variables = {}
self._trainable = trainable
- def get_variable(self,
- feature_column,
- name,
- shape,
- dtype=None,
- initializer=None):
+ def create_variable(self,
+ feature_column,
+ name,
+ shape,
+ dtype=None,
+ trainable=True,
+ initializer=None):
if feature_column not in self._all_variables:
self._all_variables[feature_column] = {}
var_dict = self._all_variables[feature_column]
@@ -4801,11 +4699,19 @@ class _TestStateManager(StateManager):
var = variable_scope.get_variable(
name=name,
shape=shape,
- initializer=initializer,
- trainable=self._trainable)
+ dtype=dtype,
+ trainable=self._trainable and trainable,
+ initializer=initializer)
var_dict[name] = var
return var
+ def get_variable(self, feature_column, name):
+ if feature_column not in self._all_variables:
+ raise ValueError('Do not recognize FeatureColumn.')
+ if name in self._all_variables[feature_column]:
+ return self._all_variables[feature_column][name]
+ raise ValueError('Could not find variable.')
+
class EmbeddingColumnTest(test.TestCase):
@@ -4967,6 +4873,7 @@ class EmbeddingColumnTest(test.TestCase):
categorical_column, dimension=embedding_dimension,
initializer=_initializer)
state_manager = _TestStateManager()
+ embedding_column.create_state(state_manager)
# Provide sparse input and get dense result.
embedding_lookup = embedding_column.get_dense_tensor(
@@ -5028,6 +4935,7 @@ class EmbeddingColumnTest(test.TestCase):
categorical_column, dimension=embedding_dimension,
initializer=_initializer)
state_manager = _TestStateManager()
+ embedding_column.create_state(state_manager)
# Provide sparse input and get dense result.
embedding_lookup = embedding_column.get_dense_tensor(
@@ -5043,36 +4951,6 @@ class EmbeddingColumnTest(test.TestCase):
self.assertAllEqual(embedding_values, global_vars[0].eval())
self.assertAllEqual(expected_lookups, embedding_lookup.eval())
- def DISABLED_test_get_dense_tensor_weight_collections(self):
- sparse_input = sparse_tensor.SparseTensorValue(
- # example 0, ids [2]
- # example 1, ids [0, 1]
- # example 2, ids []
- # example 3, ids [1]
- indices=((0, 0), (1, 0), (1, 4), (3, 0)),
- values=(2, 0, 1, 1),
- dense_shape=(4, 5))
-
- # Build columns.
- categorical_column = fc.categorical_column_with_identity(
- key='aaa', num_buckets=3)
- embedding_column = fc.embedding_column(categorical_column, dimension=2)
-
- # Provide sparse input and get dense result.
- embedding_column.get_dense_tensor(
- FeatureTransformationCache({
- 'aaa': sparse_input
- }),
- weight_collections=('my_vars',))
-
- # Assert expected embedding variable and lookups.
- global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
- self.assertItemsEqual(('embedding_weights:0',),
- tuple([v.name for v in global_vars]))
- my_vars = ops.get_collection('my_vars')
- self.assertItemsEqual(
- ('embedding_weights:0',), tuple([v.name for v in my_vars]))
-
def test_get_dense_tensor_placeholder_inputs(self):
# Inputs.
vocabulary_size = 3
@@ -5117,6 +4995,7 @@ class EmbeddingColumnTest(test.TestCase):
categorical_column, dimension=embedding_dimension,
initializer=_initializer)
state_manager = _TestStateManager()
+ embedding_column.create_state(state_manager)
# Provide sparse input and get dense result.
input_indices = array_ops.placeholder(dtype=dtypes.int64)
@@ -5187,6 +5066,7 @@ class EmbeddingColumnTest(test.TestCase):
ckpt_to_load_from=ckpt_path,
tensor_name_in_ckpt=ckpt_tensor)
state_manager = _TestStateManager()
+ embedding_column.create_state(state_manager)
# Provide sparse input and get dense result.
embedding_lookup = embedding_column.get_dense_tensor(
@@ -5354,7 +5234,7 @@ class EmbeddingColumnTest(test.TestCase):
# = [4*7 + 6*11, 4*2 + 6*3.5, 4*0 + 6*0, 4*3 + 6*5] = [94, 29, 0, 42]
self.assertAllClose(((94.,), (29.,), (0.,), (42.,)), predictions.eval())
- def test_input_layer(self):
+ def test_feature_layer(self):
# Inputs.
vocabulary_size = 3
sparse_input = sparse_tensor.SparseTensorValue(
@@ -5392,30 +5272,29 @@ class EmbeddingColumnTest(test.TestCase):
)
# Build columns.
- categorical_column = fc_old.categorical_column_with_identity(
+ categorical_column = fc.categorical_column_with_identity(
key='aaa', num_buckets=vocabulary_size)
- embedding_column = fc_old.embedding_column(
+ embedding_column = fc.embedding_column(
categorical_column,
dimension=embedding_dimension,
initializer=_initializer)
# Provide sparse input and get dense result.
- input_layer = fc.input_layer({'aaa': sparse_input}, (embedding_column,))
+ l = FeatureLayer((embedding_column,))
+ feature_layer = l({'aaa': sparse_input})
# Assert expected embedding variable and lookups.
global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
- self.assertItemsEqual(
- ('input_layer/aaa_embedding/embedding_weights:0',),
- tuple([v.name for v in global_vars]))
+ self.assertItemsEqual(('feature_layer/aaa_embedding/embedding_weights:0',),
+ tuple([v.name for v in global_vars]))
trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
- self.assertItemsEqual(
- ('input_layer/aaa_embedding/embedding_weights:0',),
- tuple([v.name for v in trainable_vars]))
+ self.assertItemsEqual(('feature_layer/aaa_embedding/embedding_weights:0',),
+ tuple([v.name for v in trainable_vars]))
with _initialized_session():
self.assertAllEqual(embedding_values, trainable_vars[0].eval())
- self.assertAllEqual(expected_lookups, input_layer.eval())
+ self.assertAllEqual(expected_lookups, feature_layer.eval())
- def test_input_layer_not_trainable(self):
+ def test_feature_layer_not_trainable(self):
# Inputs.
vocabulary_size = 3
sparse_input = sparse_tensor.SparseTensorValue(
@@ -5453,65 +5332,26 @@ class EmbeddingColumnTest(test.TestCase):
)
# Build columns.
- categorical_column = fc_old.categorical_column_with_identity(
+ categorical_column = fc.categorical_column_with_identity(
key='aaa', num_buckets=vocabulary_size)
- embedding_column = fc_old.embedding_column(
+ embedding_column = fc.embedding_column(
categorical_column,
dimension=embedding_dimension,
initializer=_initializer,
trainable=False)
# Provide sparse input and get dense result.
- input_layer = fc.input_layer({'aaa': sparse_input}, (embedding_column,))
+ feature_layer = FeatureLayer((embedding_column,))({'aaa': sparse_input})
# Assert expected embedding variable and lookups.
global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
- self.assertItemsEqual(
- ('input_layer/aaa_embedding/embedding_weights:0',),
- tuple([v.name for v in global_vars]))
+ self.assertItemsEqual(('feature_layer/aaa_embedding/embedding_weights:0',),
+ tuple([v.name for v in global_vars]))
self.assertItemsEqual(
[], ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES))
with _initialized_session():
self.assertAllEqual(embedding_values, global_vars[0].eval())
- self.assertAllEqual(expected_lookups, input_layer.eval())
-
-
-class _TestSharedEmbeddingStateManager(StateManager):
- """Manages the state for shared embedding columns.
-
- This can handle multiple groups of shared embedding columns.
- """
-
- def __init__(self, trainable=True):
- # Dict of shared_embedding_collection_name to a dict of variables.
- self._all_variables = {}
- self._trainable = trainable
-
- def get_variable(self,
- feature_column,
- name,
- shape,
- dtype=None,
- initializer=None):
- if not isinstance(feature_column, fc.SharedEmbeddingColumn):
- raise ValueError(
- 'SharedEmbeddingStateManager can only handle SharedEmbeddingColumns. '
- 'Given type: {} '.format(type(feature_column)))
-
- collection_name = feature_column.shared_collection_name
- if collection_name not in self._all_variables:
- self._all_variables[collection_name] = {}
- var_dict = self._all_variables[collection_name]
- if name in var_dict:
- return var_dict[name]
- else:
- var = variable_scope.get_variable(
- name=name,
- shape=shape,
- initializer=initializer,
- trainable=self._trainable)
- var_dict[name] = var
- return var
+ self.assertAllEqual(expected_lookups, feature_layer.eval())
class SharedEmbeddingColumnTest(test.TestCase):
@@ -5522,7 +5362,7 @@ class SharedEmbeddingColumnTest(test.TestCase):
categorical_column_b = fc.categorical_column_with_identity(
key='bbb', num_buckets=3)
embedding_dimension = 2
- embedding_column_b, embedding_column_a = fc.shared_embedding_columns(
+ embedding_column_b, embedding_column_a = fc.shared_embedding_columns_v2(
[categorical_column_b, categorical_column_a],
dimension=embedding_dimension)
self.assertIs(categorical_column_a, embedding_column_a.categorical_column)
@@ -5560,7 +5400,7 @@ class SharedEmbeddingColumnTest(test.TestCase):
categorical_column_b = fc.categorical_column_with_identity(
key='bbb', num_buckets=3)
embedding_dimension = 2
- embedding_column_a, embedding_column_b = fc.shared_embedding_columns(
+ embedding_column_a, embedding_column_b = fc.shared_embedding_columns_v2(
[categorical_column_a, categorical_column_b],
dimension=embedding_dimension,
combiner='my_combiner',
@@ -5605,7 +5445,7 @@ class SharedEmbeddingColumnTest(test.TestCase):
categorical_column_b = fc.categorical_column_with_identity(
key='bbb', num_buckets=3)
embedding_dimension = 2
- original_a, _ = fc.shared_embedding_columns(
+ original_a, _ = fc.shared_embedding_columns_v2(
[categorical_column_a, categorical_column_b],
dimension=embedding_dimension,
combiner='my_combiner',
@@ -5613,7 +5453,8 @@ class SharedEmbeddingColumnTest(test.TestCase):
shared_embedding_collection_name='shared_embedding_collection_name',
ckpt_to_load_from='my_ckpt',
tensor_name_in_ckpt='my_ckpt_tensor',
- max_norm=42., trainable=False)
+ max_norm=42.,
+ trainable=False)
for embedding_column_a in (original_a, copy.deepcopy(original_a)):
self.assertEqual('aaa', embedding_column_a.categorical_column.name)
self.assertEqual(3, embedding_column_a.categorical_column.num_buckets)
@@ -5642,8 +5483,9 @@ class SharedEmbeddingColumnTest(test.TestCase):
categorical_column_b = fc.categorical_column_with_identity(
key='bbb', num_buckets=3)
with self.assertRaisesRegexp(ValueError, 'initializer must be callable'):
- fc.shared_embedding_columns(
- [categorical_column_a, categorical_column_b], dimension=2,
+ fc.shared_embedding_columns_v2(
+ [categorical_column_a, categorical_column_b],
+ dimension=2,
initializer='not_fn')
def test_incompatible_column_type(self):
@@ -5656,7 +5498,7 @@ class SharedEmbeddingColumnTest(test.TestCase):
with self.assertRaisesRegexp(
ValueError, 'all categorical_columns must have the same type.*'
'IdentityCategoricalColumn.*HashedCategoricalColumn'):
- fc.shared_embedding_columns(
+ fc.shared_embedding_columns_v2(
[categorical_column_a, categorical_column_b, categorical_column_c],
dimension=2)
@@ -5669,11 +5511,11 @@ class SharedEmbeddingColumnTest(test.TestCase):
key='bbb', num_buckets=3)
weighted_categorical_column_b = fc.weighted_categorical_column(
categorical_column_b, weight_feature_key='bbb_weights')
- fc.shared_embedding_columns(
+ fc.shared_embedding_columns_v2(
[weighted_categorical_column_a, categorical_column_b], dimension=2)
- fc.shared_embedding_columns(
+ fc.shared_embedding_columns_v2(
[categorical_column_a, weighted_categorical_column_b], dimension=2)
- fc.shared_embedding_columns(
+ fc.shared_embedding_columns_v2(
[weighted_categorical_column_a, weighted_categorical_column_b],
dimension=2)
@@ -5682,8 +5524,7 @@ class SharedEmbeddingColumnTest(test.TestCase):
key='aaa', vocabulary_list=('omar', 'stringer', 'marlo'))
b = fc.categorical_column_with_vocabulary_list(
key='bbb', vocabulary_list=('omar', 'stringer', 'marlo'))
- a_embedded, b_embedded = fc.shared_embedding_columns(
- [a, b], dimension=2)
+ a_embedded, b_embedded = fc.shared_embedding_columns_v2([a, b], dimension=2)
data = example_pb2.Example(features=feature_pb2.Features(
feature={
'aaa':
@@ -5717,8 +5558,7 @@ class SharedEmbeddingColumnTest(test.TestCase):
def test_transform_feature(self):
a = fc.categorical_column_with_identity(key='aaa', num_buckets=3)
b = fc.categorical_column_with_identity(key='bbb', num_buckets=3)
- a_embedded, b_embedded = fc.shared_embedding_columns(
- [a, b], dimension=2)
+ a_embedded, b_embedded = fc.shared_embedding_columns_v2([a, b], dimension=2)
features = {
'aaa': sparse_tensor.SparseTensor(
indices=((0, 0), (1, 0), (1, 1)),
@@ -5788,10 +5628,13 @@ class SharedEmbeddingColumnTest(test.TestCase):
key='aaa', num_buckets=vocabulary_size)
categorical_column_b = fc.categorical_column_with_identity(
key='bbb', num_buckets=vocabulary_size)
- embedding_column_a, embedding_column_b = fc.shared_embedding_columns(
+ embedding_column_a, embedding_column_b = fc.shared_embedding_columns_v2(
[categorical_column_a, categorical_column_b],
- dimension=embedding_dimension, initializer=_initializer)
- state_manager = _TestSharedEmbeddingStateManager()
+ dimension=embedding_dimension,
+ initializer=_initializer)
+ state_manager = fc.SharedEmbeddingStateManager(name='shared_feature_layer')
+ embedding_column_a.create_state(state_manager)
+ embedding_column_b.create_state(state_manager)
# Provide sparse input and get dense result.
embedding_lookup_a = embedding_column_a.get_dense_tensor(
@@ -5801,7 +5644,7 @@ class SharedEmbeddingColumnTest(test.TestCase):
# Assert expected embedding variable and lookups.
global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
- self.assertItemsEqual(('embedding_weights:0',),
+ self.assertItemsEqual(('shared_feature_layer/aaa_bbb_shared_embedding:0',),
tuple([v.name for v in global_vars]))
embedding_var = global_vars[0]
with _initialized_session():
@@ -5809,58 +5652,6 @@ class SharedEmbeddingColumnTest(test.TestCase):
self.assertAllEqual(expected_lookups_a, embedding_lookup_a.eval())
self.assertAllEqual(expected_lookups_b, embedding_lookup_b.eval())
- def DISABLED_test_get_dense_tensor_weight_collections(self):
- # Inputs.
- vocabulary_size = 3
- # -1 values are ignored.
- input_a = np.array([
- [2, -1, -1], # example 0, ids [2]
- [0, 1, -1]
- ]) # example 1, ids [0, 1]
- input_b = np.array([
- [0, -1, -1], # example 0, ids [0]
- [-1, -1, -1]
- ]) # example 1, ids []
- input_features = {'aaa': input_a, 'bbb': input_b}
-
- # Embedding variable.
- embedding_dimension = 2
- embedding_values = (
- (1., 2.), # id 0
- (3., 5.), # id 1
- (7., 11.) # id 2
- )
-
- def _initializer(shape, dtype, partition_info):
- self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
- self.assertEqual(dtypes.float32, dtype)
- self.assertIsNone(partition_info)
- return embedding_values
-
- # Build columns.
- categorical_column_a = fc.categorical_column_with_identity(
- key='aaa', num_buckets=vocabulary_size)
- categorical_column_b = fc.categorical_column_with_identity(
- key='bbb', num_buckets=vocabulary_size)
- embedding_column_a, embedding_column_b = fc.shared_embedding_columns(
- [categorical_column_a, categorical_column_b],
- dimension=embedding_dimension,
- initializer=_initializer)
-
- fc.input_layer(
- input_features, [embedding_column_a, embedding_column_b],
- weight_collections=('my_vars',))
-
- # Assert expected embedding variable and lookups.
- global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
- self.assertItemsEqual(
- ('input_layer/aaa_bbb_shared_embedding/embedding_weights:0',),
- tuple(v.name for v in global_vars))
- my_vars = ops.get_collection('my_vars')
- self.assertItemsEqual(
- ('input_layer/aaa_bbb_shared_embedding/embedding_weights:0',),
- tuple(v.name for v in my_vars))
-
def test_get_dense_tensor_placeholder_inputs(self):
# Inputs.
vocabulary_size = 3
@@ -5903,10 +5694,13 @@ class SharedEmbeddingColumnTest(test.TestCase):
key='aaa', num_buckets=vocabulary_size)
categorical_column_b = fc.categorical_column_with_identity(
key='bbb', num_buckets=vocabulary_size)
- embedding_column_a, embedding_column_b = fc.shared_embedding_columns(
+ embedding_column_a, embedding_column_b = fc.shared_embedding_columns_v2(
[categorical_column_a, categorical_column_b],
- dimension=embedding_dimension, initializer=_initializer)
- state_manager = _TestSharedEmbeddingStateManager()
+ dimension=embedding_dimension,
+ initializer=_initializer)
+ state_manager = fc.SharedEmbeddingStateManager()
+ embedding_column_a.create_state(state_manager)
+ embedding_column_b.create_state(state_manager)
# Provide sparse input and get dense result.
embedding_lookup_a = embedding_column_a.get_dense_tensor(
@@ -6096,7 +5890,7 @@ class SharedEmbeddingColumnTest(test.TestCase):
# = [3*1 + 5*2, 3*0 +5*0] = [13, 0]
self.assertAllClose([[94. + 13.], [29.]], predictions.eval())
- def _test_input_layer(self, trainable=True):
+ def _test_feature_layer(self, trainable=True):
# Inputs.
vocabulary_size = 3
sparse_input_a = sparse_tensor.SparseTensorValue(
@@ -6111,6 +5905,18 @@ class SharedEmbeddingColumnTest(test.TestCase):
indices=((0, 0),),
values=(0,),
dense_shape=(2, 5))
+ sparse_input_c = sparse_tensor.SparseTensorValue(
+ # example 0, ids [2]
+ # example 1, ids [0, 1]
+ indices=((0, 1), (1, 1), (1, 3)),
+ values=(2, 0, 1),
+ dense_shape=(2, 5))
+ sparse_input_d = sparse_tensor.SparseTensorValue(
+ # example 0, ids [2]
+ # example 1, ids []
+ indices=((0, 1),),
+ values=(2,),
+ dense_shape=(2, 5))
# Embedding variable.
embedding_dimension = 2
@@ -6130,51 +5936,127 @@ class SharedEmbeddingColumnTest(test.TestCase):
# example 0:
# A ids [2], embedding = [7, 11]
# B ids [0], embedding = [1, 2]
- (7., 11., 1., 2.),
+ # C ids [2], embedding = [7, 11]
+ # D ids [2], embedding = [7, 11]
+ (7., 11., 1., 2., 7., 11., 7., 11.),
# example 1:
# A ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
# B ids [], embedding = [0, 0]
- (2., 3.5, 0., 0.),
+ # C ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
+ # D ids [], embedding = [0, 0]
+ (2., 3.5, 0., 0., 2., 3.5, 0., 0.),
)
# Build columns.
- categorical_column_a = fc_old.categorical_column_with_identity(
+ categorical_column_a = fc.categorical_column_with_identity(
key='aaa', num_buckets=vocabulary_size)
- categorical_column_b = fc_old.categorical_column_with_identity(
+ categorical_column_b = fc.categorical_column_with_identity(
key='bbb', num_buckets=vocabulary_size)
- embedding_column_a, embedding_column_b = fc_old.shared_embedding_columns(
+ categorical_column_c = fc.categorical_column_with_identity(
+ key='ccc', num_buckets=vocabulary_size)
+ categorical_column_d = fc.categorical_column_with_identity(
+ key='ddd', num_buckets=vocabulary_size)
+
+ embedding_column_a, embedding_column_b = fc.shared_embedding_columns_v2(
[categorical_column_a, categorical_column_b],
dimension=embedding_dimension,
initializer=_initializer,
trainable=trainable)
+ embedding_column_c, embedding_column_d = fc.shared_embedding_columns_v2(
+ [categorical_column_c, categorical_column_d],
+ dimension=embedding_dimension,
+ initializer=_initializer,
+ trainable=trainable)
+ shared_state_manager = fc.SharedEmbeddingStateManager(
+ name='shared_feature_layer')
+
+ features = {
+ 'aaa': sparse_input_a,
+ 'bbb': sparse_input_b,
+ 'ccc': sparse_input_c,
+ 'ddd': sparse_input_d
+ }
# Provide sparse input and get dense result.
- input_layer = fc.input_layer(
- features={'aaa': sparse_input_a, 'bbb': sparse_input_b},
- feature_columns=(embedding_column_b, embedding_column_a))
+ feature_layer = FeatureLayer(
+ feature_columns=(embedding_column_b, embedding_column_a,
+ embedding_column_c, embedding_column_d),
+ shared_state_manager=shared_state_manager)(
+ features)
# Assert expected embedding variable and lookups.
global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
- self.assertItemsEqual(
- ['input_layer/aaa_bbb_shared_embedding/embedding_weights:0'],
- tuple([v.name for v in global_vars]))
+ self.assertItemsEqual([
+ 'shared_feature_layer/aaa_bbb_shared_embedding:0',
+ 'shared_feature_layer/ccc_ddd_shared_embedding:0'
+ ], tuple([v.name for v in global_vars]))
trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
if trainable:
- self.assertItemsEqual(
- ['input_layer/aaa_bbb_shared_embedding/embedding_weights:0'],
- tuple([v.name for v in trainable_vars]))
+ self.assertItemsEqual([
+ 'shared_feature_layer/aaa_bbb_shared_embedding:0',
+ 'shared_feature_layer/ccc_ddd_shared_embedding:0'
+ ], tuple([v.name for v in trainable_vars]))
else:
self.assertItemsEqual([], tuple([v.name for v in trainable_vars]))
shared_embedding_vars = global_vars
with _initialized_session():
self.assertAllEqual(embedding_values, shared_embedding_vars[0].eval())
- self.assertAllEqual(expected_lookups, input_layer.eval())
+ self.assertAllEqual(expected_lookups, feature_layer.eval())
+
+ def test_feature_layer(self):
+ self._test_feature_layer()
+
+ def test_feature_layer_no_trainable(self):
+ self._test_feature_layer(trainable=False)
+
- def test_input_layer(self):
- self._test_input_layer()
+class SharedEmbeddingStateManagerTest(test.TestCase):
- def test_input_layer_no_trainable(self):
- self._test_input_layer(trainable=False)
+ def test_basic(self):
+ categorical_column_a = fc.categorical_column_with_identity(
+ key='aaa', num_buckets=3)
+ categorical_column_b = fc.categorical_column_with_identity(
+ key='bbb', num_buckets=3)
+ fc.shared_embedding_columns_v2(
+ [categorical_column_a, categorical_column_b], dimension=2)
+ shared_state_manager = fc.SharedEmbeddingStateManager(
+ name='shared_feature_layer')
+ var_a = shared_state_manager.create_variable('aaa_bbb_shared_embedding',
+ [5, 10])
+ var_b = shared_state_manager.create_variable('aaa_bbb_shared_embedding',
+ [5, 10])
+ self.assertEqual(var_a, var_b)
+ self.assertEqual('shared_feature_layer/aaa_bbb_shared_embedding:0',
+ var_a.name)
+ self.assertIsInstance(var_a, variables_lib.Variable)
+
+ def test_multiple_sets(self):
+ categorical_column_a = fc.categorical_column_with_identity(
+ key='aaa', num_buckets=3)
+ categorical_column_b = fc.categorical_column_with_identity(
+ key='bbb', num_buckets=3)
+ categorical_column_c = fc.categorical_column_with_identity(
+ key='ccc', num_buckets=3)
+ categorical_column_d = fc.categorical_column_with_identity(
+ key='ddd', num_buckets=3)
+
+ fc.shared_embedding_columns_v2(
+ [categorical_column_a, categorical_column_b], dimension=2)
+ fc.shared_embedding_columns_v2(
+ [categorical_column_c, categorical_column_d], dimension=2)
+ shared_state_manager = fc.SharedEmbeddingStateManager(
+ name='shared_feature_layer')
+ var_a = shared_state_manager.create_variable('aaa_bbb_shared_embedding',
+ [5, 10])
+ var_c = shared_state_manager.create_variable('ccc_ddd_shared_embedding',
+ [5, 10])
+ self.assertIsInstance(var_a, variables_lib.Variable)
+ self.assertIsInstance(var_c, variables_lib.Variable)
+ self.assertNotEquals(var_a, var_c)
+ self.assertEqual('shared_feature_layer/aaa_bbb_shared_embedding:0',
+ var_a.name)
+ self.assertEqual('shared_feature_layer/ccc_ddd_shared_embedding:0',
+ var_c.name)
class WeightedCategoricalColumnTest(test.TestCase):