diff options
author | A. Unique TensorFlower <gardener@tensorflow.org> | 2018-03-12 21:53:54 -0700 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2018-03-12 21:58:13 -0700 |
commit | ed21fd502316634fe9d139a3ba4d4d1002137e0b (patch) | |
tree | fcfcbcd674c70291151a307ecfebae02d68f8adf /tensorflow/contrib/feature_column | |
parent | a0a11bc349e3aa28da050d10a0c509241dbee414 (diff) |
Merges embedding_column and indicator_column with their sequence counterparts, and exposes the sequence methods under tf.contrib.feature_column.
PiperOrigin-RevId: 188826187
Diffstat (limited to 'tensorflow/contrib/feature_column')
4 files changed, 138 insertions, 323 deletions
diff --git a/tensorflow/contrib/feature_column/BUILD b/tensorflow/contrib/feature_column/BUILD index 8ba0823a71..3614b2b15a 100644 --- a/tensorflow/contrib/feature_column/BUILD +++ b/tensorflow/contrib/feature_column/BUILD @@ -26,6 +26,7 @@ py_library( srcs_version = "PY2AND3", deps = [ ":sequence_feature_column", + "//tensorflow/python:util", ], ) @@ -38,7 +39,6 @@ py_library( "//tensorflow/python:check_ops", "//tensorflow/python:dtypes", "//tensorflow/python:framework_ops", - "//tensorflow/python:math_ops", "//tensorflow/python:parsing_ops", "//tensorflow/python:sparse_ops", "//tensorflow/python:tensor_shape", diff --git a/tensorflow/contrib/feature_column/__init__.py b/tensorflow/contrib/feature_column/__init__.py index 650a80144f..baa8c1567a 100644 --- a/tensorflow/contrib/feature_column/__init__.py +++ b/tensorflow/contrib/feature_column/__init__.py @@ -25,6 +25,12 @@ from tensorflow.python.util.all_util import remove_undocumented # pylint: enable=unused-import,line-too-long,wildcard-import _allowed_symbols = [ + 'sequence_categorical_column_with_hash_bucket', + 'sequence_categorical_column_with_identity', + 'sequence_categorical_column_with_vocabulary_list', + 'sequence_categorical_column_with_vocabulary_file', + 'sequence_input_layer', + 'sequence_numeric_column', ] remove_undocumented(__name__, allowed_exception_list=_allowed_symbols) diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py index f57557c1cc..e60116966f 100644 --- a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py +++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py @@ -19,7 +19,6 @@ from __future__ import division from __future__ import print_function -import abc import collections @@ -29,7 +28,6 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from 
tensorflow.python.ops import array_ops from tensorflow.python.ops import check_ops -from tensorflow.python.ops import math_ops from tensorflow.python.ops import parsing_ops from tensorflow.python.ops import sparse_ops from tensorflow.python.ops import variable_scope @@ -99,9 +97,11 @@ def sequence_input_layer( """ feature_columns = fc._clean_feature_columns(feature_columns) for c in feature_columns: - if not isinstance(c, _SequenceDenseColumn): + if not isinstance(c, fc._SequenceDenseColumn): raise ValueError( 'All feature_columns must be of type _SequenceDenseColumn. ' + 'You can wrap a sequence_categorical_column with an embedding_column ' + 'or indicator_column. ' 'Given (type {}): {}'.format(type(c), c)) with variable_scope.variable_scope( @@ -136,6 +136,10 @@ def sequence_categorical_column_with_identity( key, num_buckets, default_value=None): """Returns a feature column that represents sequences of integers. + Pass this to `embedding_column` or `indicator_column` to convert sequence + categorical data into dense representation for input to sequence NN, such as + RNN. + Example: ```python @@ -163,7 +167,7 @@ def sequence_categorical_column_with_identity( Returns: A `_SequenceCategoricalColumn`. """ - return _SequenceCategoricalColumn( + return fc._SequenceCategoricalColumn( fc.categorical_column_with_identity( key=key, num_buckets=num_buckets, @@ -174,6 +178,10 @@ def sequence_categorical_column_with_hash_bucket( key, hash_bucket_size, dtype=dtypes.string): """A sequence of categorical terms where ids are set by hashing. + Pass this to `embedding_column` or `indicator_column` to convert sequence + categorical data into dense representation for input to sequence NN, such as + RNN. + Example: ```python @@ -198,7 +206,7 @@ def sequence_categorical_column_with_hash_bucket( Returns: A `_SequenceCategoricalColumn`. 
""" - return _SequenceCategoricalColumn( + return fc._SequenceCategoricalColumn( fc.categorical_column_with_hash_bucket( key=key, hash_bucket_size=hash_bucket_size, @@ -210,6 +218,10 @@ def sequence_categorical_column_with_vocabulary_file( default_value=None, dtype=dtypes.string): """A sequence of categorical terms where ids use a vocabulary file. + Pass this to `embedding_column` or `indicator_column` to convert sequence + categorical data into dense representation for input to sequence NN, such as + RNN. + Example: ```python @@ -246,7 +258,7 @@ def sequence_categorical_column_with_vocabulary_file( Returns: A `_SequenceCategoricalColumn`. """ - return _SequenceCategoricalColumn( + return fc._SequenceCategoricalColumn( fc.categorical_column_with_vocabulary_file( key=key, vocabulary_file=vocabulary_file, @@ -260,6 +272,10 @@ def sequence_categorical_column_with_vocabulary_list( key, vocabulary_list, dtype=None, default_value=-1, num_oov_buckets=0): """A sequence of categorical terms where ids use an in-memory list. + Pass this to `embedding_column` or `indicator_column` to convert sequence + categorical data into dense representation for input to sequence NN, such as + RNN. + Example: ```python @@ -296,7 +312,7 @@ def sequence_categorical_column_with_vocabulary_list( Returns: A `_SequenceCategoricalColumn`. """ - return _SequenceCategoricalColumn( + return fc._SequenceCategoricalColumn( fc.categorical_column_with_vocabulary_list( key=key, vocabulary_list=vocabulary_list, @@ -305,108 +321,6 @@ def sequence_categorical_column_with_vocabulary_list( num_oov_buckets=num_oov_buckets)) -# TODO(b/73160931): Merge with embedding_column -def _sequence_embedding_column( - categorical_column, dimension, initializer=None, ckpt_to_load_from=None, - tensor_name_in_ckpt=None, max_norm=None, trainable=True): - """Returns a feature column that represents sequences of embeddings. 
- - Use this to convert sequence categorical data into dense representation for - input to sequence NN, such as RNN. - - Example: - - ```python - watches = sequence_categorical_column_with_identity( - 'watches', num_buckets=1000) - watches_embedding = _sequence_embedding_column(watches, dimension=10) - columns = [watches] - - features = tf.parse_example(..., features=make_parse_example_spec(columns)) - input_layer, sequence_length = sequence_input_layer(features, columns) - - rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size) - outputs, state = tf.nn.dynamic_rnn( - rnn_cell, inputs=input_layer, sequence_length=sequence_length) - ``` - - Args: - categorical_column: A `_SequenceCategoricalColumn` created with a - `sequence_cateogrical_column_with_*` function. - dimension: Integer dimension of the embedding. - initializer: Initializer function used to initialize the embeddings. - ckpt_to_load_from: String representing checkpoint name/pattern from which to - restore column weights. Required if `tensor_name_in_ckpt` is not `None`. - tensor_name_in_ckpt: Name of the `Tensor` in `ckpt_to_load_from` from - which to restore the column weights. Required if `ckpt_to_load_from` is - not `None`. - max_norm: If not `None`, embedding values are l2-normalized to this value. - trainable: Whether or not the embedding is trainable. Default is True. - - Returns: - A `_SequenceCategoricalToDenseColumn`. - - Raises: - ValueError: If `categorical_column` is not the right type. - """ - if not isinstance(categorical_column, _SequenceCategoricalColumn): - raise ValueError( - 'categorical_column must be of type _SequenceCategoricalColumn. 
' - 'Given (type {}): {}'.format( - type(categorical_column), categorical_column)) - return _SequenceCategoricalToDenseColumn( - fc.embedding_column( - categorical_column, - dimension=dimension, - initializer=initializer, - ckpt_to_load_from=ckpt_to_load_from, - tensor_name_in_ckpt=tensor_name_in_ckpt, - max_norm=max_norm, - trainable=trainable)) - - -# TODO(b/73160931): Merge with indicator_column -def _sequence_indicator_column(categorical_column): - """Returns a feature column that represents sequences of multi-hot tensors. - - Use this to convert sequence categorical data into dense representation for - input to sequence NN, such as RNN. - - Example: - - ```python - colors = sequence_categorical_column_with_vocabulary_list( - key='colors', vocabulary_list=('R', 'G', 'B', 'Y')) - colors_indicator = _sequence_indicator_column(colors) - columns = [colors] - - features = tf.parse_example(..., features=make_parse_example_spec(columns)) - input_layer, sequence_length = sequence_input_layer(features, columns) - - rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size) - outputs, state = tf.nn.dynamic_rnn( - rnn_cell, inputs=input_layer, sequence_length=sequence_length) - ``` - - Args: - categorical_column: A `_SequenceCategoricalColumn` created with a - `sequence_cateogrical_column_with_*` function. - - Returns: - A `_SequenceCategoricalToDenseColumn`. - - Raises: - ValueError: If `categorical_column` is not the right type. - """ - if not isinstance(categorical_column, _SequenceCategoricalColumn): - raise ValueError( - 'categorical_column must be of type _SequenceCategoricalColumn. 
' - 'Given (type {}): {}'.format( - type(categorical_column), categorical_column)) - return _SequenceCategoricalToDenseColumn( - fc.indicator_column(categorical_column)) - - def sequence_numeric_column( key, shape=(1,), @@ -459,129 +373,8 @@ def _assert_all_equal_and_return(tensors, name=None): return array_ops.identity(tensors[0]) -class _SequenceDenseColumn(fc._FeatureColumn): - """Represents dense sequence data.""" - - __metaclass__ = abc.ABCMeta - - TensorSequenceLengthPair = collections.namedtuple( # pylint: disable=invalid-name - 'TensorSequenceLengthPair', ['dense_tensor', 'sequence_length']) - - @abc.abstractproperty - def _variable_shape(self): - """`TensorShape` without batch and sequence dimensions.""" - pass - - @abc.abstractmethod - def _get_sequence_dense_tensor( - self, inputs, weight_collections=None, trainable=None): - """Returns a `TensorSequenceLengthPair`.""" - pass - - -def _sequence_length_from_sparse_tensor(sp_tensor, num_elements=1): - with ops.name_scope(None, 'sequence_length') as name_scope: - row_ids = sp_tensor.indices[:, 0] - column_ids = sp_tensor.indices[:, 1] - column_ids += array_ops.ones_like(column_ids) - seq_length = math_ops.to_int64( - math_ops.segment_max(column_ids, segment_ids=row_ids) / num_elements) - # If the last n rows do not have ids, seq_length will have shape - # [batch_size - n]. Pad the remaining values with zeros. 
- n_pad = array_ops.shape(sp_tensor)[:1] - array_ops.shape(seq_length)[:1] - padding = array_ops.zeros(n_pad, dtype=seq_length.dtype) - return array_ops.concat([seq_length, padding], axis=0, name=name_scope) - - -class _SequenceCategoricalColumn( - fc._CategoricalColumn, - collections.namedtuple( - '_SequenceCategoricalColumn', ['categorical_column'])): - """Represents sequences of categorical data.""" - - @property - def name(self): - return self.categorical_column.name - - @property - def _parse_example_spec(self): - return self.categorical_column._parse_example_spec - - def _transform_feature(self, inputs): - return self.categorical_column._transform_feature(inputs) - - @property - def _num_buckets(self): - return self.categorical_column._num_buckets - - def _get_sparse_tensors(self, inputs, weight_collections=None, - trainable=None): - sparse_tensors = self.categorical_column._get_sparse_tensors(inputs) - id_tensor = sparse_tensors.id_tensor - weight_tensor = sparse_tensors.weight_tensor - # Expands final dimension, so that embeddings are not combined during - # embedding lookup. - check_id_rank = check_ops.assert_equal( - array_ops.rank(id_tensor), 2, - data=[ - 'Column {} expected ID tensor of rank 2. 
'.format(self.name), - 'id_tensor shape: ', array_ops.shape(id_tensor)]) - with ops.control_dependencies([check_id_rank]): - id_tensor = sparse_ops.sparse_reshape( - id_tensor, - shape=array_ops.concat([id_tensor.dense_shape, [1]], axis=0)) - if weight_tensor is not None: - check_weight_rank = check_ops.assert_equal( - array_ops.rank(weight_tensor), 2, - data=[ - 'Column {} expected weight tensor of rank 2.'.format(self.name), - 'weight_tensor shape:', array_ops.shape(weight_tensor)]) - with ops.control_dependencies([check_weight_rank]): - weight_tensor = sparse_ops.sparse_reshape( - weight_tensor, - shape=array_ops.concat([weight_tensor.dense_shape, [1]], axis=0)) - return fc._CategoricalColumn.IdWeightPair(id_tensor, weight_tensor) - - def _sequence_length(self, inputs): - sparse_tensors = self.categorical_column._get_sparse_tensors(inputs) - return _sequence_length_from_sparse_tensor(sparse_tensors.id_tensor) - - -class _SequenceCategoricalToDenseColumn( - _SequenceDenseColumn, - collections.namedtuple( - '_SequenceCategoricalToDenseColumn', ['dense_column'])): - """Densifies a _SequenceCategoricalColumn using the specified column.""" - - @property - def name(self): - return self.dense_column.name - - @property - def _parse_example_spec(self): - return self.dense_column._parse_example_spec - - def _transform_feature(self, inputs): - return self.dense_column._transform_feature(inputs) - - @property - def _variable_shape(self): - return self.dense_column._variable_shape - - def _get_sequence_dense_tensor( - self, inputs, weight_collections=None, trainable=None): - dense_tensor = self.dense_column._get_dense_tensor( - inputs=inputs, - weight_collections=weight_collections, - trainable=trainable) - sequence_length = self.dense_column.categorical_column._sequence_length( - inputs) - return _SequenceDenseColumn.TensorSequenceLengthPair( - dense_tensor=dense_tensor, sequence_length=sequence_length) - - class _SequenceNumericColumn( - _SequenceDenseColumn, + 
fc._SequenceDenseColumn, collections.namedtuple( '_SequenceNumericColumn', ['key', 'shape', 'default_value', 'dtype'])): @@ -616,9 +409,9 @@ class _SequenceNumericColumn( [array_ops.shape(dense_tensor)[:1], [-1], self._variable_shape], axis=0) dense_tensor = array_ops.reshape(dense_tensor, shape=dense_shape) - sequence_length = _sequence_length_from_sparse_tensor( + sequence_length = fc._sequence_length_from_sparse_tensor( sp_tensor, num_elements=self._variable_shape.num_elements()) - return _SequenceDenseColumn.TensorSequenceLengthPair( + return fc._SequenceDenseColumn.TensorSequenceLengthPair( dense_tensor=dense_tensor, sequence_length=sequence_length) # pylint: enable=protected-access diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py index c077f03291..b64f086376 100644 --- a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py +++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py @@ -22,6 +22,7 @@ import os import numpy as np from tensorflow.contrib.feature_column.python.feature_column import sequence_feature_column as sfc +from tensorflow.python.feature_column import feature_column as fc from tensorflow.python.feature_column.feature_column import _LazyBuilder from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors @@ -78,12 +79,12 @@ class SequenceInputLayerTest(test.TestCase): categorical_column_a = sfc.sequence_categorical_column_with_identity( key='aaa', num_buckets=vocabulary_size) - embedding_column_a = sfc._sequence_embedding_column( + embedding_column_a = fc.embedding_column( categorical_column_a, dimension=embedding_dimension_a, initializer=_get_initializer(embedding_dimension_a, embedding_values_a)) categorical_column_b = sfc.sequence_categorical_column_with_identity( key='bbb', 
num_buckets=vocabulary_size) - embedding_column_b = sfc._sequence_embedding_column( + embedding_column_b = fc.embedding_column( categorical_column_b, dimension=embedding_dimension_b, initializer=_get_initializer(embedding_dimension_b, embedding_values_b)) @@ -107,6 +108,29 @@ class SequenceInputLayerTest(test.TestCase): self.assertAllEqual( expected_sequence_length, sequence_length.eval(session=sess)) + def test_embedding_column_with_non_sequence_categorical(self): + """Tests that error is raised for non-sequence categorical column.""" + vocabulary_size = 3 + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + indices=((0, 0), (1, 0), (1, 1)), + values=(2, 0, 1), + dense_shape=(2, 2)) + + categorical_column_a = fc.categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + embedding_column_a = fc.embedding_column( + categorical_column_a, dimension=2) + + with self.assertRaisesRegexp( + ValueError, + r'In embedding_column: aaa_embedding\. 
categorical_column must be of ' + r'type _SequenceCategoricalColumn to use sequence_input_layer\.'): + _, _ = sfc.sequence_input_layer( + features={'aaa': sparse_input}, + feature_columns=[embedding_column_a]) + def test_indicator_column(self): vocabulary_size_a = 3 sparse_input_a = sparse_tensor.SparseTensorValue( @@ -133,10 +157,10 @@ class SequenceInputLayerTest(test.TestCase): categorical_column_a = sfc.sequence_categorical_column_with_identity( key='aaa', num_buckets=vocabulary_size_a) - indicator_column_a = sfc._sequence_indicator_column(categorical_column_a) + indicator_column_a = fc.indicator_column(categorical_column_a) categorical_column_b = sfc.sequence_categorical_column_with_identity( key='bbb', num_buckets=vocabulary_size_b) - indicator_column_b = sfc._sequence_indicator_column(categorical_column_b) + indicator_column_b = fc.indicator_column(categorical_column_b) input_layer, sequence_length = sfc.sequence_input_layer( features={ 'aaa': sparse_input_a, @@ -150,6 +174,28 @@ class SequenceInputLayerTest(test.TestCase): self.assertAllEqual( expected_sequence_length, sequence_length.eval(session=sess)) + def test_indicator_column_with_non_sequence_categorical(self): + """Tests that error is raised for non-sequence categorical column.""" + vocabulary_size = 3 + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + indices=((0, 0), (1, 0), (1, 1)), + values=(2, 0, 1), + dense_shape=(2, 2)) + + categorical_column_a = fc.categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + indicator_column_a = fc.indicator_column(categorical_column_a) + + with self.assertRaisesRegexp( + ValueError, + r'In indicator_column: aaa_indicator\. 
categorical_column must be of ' + r'type _SequenceCategoricalColumn to use sequence_input_layer\.'): + _, _ = sfc.sequence_input_layer( + features={'aaa': sparse_input}, + feature_columns=[indicator_column_a]) + def test_numeric_column(self): sparse_input = sparse_tensor.SparseTensorValue( # example 0, values [[0.], [1]] @@ -230,6 +276,55 @@ class SequenceInputLayerTest(test.TestCase): sess.run(sequence_length) +class InputLayerTest(test.TestCase): + """Tests input_layer with sequence feature columns.""" + + def test_embedding_column(self): + """Tests that error is raised for sequence embedding column.""" + vocabulary_size = 3 + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + indices=((0, 0), (1, 0), (1, 1)), + values=(2, 0, 1), + dense_shape=(2, 2)) + + categorical_column_a = sfc.sequence_categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + embedding_column_a = fc.embedding_column( + categorical_column_a, dimension=2) + + with self.assertRaisesRegexp( + ValueError, + r'In embedding_column: aaa_embedding\. categorical_column must not be ' + r'of type _SequenceCategoricalColumn\.'): + _ = fc.input_layer( + features={'aaa': sparse_input}, + feature_columns=[embedding_column_a]) + + def test_indicator_column(self): + """Tests that error is raised for sequence indicator column.""" + vocabulary_size = 3 + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + indices=((0, 0), (1, 0), (1, 1)), + values=(2, 0, 1), + dense_shape=(2, 2)) + + categorical_column_a = sfc.sequence_categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + indicator_column_a = fc.indicator_column(categorical_column_a) + + with self.assertRaisesRegexp( + ValueError, + r'In indicator_column: aaa_indicator\. 
categorical_column must not be ' + r'of type _SequenceCategoricalColumn\.'): + _ = fc.input_layer( + features={'aaa': sparse_input}, + feature_columns=[indicator_column_a]) + + def _assert_sparse_tensor_value(test_case, expected, actual): _assert_sparse_tensor_indices_shape(test_case, expected, actual) @@ -287,37 +382,6 @@ class SequenceCategoricalColumnWithIdentityTest(test.TestCase): with monitored_session.MonitoredSession() as sess: id_weight_pair.id_tensor.eval(session=sess) - def test_sequence_length(self): - column = sfc.sequence_categorical_column_with_identity( - 'aaa', num_buckets=3) - inputs = sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=(1, 2, 0), - dense_shape=(2, 2)) - expected_sequence_length = [1, 2] - - sequence_length = column._sequence_length(_LazyBuilder({'aaa': inputs})) - - with monitored_session.MonitoredSession() as sess: - sequence_length = sess.run(sequence_length) - self.assertAllEqual(expected_sequence_length, sequence_length) - self.assertEqual(np.int64, sequence_length.dtype) - - def test_sequence_length_with_zeros(self): - column = sfc.sequence_categorical_column_with_identity( - 'aaa', num_buckets=3) - inputs = sparse_tensor.SparseTensorValue( - indices=((1, 0), (3, 0), (3, 1)), - values=(1, 2, 0), - dense_shape=(5, 2)) - expected_sequence_length = [0, 1, 0, 2, 0] - - sequence_length = column._sequence_length(_LazyBuilder({'aaa': inputs})) - - with monitored_session.MonitoredSession() as sess: - self.assertAllEqual( - expected_sequence_length, sequence_length.eval(session=sess)) - class SequenceCategoricalColumnWithHashBucketTest(test.TestCase): @@ -344,21 +408,6 @@ class SequenceCategoricalColumnWithHashBucketTest(test.TestCase): expected_sparse_ids, id_weight_pair.id_tensor.eval(session=sess)) - def test_sequence_length(self): - column = sfc.sequence_categorical_column_with_hash_bucket( - 'aaa', hash_bucket_size=10) - inputs = sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - 
values=('omar', 'stringer', 'marlo'), - dense_shape=(2, 2)) - expected_sequence_length = [1, 2] - - sequence_length = column._sequence_length(_LazyBuilder({'aaa': inputs})) - - with monitored_session.MonitoredSession() as sess: - self.assertAllEqual( - expected_sequence_length, sequence_length.eval(session=sess)) - class SequenceCategoricalColumnWithVocabularyFileTest(test.TestCase): @@ -399,23 +448,6 @@ class SequenceCategoricalColumnWithVocabularyFileTest(test.TestCase): expected_sparse_ids, id_weight_pair.id_tensor.eval(session=sess)) - def test_sequence_length(self): - column = sfc.sequence_categorical_column_with_vocabulary_file( - key='aaa', - vocabulary_file=self._wire_vocabulary_file_name, - vocabulary_size=self._wire_vocabulary_size) - inputs = sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=('marlo', 'skywalker', 'omar'), - dense_shape=(2, 2)) - expected_sequence_length = [1, 2] - - sequence_length = column._sequence_length(_LazyBuilder({'aaa': inputs})) - - with monitored_session.MonitoredSession() as sess: - self.assertAllEqual( - expected_sequence_length, sequence_length.eval(session=sess)) - class SequenceCategoricalColumnWithVocabularyListTest(test.TestCase): @@ -441,22 +473,6 @@ class SequenceCategoricalColumnWithVocabularyListTest(test.TestCase): expected_sparse_ids, id_weight_pair.id_tensor.eval(session=sess)) - def test_sequence_length(self): - column = sfc.sequence_categorical_column_with_vocabulary_list( - key='aaa', - vocabulary_list=('omar', 'stringer', 'marlo')) - inputs = sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=('marlo', 'skywalker', 'omar'), - dense_shape=(2, 2)) - expected_sequence_length = [1, 2] - - sequence_length = column._sequence_length(_LazyBuilder({'aaa': inputs})) - - with monitored_session.MonitoredSession() as sess: - self.assertAllEqual( - expected_sequence_length, sequence_length.eval(session=sess)) - class SequenceEmbeddingColumnTest(test.TestCase): @@ 
-496,7 +512,7 @@ class SequenceEmbeddingColumnTest(test.TestCase): categorical_column = sfc.sequence_categorical_column_with_identity( key='aaa', num_buckets=vocabulary_size) - embedding_column = sfc._sequence_embedding_column( + embedding_column = fc.embedding_column( categorical_column, dimension=embedding_dimension, initializer=_initializer) @@ -522,7 +538,7 @@ class SequenceEmbeddingColumnTest(test.TestCase): categorical_column = sfc.sequence_categorical_column_with_identity( key='aaa', num_buckets=vocabulary_size) - embedding_column = sfc._sequence_embedding_column( + embedding_column = fc.embedding_column( categorical_column, dimension=2) _, sequence_length = embedding_column._get_sequence_dense_tensor( @@ -550,7 +566,7 @@ class SequenceEmbeddingColumnTest(test.TestCase): categorical_column = sfc.sequence_categorical_column_with_identity( key='aaa', num_buckets=vocabulary_size) - embedding_column = sfc._sequence_embedding_column( + embedding_column = fc.embedding_column( categorical_column, dimension=2) _, sequence_length = embedding_column._get_sequence_dense_tensor( @@ -587,7 +603,7 @@ class SequenceIndicatorColumnTest(test.TestCase): categorical_column = sfc.sequence_categorical_column_with_identity( key='aaa', num_buckets=vocabulary_size) - indicator_column = sfc._sequence_indicator_column(categorical_column) + indicator_column = fc.indicator_column(categorical_column) indicator_tensor, _ = indicator_column._get_sequence_dense_tensor( _LazyBuilder({'aaa': sparse_input})) @@ -607,7 +623,7 @@ class SequenceIndicatorColumnTest(test.TestCase): categorical_column = sfc.sequence_categorical_column_with_identity( key='aaa', num_buckets=vocabulary_size) - indicator_column = sfc._sequence_indicator_column(categorical_column) + indicator_column = fc.indicator_column(categorical_column) _, sequence_length = indicator_column._get_sequence_dense_tensor( _LazyBuilder({'aaa': sparse_input})) @@ -634,7 +650,7 @@ class SequenceIndicatorColumnTest(test.TestCase): 
categorical_column = sfc.sequence_categorical_column_with_identity( key='aaa', num_buckets=vocabulary_size) - indicator_column = sfc._sequence_indicator_column(categorical_column) + indicator_column = fc.indicator_column(categorical_column) _, sequence_length = indicator_column._get_sequence_dense_tensor( _LazyBuilder({'aaa': sparse_input})) |