author     A. Unique TensorFlower <gardener@tensorflow.org>  2018-03-12 21:53:54 -0700
committer  TensorFlower Gardener <gardener@tensorflow.org>   2018-03-12 21:58:13 -0700
commit     ed21fd502316634fe9d139a3ba4d4d1002137e0b (patch)
tree       fcfcbcd674c70291151a307ecfebae02d68f8adf /tensorflow/contrib/feature_column
parent     a0a11bc349e3aa28da050d10a0c509241dbee414 (diff)
Merges (embedding|indicator)_column with their sequence counterparts, and exposes sequence methods under tf.contrib.feature_column.
PiperOrigin-RevId: 188826187
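The intended usage after this change, sketched below for orientation (not part of the patch): a sequence categorical column from tf.contrib.feature_column is wrapped with the standard `embedding_column` (or `indicator_column`) and fed to `sequence_input_layer`, replacing the removed private `_sequence_embedding_column`/`_sequence_indicator_column` helpers. The feature name, bucket count, embedding dimension, and RNN size are illustrative only; the flow mirrors the docstring examples removed in this diff.

```python
import tensorflow as tf

# Sequence categorical column, now exposed under tf.contrib.feature_column.
watches = tf.contrib.feature_column.sequence_categorical_column_with_identity(
    'watches', num_buckets=1000)
# After this change, the regular embedding_column accepts sequence categorical
# columns (previously this required the private _sequence_embedding_column).
watches_embedding = tf.feature_column.embedding_column(watches, dimension=10)
columns = [watches_embedding]

# Placeholder for a batch of serialized tf.Example protos (illustrative).
serialized_examples = tf.placeholder(tf.string, shape=[None])
features = tf.parse_example(
    serialized_examples,
    features=tf.feature_column.make_parse_example_spec(columns))

# Returns a [batch, max_seq_len, dim] dense tensor plus per-example lengths.
input_layer, sequence_length = tf.contrib.feature_column.sequence_input_layer(
    features, columns)

rnn_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=32)
outputs, state = tf.nn.dynamic_rnn(
    rnn_cell, inputs=input_layer, sequence_length=sequence_length,
    dtype=tf.float32)
```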
Diffstat (limited to 'tensorflow/contrib/feature_column')
-rw-r--r--  tensorflow/contrib/feature_column/BUILD                                                   |   2
-rw-r--r--  tensorflow/contrib/feature_column/__init__.py                                             |   6
-rw-r--r--  tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py        | 259
-rw-r--r--  tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py   | 194
4 files changed, 138 insertions, 323 deletions
diff --git a/tensorflow/contrib/feature_column/BUILD b/tensorflow/contrib/feature_column/BUILD
index 8ba0823a71..3614b2b15a 100644
--- a/tensorflow/contrib/feature_column/BUILD
+++ b/tensorflow/contrib/feature_column/BUILD
@@ -26,6 +26,7 @@ py_library(
srcs_version = "PY2AND3",
deps = [
":sequence_feature_column",
+ "//tensorflow/python:util",
],
)
@@ -38,7 +39,6 @@ py_library(
"//tensorflow/python:check_ops",
"//tensorflow/python:dtypes",
"//tensorflow/python:framework_ops",
- "//tensorflow/python:math_ops",
"//tensorflow/python:parsing_ops",
"//tensorflow/python:sparse_ops",
"//tensorflow/python:tensor_shape",
diff --git a/tensorflow/contrib/feature_column/__init__.py b/tensorflow/contrib/feature_column/__init__.py
index 650a80144f..baa8c1567a 100644
--- a/tensorflow/contrib/feature_column/__init__.py
+++ b/tensorflow/contrib/feature_column/__init__.py
@@ -25,6 +25,12 @@ from tensorflow.python.util.all_util import remove_undocumented
# pylint: enable=unused-import,line-too-long,wildcard-import
_allowed_symbols = [
+ 'sequence_categorical_column_with_hash_bucket',
+ 'sequence_categorical_column_with_identity',
+ 'sequence_categorical_column_with_vocabulary_list',
+ 'sequence_categorical_column_with_vocabulary_file',
+ 'sequence_input_layer',
+ 'sequence_numeric_column',
]
remove_undocumented(__name__, allowed_exception_list=_allowed_symbols)
diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py
index f57557c1cc..e60116966f 100644
--- a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py
+++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py
@@ -19,7 +19,6 @@ from __future__ import division
from __future__ import print_function
-import abc
import collections
@@ -29,7 +28,6 @@ from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_shape
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import check_ops
-from tensorflow.python.ops import math_ops
from tensorflow.python.ops import parsing_ops
from tensorflow.python.ops import sparse_ops
from tensorflow.python.ops import variable_scope
@@ -99,9 +97,11 @@ def sequence_input_layer(
"""
feature_columns = fc._clean_feature_columns(feature_columns)
for c in feature_columns:
- if not isinstance(c, _SequenceDenseColumn):
+ if not isinstance(c, fc._SequenceDenseColumn):
raise ValueError(
'All feature_columns must be of type _SequenceDenseColumn. '
+ 'You can wrap a sequence_categorical_column with an embedding_column '
+ 'or indicator_column. '
'Given (type {}): {}'.format(type(c), c))
with variable_scope.variable_scope(
@@ -136,6 +136,10 @@ def sequence_categorical_column_with_identity(
key, num_buckets, default_value=None):
"""Returns a feature column that represents sequences of integers.
+ Pass this to `embedding_column` or `indicator_column` to convert sequence
+ categorical data into dense representation for input to sequence NN, such as
+ RNN.
+
Example:
```python
@@ -163,7 +167,7 @@ def sequence_categorical_column_with_identity(
Returns:
A `_SequenceCategoricalColumn`.
"""
- return _SequenceCategoricalColumn(
+ return fc._SequenceCategoricalColumn(
fc.categorical_column_with_identity(
key=key,
num_buckets=num_buckets,
@@ -174,6 +178,10 @@ def sequence_categorical_column_with_hash_bucket(
key, hash_bucket_size, dtype=dtypes.string):
"""A sequence of categorical terms where ids are set by hashing.
+ Pass this to `embedding_column` or `indicator_column` to convert sequence
+ categorical data into dense representation for input to sequence NN, such as
+ RNN.
+
Example:
```python
@@ -198,7 +206,7 @@ def sequence_categorical_column_with_hash_bucket(
Returns:
A `_SequenceCategoricalColumn`.
"""
- return _SequenceCategoricalColumn(
+ return fc._SequenceCategoricalColumn(
fc.categorical_column_with_hash_bucket(
key=key,
hash_bucket_size=hash_bucket_size,
@@ -210,6 +218,10 @@ def sequence_categorical_column_with_vocabulary_file(
default_value=None, dtype=dtypes.string):
"""A sequence of categorical terms where ids use a vocabulary file.
+ Pass this to `embedding_column` or `indicator_column` to convert sequence
+ categorical data into dense representation for input to sequence NN, such as
+ RNN.
+
Example:
```python
@@ -246,7 +258,7 @@ def sequence_categorical_column_with_vocabulary_file(
Returns:
A `_SequenceCategoricalColumn`.
"""
- return _SequenceCategoricalColumn(
+ return fc._SequenceCategoricalColumn(
fc.categorical_column_with_vocabulary_file(
key=key,
vocabulary_file=vocabulary_file,
@@ -260,6 +272,10 @@ def sequence_categorical_column_with_vocabulary_list(
key, vocabulary_list, dtype=None, default_value=-1, num_oov_buckets=0):
"""A sequence of categorical terms where ids use an in-memory list.
+ Pass this to `embedding_column` or `indicator_column` to convert sequence
+ categorical data into dense representation for input to sequence NN, such as
+ RNN.
+
Example:
```python
@@ -296,7 +312,7 @@ def sequence_categorical_column_with_vocabulary_list(
Returns:
A `_SequenceCategoricalColumn`.
"""
- return _SequenceCategoricalColumn(
+ return fc._SequenceCategoricalColumn(
fc.categorical_column_with_vocabulary_list(
key=key,
vocabulary_list=vocabulary_list,
@@ -305,108 +321,6 @@ def sequence_categorical_column_with_vocabulary_list(
num_oov_buckets=num_oov_buckets))
-# TODO(b/73160931): Merge with embedding_column
-def _sequence_embedding_column(
- categorical_column, dimension, initializer=None, ckpt_to_load_from=None,
- tensor_name_in_ckpt=None, max_norm=None, trainable=True):
- """Returns a feature column that represents sequences of embeddings.
-
- Use this to convert sequence categorical data into dense representation for
- input to sequence NN, such as RNN.
-
- Example:
-
- ```python
- watches = sequence_categorical_column_with_identity(
- 'watches', num_buckets=1000)
- watches_embedding = _sequence_embedding_column(watches, dimension=10)
- columns = [watches]
-
- features = tf.parse_example(..., features=make_parse_example_spec(columns))
- input_layer, sequence_length = sequence_input_layer(features, columns)
-
- rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size)
- outputs, state = tf.nn.dynamic_rnn(
- rnn_cell, inputs=input_layer, sequence_length=sequence_length)
- ```
-
- Args:
- categorical_column: A `_SequenceCategoricalColumn` created with a
- `sequence_cateogrical_column_with_*` function.
- dimension: Integer dimension of the embedding.
- initializer: Initializer function used to initialize the embeddings.
- ckpt_to_load_from: String representing checkpoint name/pattern from which to
- restore column weights. Required if `tensor_name_in_ckpt` is not `None`.
- tensor_name_in_ckpt: Name of the `Tensor` in `ckpt_to_load_from` from
- which to restore the column weights. Required if `ckpt_to_load_from` is
- not `None`.
- max_norm: If not `None`, embedding values are l2-normalized to this value.
- trainable: Whether or not the embedding is trainable. Default is True.
-
- Returns:
- A `_SequenceCategoricalToDenseColumn`.
-
- Raises:
- ValueError: If `categorical_column` is not the right type.
- """
- if not isinstance(categorical_column, _SequenceCategoricalColumn):
- raise ValueError(
- 'categorical_column must be of type _SequenceCategoricalColumn. '
- 'Given (type {}): {}'.format(
- type(categorical_column), categorical_column))
- return _SequenceCategoricalToDenseColumn(
- fc.embedding_column(
- categorical_column,
- dimension=dimension,
- initializer=initializer,
- ckpt_to_load_from=ckpt_to_load_from,
- tensor_name_in_ckpt=tensor_name_in_ckpt,
- max_norm=max_norm,
- trainable=trainable))
-
-
-# TODO(b/73160931): Merge with indicator_column
-def _sequence_indicator_column(categorical_column):
- """Returns a feature column that represents sequences of multi-hot tensors.
-
- Use this to convert sequence categorical data into dense representation for
- input to sequence NN, such as RNN.
-
- Example:
-
- ```python
- colors = sequence_categorical_column_with_vocabulary_list(
- key='colors', vocabulary_list=('R', 'G', 'B', 'Y'))
- colors_indicator = _sequence_indicator_column(colors)
- columns = [colors]
-
- features = tf.parse_example(..., features=make_parse_example_spec(columns))
- input_layer, sequence_length = sequence_input_layer(features, columns)
-
- rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size)
- outputs, state = tf.nn.dynamic_rnn(
- rnn_cell, inputs=input_layer, sequence_length=sequence_length)
- ```
-
- Args:
- categorical_column: A `_SequenceCategoricalColumn` created with a
- `sequence_cateogrical_column_with_*` function.
-
- Returns:
- A `_SequenceCategoricalToDenseColumn`.
-
- Raises:
- ValueError: If `categorical_column` is not the right type.
- """
- if not isinstance(categorical_column, _SequenceCategoricalColumn):
- raise ValueError(
- 'categorical_column must be of type _SequenceCategoricalColumn. '
- 'Given (type {}): {}'.format(
- type(categorical_column), categorical_column))
- return _SequenceCategoricalToDenseColumn(
- fc.indicator_column(categorical_column))
-
-
def sequence_numeric_column(
key,
shape=(1,),
@@ -459,129 +373,8 @@ def _assert_all_equal_and_return(tensors, name=None):
return array_ops.identity(tensors[0])
-class _SequenceDenseColumn(fc._FeatureColumn):
- """Represents dense sequence data."""
-
- __metaclass__ = abc.ABCMeta
-
- TensorSequenceLengthPair = collections.namedtuple( # pylint: disable=invalid-name
- 'TensorSequenceLengthPair', ['dense_tensor', 'sequence_length'])
-
- @abc.abstractproperty
- def _variable_shape(self):
- """`TensorShape` without batch and sequence dimensions."""
- pass
-
- @abc.abstractmethod
- def _get_sequence_dense_tensor(
- self, inputs, weight_collections=None, trainable=None):
- """Returns a `TensorSequenceLengthPair`."""
- pass
-
-
-def _sequence_length_from_sparse_tensor(sp_tensor, num_elements=1):
- with ops.name_scope(None, 'sequence_length') as name_scope:
- row_ids = sp_tensor.indices[:, 0]
- column_ids = sp_tensor.indices[:, 1]
- column_ids += array_ops.ones_like(column_ids)
- seq_length = math_ops.to_int64(
- math_ops.segment_max(column_ids, segment_ids=row_ids) / num_elements)
- # If the last n rows do not have ids, seq_length will have shape
- # [batch_size - n]. Pad the remaining values with zeros.
- n_pad = array_ops.shape(sp_tensor)[:1] - array_ops.shape(seq_length)[:1]
- padding = array_ops.zeros(n_pad, dtype=seq_length.dtype)
- return array_ops.concat([seq_length, padding], axis=0, name=name_scope)
-
-
-class _SequenceCategoricalColumn(
- fc._CategoricalColumn,
- collections.namedtuple(
- '_SequenceCategoricalColumn', ['categorical_column'])):
- """Represents sequences of categorical data."""
-
- @property
- def name(self):
- return self.categorical_column.name
-
- @property
- def _parse_example_spec(self):
- return self.categorical_column._parse_example_spec
-
- def _transform_feature(self, inputs):
- return self.categorical_column._transform_feature(inputs)
-
- @property
- def _num_buckets(self):
- return self.categorical_column._num_buckets
-
- def _get_sparse_tensors(self, inputs, weight_collections=None,
- trainable=None):
- sparse_tensors = self.categorical_column._get_sparse_tensors(inputs)
- id_tensor = sparse_tensors.id_tensor
- weight_tensor = sparse_tensors.weight_tensor
- # Expands final dimension, so that embeddings are not combined during
- # embedding lookup.
- check_id_rank = check_ops.assert_equal(
- array_ops.rank(id_tensor), 2,
- data=[
- 'Column {} expected ID tensor of rank 2. '.format(self.name),
- 'id_tensor shape: ', array_ops.shape(id_tensor)])
- with ops.control_dependencies([check_id_rank]):
- id_tensor = sparse_ops.sparse_reshape(
- id_tensor,
- shape=array_ops.concat([id_tensor.dense_shape, [1]], axis=0))
- if weight_tensor is not None:
- check_weight_rank = check_ops.assert_equal(
- array_ops.rank(weight_tensor), 2,
- data=[
- 'Column {} expected weight tensor of rank 2.'.format(self.name),
- 'weight_tensor shape:', array_ops.shape(weight_tensor)])
- with ops.control_dependencies([check_weight_rank]):
- weight_tensor = sparse_ops.sparse_reshape(
- weight_tensor,
- shape=array_ops.concat([weight_tensor.dense_shape, [1]], axis=0))
- return fc._CategoricalColumn.IdWeightPair(id_tensor, weight_tensor)
-
- def _sequence_length(self, inputs):
- sparse_tensors = self.categorical_column._get_sparse_tensors(inputs)
- return _sequence_length_from_sparse_tensor(sparse_tensors.id_tensor)
-
-
-class _SequenceCategoricalToDenseColumn(
- _SequenceDenseColumn,
- collections.namedtuple(
- '_SequenceCategoricalToDenseColumn', ['dense_column'])):
- """Densifies a _SequenceCategoricalColumn using the specified column."""
-
- @property
- def name(self):
- return self.dense_column.name
-
- @property
- def _parse_example_spec(self):
- return self.dense_column._parse_example_spec
-
- def _transform_feature(self, inputs):
- return self.dense_column._transform_feature(inputs)
-
- @property
- def _variable_shape(self):
- return self.dense_column._variable_shape
-
- def _get_sequence_dense_tensor(
- self, inputs, weight_collections=None, trainable=None):
- dense_tensor = self.dense_column._get_dense_tensor(
- inputs=inputs,
- weight_collections=weight_collections,
- trainable=trainable)
- sequence_length = self.dense_column.categorical_column._sequence_length(
- inputs)
- return _SequenceDenseColumn.TensorSequenceLengthPair(
- dense_tensor=dense_tensor, sequence_length=sequence_length)
-
-
class _SequenceNumericColumn(
- _SequenceDenseColumn,
+ fc._SequenceDenseColumn,
collections.namedtuple(
'_SequenceNumericColumn',
['key', 'shape', 'default_value', 'dtype'])):
@@ -616,9 +409,9 @@ class _SequenceNumericColumn(
[array_ops.shape(dense_tensor)[:1], [-1], self._variable_shape],
axis=0)
dense_tensor = array_ops.reshape(dense_tensor, shape=dense_shape)
- sequence_length = _sequence_length_from_sparse_tensor(
+ sequence_length = fc._sequence_length_from_sparse_tensor(
sp_tensor, num_elements=self._variable_shape.num_elements())
- return _SequenceDenseColumn.TensorSequenceLengthPair(
+ return fc._SequenceDenseColumn.TensorSequenceLengthPair(
dense_tensor=dense_tensor, sequence_length=sequence_length)
# pylint: enable=protected-access
diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py
index c077f03291..b64f086376 100644
--- a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py
+++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py
@@ -22,6 +22,7 @@ import os
import numpy as np
from tensorflow.contrib.feature_column.python.feature_column import sequence_feature_column as sfc
+from tensorflow.python.feature_column import feature_column as fc
from tensorflow.python.feature_column.feature_column import _LazyBuilder
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import errors
@@ -78,12 +79,12 @@ class SequenceInputLayerTest(test.TestCase):
categorical_column_a = sfc.sequence_categorical_column_with_identity(
key='aaa', num_buckets=vocabulary_size)
- embedding_column_a = sfc._sequence_embedding_column(
+ embedding_column_a = fc.embedding_column(
categorical_column_a, dimension=embedding_dimension_a,
initializer=_get_initializer(embedding_dimension_a, embedding_values_a))
categorical_column_b = sfc.sequence_categorical_column_with_identity(
key='bbb', num_buckets=vocabulary_size)
- embedding_column_b = sfc._sequence_embedding_column(
+ embedding_column_b = fc.embedding_column(
categorical_column_b, dimension=embedding_dimension_b,
initializer=_get_initializer(embedding_dimension_b, embedding_values_b))
@@ -107,6 +108,29 @@ class SequenceInputLayerTest(test.TestCase):
self.assertAllEqual(
expected_sequence_length, sequence_length.eval(session=sess))
+ def test_embedding_column_with_non_sequence_categorical(self):
+ """Tests that error is raised for non-sequence categorical column."""
+ vocabulary_size = 3
+ sparse_input = sparse_tensor.SparseTensorValue(
+ # example 0, ids [2]
+ # example 1, ids [0, 1]
+ indices=((0, 0), (1, 0), (1, 1)),
+ values=(2, 0, 1),
+ dense_shape=(2, 2))
+
+ categorical_column_a = fc.categorical_column_with_identity(
+ key='aaa', num_buckets=vocabulary_size)
+ embedding_column_a = fc.embedding_column(
+ categorical_column_a, dimension=2)
+
+ with self.assertRaisesRegexp(
+ ValueError,
+ r'In embedding_column: aaa_embedding\. categorical_column must be of '
+ r'type _SequenceCategoricalColumn to use sequence_input_layer\.'):
+ _, _ = sfc.sequence_input_layer(
+ features={'aaa': sparse_input},
+ feature_columns=[embedding_column_a])
+
def test_indicator_column(self):
vocabulary_size_a = 3
sparse_input_a = sparse_tensor.SparseTensorValue(
@@ -133,10 +157,10 @@ class SequenceInputLayerTest(test.TestCase):
categorical_column_a = sfc.sequence_categorical_column_with_identity(
key='aaa', num_buckets=vocabulary_size_a)
- indicator_column_a = sfc._sequence_indicator_column(categorical_column_a)
+ indicator_column_a = fc.indicator_column(categorical_column_a)
categorical_column_b = sfc.sequence_categorical_column_with_identity(
key='bbb', num_buckets=vocabulary_size_b)
- indicator_column_b = sfc._sequence_indicator_column(categorical_column_b)
+ indicator_column_b = fc.indicator_column(categorical_column_b)
input_layer, sequence_length = sfc.sequence_input_layer(
features={
'aaa': sparse_input_a,
@@ -150,6 +174,28 @@ class SequenceInputLayerTest(test.TestCase):
self.assertAllEqual(
expected_sequence_length, sequence_length.eval(session=sess))
+ def test_indicator_column_with_non_sequence_categorical(self):
+ """Tests that error is raised for non-sequence categorical column."""
+ vocabulary_size = 3
+ sparse_input = sparse_tensor.SparseTensorValue(
+ # example 0, ids [2]
+ # example 1, ids [0, 1]
+ indices=((0, 0), (1, 0), (1, 1)),
+ values=(2, 0, 1),
+ dense_shape=(2, 2))
+
+ categorical_column_a = fc.categorical_column_with_identity(
+ key='aaa', num_buckets=vocabulary_size)
+ indicator_column_a = fc.indicator_column(categorical_column_a)
+
+ with self.assertRaisesRegexp(
+ ValueError,
+ r'In indicator_column: aaa_indicator\. categorical_column must be of '
+ r'type _SequenceCategoricalColumn to use sequence_input_layer\.'):
+ _, _ = sfc.sequence_input_layer(
+ features={'aaa': sparse_input},
+ feature_columns=[indicator_column_a])
+
def test_numeric_column(self):
sparse_input = sparse_tensor.SparseTensorValue(
# example 0, values [[0.], [1]]
@@ -230,6 +276,55 @@ class SequenceInputLayerTest(test.TestCase):
sess.run(sequence_length)
+class InputLayerTest(test.TestCase):
+ """Tests input_layer with sequence feature columns."""
+
+ def test_embedding_column(self):
+ """Tests that error is raised for sequence embedding column."""
+ vocabulary_size = 3
+ sparse_input = sparse_tensor.SparseTensorValue(
+ # example 0, ids [2]
+ # example 1, ids [0, 1]
+ indices=((0, 0), (1, 0), (1, 1)),
+ values=(2, 0, 1),
+ dense_shape=(2, 2))
+
+ categorical_column_a = sfc.sequence_categorical_column_with_identity(
+ key='aaa', num_buckets=vocabulary_size)
+ embedding_column_a = fc.embedding_column(
+ categorical_column_a, dimension=2)
+
+ with self.assertRaisesRegexp(
+ ValueError,
+ r'In embedding_column: aaa_embedding\. categorical_column must not be '
+ r'of type _SequenceCategoricalColumn\.'):
+ _ = fc.input_layer(
+ features={'aaa': sparse_input},
+ feature_columns=[embedding_column_a])
+
+ def test_indicator_column(self):
+ """Tests that error is raised for sequence indicator column."""
+ vocabulary_size = 3
+ sparse_input = sparse_tensor.SparseTensorValue(
+ # example 0, ids [2]
+ # example 1, ids [0, 1]
+ indices=((0, 0), (1, 0), (1, 1)),
+ values=(2, 0, 1),
+ dense_shape=(2, 2))
+
+ categorical_column_a = sfc.sequence_categorical_column_with_identity(
+ key='aaa', num_buckets=vocabulary_size)
+ indicator_column_a = fc.indicator_column(categorical_column_a)
+
+ with self.assertRaisesRegexp(
+ ValueError,
+ r'In indicator_column: aaa_indicator\. categorical_column must not be '
+ r'of type _SequenceCategoricalColumn\.'):
+ _ = fc.input_layer(
+ features={'aaa': sparse_input},
+ feature_columns=[indicator_column_a])
+
+
def _assert_sparse_tensor_value(test_case, expected, actual):
_assert_sparse_tensor_indices_shape(test_case, expected, actual)
@@ -287,37 +382,6 @@ class SequenceCategoricalColumnWithIdentityTest(test.TestCase):
with monitored_session.MonitoredSession() as sess:
id_weight_pair.id_tensor.eval(session=sess)
- def test_sequence_length(self):
- column = sfc.sequence_categorical_column_with_identity(
- 'aaa', num_buckets=3)
- inputs = sparse_tensor.SparseTensorValue(
- indices=((0, 0), (1, 0), (1, 1)),
- values=(1, 2, 0),
- dense_shape=(2, 2))
- expected_sequence_length = [1, 2]
-
- sequence_length = column._sequence_length(_LazyBuilder({'aaa': inputs}))
-
- with monitored_session.MonitoredSession() as sess:
- sequence_length = sess.run(sequence_length)
- self.assertAllEqual(expected_sequence_length, sequence_length)
- self.assertEqual(np.int64, sequence_length.dtype)
-
- def test_sequence_length_with_zeros(self):
- column = sfc.sequence_categorical_column_with_identity(
- 'aaa', num_buckets=3)
- inputs = sparse_tensor.SparseTensorValue(
- indices=((1, 0), (3, 0), (3, 1)),
- values=(1, 2, 0),
- dense_shape=(5, 2))
- expected_sequence_length = [0, 1, 0, 2, 0]
-
- sequence_length = column._sequence_length(_LazyBuilder({'aaa': inputs}))
-
- with monitored_session.MonitoredSession() as sess:
- self.assertAllEqual(
- expected_sequence_length, sequence_length.eval(session=sess))
-
class SequenceCategoricalColumnWithHashBucketTest(test.TestCase):
@@ -344,21 +408,6 @@ class SequenceCategoricalColumnWithHashBucketTest(test.TestCase):
expected_sparse_ids,
id_weight_pair.id_tensor.eval(session=sess))
- def test_sequence_length(self):
- column = sfc.sequence_categorical_column_with_hash_bucket(
- 'aaa', hash_bucket_size=10)
- inputs = sparse_tensor.SparseTensorValue(
- indices=((0, 0), (1, 0), (1, 1)),
- values=('omar', 'stringer', 'marlo'),
- dense_shape=(2, 2))
- expected_sequence_length = [1, 2]
-
- sequence_length = column._sequence_length(_LazyBuilder({'aaa': inputs}))
-
- with monitored_session.MonitoredSession() as sess:
- self.assertAllEqual(
- expected_sequence_length, sequence_length.eval(session=sess))
-
class SequenceCategoricalColumnWithVocabularyFileTest(test.TestCase):
@@ -399,23 +448,6 @@ class SequenceCategoricalColumnWithVocabularyFileTest(test.TestCase):
expected_sparse_ids,
id_weight_pair.id_tensor.eval(session=sess))
- def test_sequence_length(self):
- column = sfc.sequence_categorical_column_with_vocabulary_file(
- key='aaa',
- vocabulary_file=self._wire_vocabulary_file_name,
- vocabulary_size=self._wire_vocabulary_size)
- inputs = sparse_tensor.SparseTensorValue(
- indices=((0, 0), (1, 0), (1, 1)),
- values=('marlo', 'skywalker', 'omar'),
- dense_shape=(2, 2))
- expected_sequence_length = [1, 2]
-
- sequence_length = column._sequence_length(_LazyBuilder({'aaa': inputs}))
-
- with monitored_session.MonitoredSession() as sess:
- self.assertAllEqual(
- expected_sequence_length, sequence_length.eval(session=sess))
-
class SequenceCategoricalColumnWithVocabularyListTest(test.TestCase):
@@ -441,22 +473,6 @@ class SequenceCategoricalColumnWithVocabularyListTest(test.TestCase):
expected_sparse_ids,
id_weight_pair.id_tensor.eval(session=sess))
- def test_sequence_length(self):
- column = sfc.sequence_categorical_column_with_vocabulary_list(
- key='aaa',
- vocabulary_list=('omar', 'stringer', 'marlo'))
- inputs = sparse_tensor.SparseTensorValue(
- indices=((0, 0), (1, 0), (1, 1)),
- values=('marlo', 'skywalker', 'omar'),
- dense_shape=(2, 2))
- expected_sequence_length = [1, 2]
-
- sequence_length = column._sequence_length(_LazyBuilder({'aaa': inputs}))
-
- with monitored_session.MonitoredSession() as sess:
- self.assertAllEqual(
- expected_sequence_length, sequence_length.eval(session=sess))
-
class SequenceEmbeddingColumnTest(test.TestCase):
@@ -496,7 +512,7 @@ class SequenceEmbeddingColumnTest(test.TestCase):
categorical_column = sfc.sequence_categorical_column_with_identity(
key='aaa', num_buckets=vocabulary_size)
- embedding_column = sfc._sequence_embedding_column(
+ embedding_column = fc.embedding_column(
categorical_column, dimension=embedding_dimension,
initializer=_initializer)
@@ -522,7 +538,7 @@ class SequenceEmbeddingColumnTest(test.TestCase):
categorical_column = sfc.sequence_categorical_column_with_identity(
key='aaa', num_buckets=vocabulary_size)
- embedding_column = sfc._sequence_embedding_column(
+ embedding_column = fc.embedding_column(
categorical_column, dimension=2)
_, sequence_length = embedding_column._get_sequence_dense_tensor(
@@ -550,7 +566,7 @@ class SequenceEmbeddingColumnTest(test.TestCase):
categorical_column = sfc.sequence_categorical_column_with_identity(
key='aaa', num_buckets=vocabulary_size)
- embedding_column = sfc._sequence_embedding_column(
+ embedding_column = fc.embedding_column(
categorical_column, dimension=2)
_, sequence_length = embedding_column._get_sequence_dense_tensor(
@@ -587,7 +603,7 @@ class SequenceIndicatorColumnTest(test.TestCase):
categorical_column = sfc.sequence_categorical_column_with_identity(
key='aaa', num_buckets=vocabulary_size)
- indicator_column = sfc._sequence_indicator_column(categorical_column)
+ indicator_column = fc.indicator_column(categorical_column)
indicator_tensor, _ = indicator_column._get_sequence_dense_tensor(
_LazyBuilder({'aaa': sparse_input}))
@@ -607,7 +623,7 @@ class SequenceIndicatorColumnTest(test.TestCase):
categorical_column = sfc.sequence_categorical_column_with_identity(
key='aaa', num_buckets=vocabulary_size)
- indicator_column = sfc._sequence_indicator_column(categorical_column)
+ indicator_column = fc.indicator_column(categorical_column)
_, sequence_length = indicator_column._get_sequence_dense_tensor(
_LazyBuilder({'aaa': sparse_input}))
@@ -634,7 +650,7 @@ class SequenceIndicatorColumnTest(test.TestCase):
categorical_column = sfc.sequence_categorical_column_with_identity(
key='aaa', num_buckets=vocabulary_size)
- indicator_column = sfc._sequence_indicator_column(categorical_column)
+ indicator_column = fc.indicator_column(categorical_column)
_, sequence_length = indicator_column._get_sequence_dense_tensor(
_LazyBuilder({'aaa': sparse_input}))
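For reference, a minimal sketch (under the same TF 1.x contrib API assumptions as above) of the validation behavior the new tests assert: the plain `input_layer` rejects columns built from sequence categorical columns, which must instead go through `sequence_input_layer`. The feature name and sizes are illustrative.

```python
import tensorflow as tf

# example 0: ids [2]; example 1: ids [0, 1]
sparse_input = tf.SparseTensorValue(
    indices=((0, 0), (1, 0), (1, 1)), values=(2, 0, 1), dense_shape=(2, 2))

watches = tf.contrib.feature_column.sequence_categorical_column_with_identity(
    'watches', num_buckets=3)
watches_embedding = tf.feature_column.embedding_column(watches, dimension=2)

try:
  # Sequence columns are rejected by the non-sequence input_layer.
  tf.feature_column.input_layer(
      features={'watches': sparse_input}, feature_columns=[watches_embedding])
except ValueError as e:
  print(e)  # "... must not be of type _SequenceCategoricalColumn."
```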