aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/contrib/layers
diff options
context:
space:
mode:
authorGravatar A. Unique TensorFlower <gardener@tensorflow.org>2018-08-20 14:35:20 -0700
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2018-08-20 14:41:46 -0700
commit0440ccfc199cbffc10aae19fde07f0100c823ed9 (patch)
treeb1a5df0802a3cf820acfe1d3b94f22244415bfdf /tensorflow/contrib/layers
parent713cf64262995838711cbe28dc1b69709f0b028a (diff)
Fix the issue of losing last dimension for one hot column with weights
PiperOrigin-RevId: 209485789
Diffstat (limited to 'tensorflow/contrib/layers')
-rw-r--r--tensorflow/contrib/layers/python/layers/feature_column.py9
-rw-r--r--tensorflow/contrib/layers/python/layers/feature_column_test.py51
2 files changed, 58 insertions, 2 deletions
diff --git a/tensorflow/contrib/layers/python/layers/feature_column.py b/tensorflow/contrib/layers/python/layers/feature_column.py
index 3ae07cedab..28d19a0445 100644
--- a/tensorflow/contrib/layers/python/layers/feature_column.py
+++ b/tensorflow/contrib/layers/python/layers/feature_column.py
@@ -997,9 +997,14 @@ class _OneHotColumn(
# Remove (?, -1) index
weighted_column = sparse_ops.sparse_slice(
weighted_column,
- [0, 0],
+ array_ops.zeros_like(weighted_column.dense_shape),
weighted_column.dense_shape)
- return sparse_ops.sparse_tensor_to_dense(weighted_column)
+ dense_tensor = sparse_ops.sparse_tensor_to_dense(weighted_column)
+ batch_shape = array_ops.shape(dense_tensor)[:-1]
+ dense_tensor_shape = array_ops.concat(
+ [batch_shape, [self.length]], axis=0)
+ dense_tensor = array_ops.reshape(dense_tensor, dense_tensor_shape)
+ return dense_tensor
dense_id_tensor = sparse_ops.sparse_tensor_to_dense(sparse_id_column,
default_value=-1)
diff --git a/tensorflow/contrib/layers/python/layers/feature_column_test.py b/tensorflow/contrib/layers/python/layers/feature_column_test.py
index 1de9ab7056..eaaf9f8d5f 100644
--- a/tensorflow/contrib/layers/python/layers/feature_column_test.py
+++ b/tensorflow/contrib/layers/python/layers/feature_column_test.py
@@ -57,6 +57,29 @@ def _sparse_id_tensor(shape, vocab_size, seed=112123):
indices=indices, values=values, dense_shape=shape)
+def _sparse_id_tensor_with_weights(shape, vocab_size, seed=112123):
+ # Returns a arbitrary `SparseTensor` with given shape and vocab size.
+ assert vocab_size >= shape[-1]
+ np.random.seed(seed)
+ indices = np.array(list(itertools.product(*[range(s) for s in shape])))
+
+ # Values must be distinct from the vocab
+ values = np.ndarray.flatten(np.array([
+ np.random.choice(vocab_size, size=shape[-1], replace=False)
+ for _ in range(np.prod(shape[:-1]))]))
+ weights = np.sort(np.random.rand(*shape), axis=len(shape)-1)
+
+ # Remove entries if weight < 0.5 for sparsity.
+ keep = np.ndarray.flatten(weights < 0.5) # Remove half of them
+ indices = indices[keep]
+ values = values[keep]
+ weights = np.ndarray.flatten(weights)[keep]
+ return (sparse_tensor_lib.SparseTensor(
+ indices=indices, values=values, dense_shape=shape),
+ sparse_tensor_lib.SparseTensor(
+ indices=indices, values=weights, dense_shape=shape))
+
+
class FeatureColumnTest(test.TestCase):
def testImmutability(self):
@@ -329,6 +352,34 @@ class FeatureColumnTest(test.TestCase):
self.assertEqual(one_hot.sparse_id_column.name, "ids_weighted_by_weights")
self.assertEqual(one_hot.length, 3)
+ def testIntegerizedOneHotColumnForWeightedSparseColumn(self):
+ vocab_size = 5
+ ids = fc.sparse_column_with_integerized_feature("ids", vocab_size)
+ weighted_ids = fc.weighted_sparse_column(ids, "weights")
+ one_hot = fc.one_hot_column(weighted_ids)
+ self.assertEqual(one_hot.sparse_id_column.name, "ids_weighted_by_weights")
+ self.assertEqual(one_hot.length, vocab_size)
+
+ def testIntegerizedOneHotWeightedSparseColumnShape(self):
+ vocab_size = 5
+ for id_tensor_shape in [[4, 3], [2, 4], [3, 3, 3]]:
+ output_rank = len(id_tensor_shape)
+ a = fc.sparse_column_with_integerized_feature("a", vocab_size)
+ weighted = fc.weighted_sparse_column(a, "weights")
+ one_hot = fc.one_hot_column(weighted)
+ id_tensor, weight_tensor = _sparse_id_tensor_with_weights(
+ id_tensor_shape, vocab_size)
+
+ one_hot_output = one_hot._to_dnn_input_layer(
+ (id_tensor, weight_tensor),
+ output_rank=output_rank)
+ one_hot_output_shape = one_hot_output.get_shape().as_list()
+ expected_shape = id_tensor_shape[:-1] + [vocab_size]
+ self.assertEquals(expected_shape, one_hot_output_shape)
+ with self.test_session() as sess:
+ one_hot_value = sess.run(one_hot_output)
+ self.assertEquals(expected_shape, list(one_hot_value.shape))
+
def testOneHotColumnWithSparseColumnWithHashKeys(self):
input_values = ["marlo", "unknown", "omar"]
inputs = constant_op.constant(input_values)