diff options
author | A. Unique TensorFlower <gardener@tensorflow.org> | 2018-08-20 14:35:20 -0700 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2018-08-20 14:41:46 -0700 |
commit | 0440ccfc199cbffc10aae19fde07f0100c823ed9 (patch) | |
tree | b1a5df0802a3cf820acfe1d3b94f22244415bfdf /tensorflow/contrib/layers | |
parent | 713cf64262995838711cbe28dc1b69709f0b028a (diff) |
Fix the issue of losing the last dimension for a one-hot column with weights
PiperOrigin-RevId: 209485789
Diffstat (limited to 'tensorflow/contrib/layers')
-rw-r--r-- | tensorflow/contrib/layers/python/layers/feature_column.py | 9 | ||||
-rw-r--r-- | tensorflow/contrib/layers/python/layers/feature_column_test.py | 51 |
2 files changed, 58 insertions, 2 deletions
diff --git a/tensorflow/contrib/layers/python/layers/feature_column.py b/tensorflow/contrib/layers/python/layers/feature_column.py index 3ae07cedab..28d19a0445 100644 --- a/tensorflow/contrib/layers/python/layers/feature_column.py +++ b/tensorflow/contrib/layers/python/layers/feature_column.py @@ -997,9 +997,14 @@ class _OneHotColumn( # Remove (?, -1) index weighted_column = sparse_ops.sparse_slice( weighted_column, - [0, 0], + array_ops.zeros_like(weighted_column.dense_shape), weighted_column.dense_shape) - return sparse_ops.sparse_tensor_to_dense(weighted_column) + dense_tensor = sparse_ops.sparse_tensor_to_dense(weighted_column) + batch_shape = array_ops.shape(dense_tensor)[:-1] + dense_tensor_shape = array_ops.concat( + [batch_shape, [self.length]], axis=0) + dense_tensor = array_ops.reshape(dense_tensor, dense_tensor_shape) + return dense_tensor dense_id_tensor = sparse_ops.sparse_tensor_to_dense(sparse_id_column, default_value=-1) diff --git a/tensorflow/contrib/layers/python/layers/feature_column_test.py b/tensorflow/contrib/layers/python/layers/feature_column_test.py index 1de9ab7056..eaaf9f8d5f 100644 --- a/tensorflow/contrib/layers/python/layers/feature_column_test.py +++ b/tensorflow/contrib/layers/python/layers/feature_column_test.py @@ -57,6 +57,29 @@ def _sparse_id_tensor(shape, vocab_size, seed=112123): indices=indices, values=values, dense_shape=shape) +def _sparse_id_tensor_with_weights(shape, vocab_size, seed=112123): + # Returns a arbitrary `SparseTensor` with given shape and vocab size. + assert vocab_size >= shape[-1] + np.random.seed(seed) + indices = np.array(list(itertools.product(*[range(s) for s in shape]))) + + # Values must be distinct from the vocab + values = np.ndarray.flatten(np.array([ + np.random.choice(vocab_size, size=shape[-1], replace=False) + for _ in range(np.prod(shape[:-1]))])) + weights = np.sort(np.random.rand(*shape), axis=len(shape)-1) + + # Remove entries if weight < 0.5 for sparsity. 
+ keep = np.ndarray.flatten(weights < 0.5) # Remove half of them + indices = indices[keep] + values = values[keep] + weights = np.ndarray.flatten(weights)[keep] + return (sparse_tensor_lib.SparseTensor( + indices=indices, values=values, dense_shape=shape), + sparse_tensor_lib.SparseTensor( + indices=indices, values=weights, dense_shape=shape)) + + class FeatureColumnTest(test.TestCase): def testImmutability(self): @@ -329,6 +352,34 @@ class FeatureColumnTest(test.TestCase): self.assertEqual(one_hot.sparse_id_column.name, "ids_weighted_by_weights") self.assertEqual(one_hot.length, 3) + def testIntegerizedOneHotColumnForWeightedSparseColumn(self): + vocab_size = 5 + ids = fc.sparse_column_with_integerized_feature("ids", vocab_size) + weighted_ids = fc.weighted_sparse_column(ids, "weights") + one_hot = fc.one_hot_column(weighted_ids) + self.assertEqual(one_hot.sparse_id_column.name, "ids_weighted_by_weights") + self.assertEqual(one_hot.length, vocab_size) + + def testIntegerizedOneHotWeightedSparseColumnShape(self): + vocab_size = 5 + for id_tensor_shape in [[4, 3], [2, 4], [3, 3, 3]]: + output_rank = len(id_tensor_shape) + a = fc.sparse_column_with_integerized_feature("a", vocab_size) + weighted = fc.weighted_sparse_column(a, "weights") + one_hot = fc.one_hot_column(weighted) + id_tensor, weight_tensor = _sparse_id_tensor_with_weights( + id_tensor_shape, vocab_size) + + one_hot_output = one_hot._to_dnn_input_layer( + (id_tensor, weight_tensor), + output_rank=output_rank) + one_hot_output_shape = one_hot_output.get_shape().as_list() + expected_shape = id_tensor_shape[:-1] + [vocab_size] + self.assertEquals(expected_shape, one_hot_output_shape) + with self.test_session() as sess: + one_hot_value = sess.run(one_hot_output) + self.assertEquals(expected_shape, list(one_hot_value.shape)) + def testOneHotColumnWithSparseColumnWithHashKeys(self): input_values = ["marlo", "unknown", "omar"] inputs = constant_op.constant(input_values) |