From 6c391166b8b6ba43d2b0151e6fb9cf14864131a2 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 9 Oct 2018 16:23:35 -0700 Subject: Add 'remove' operation to MutableHashTable and MutableDenseHashTable. PiperOrigin-RevId: 216443201 --- .../linear_optimizer/python/ops/sdca_ops.py | 3 +- .../python/ops/sharded_mutable_dense_hashtable.py | 2 + .../ops/sharded_mutable_dense_hashtable_test.py | 6 + tensorflow/contrib/lookup/lookup_ops.py | 81 ++++- tensorflow/contrib/lookup/lookup_ops_test.py | 336 +++++++++++++++++---- .../timeseries/python/timeseries/math_utils.py | 19 +- .../python/timeseries/math_utils_test.py | 8 +- .../python/timeseries/state_management.py | 1 + 8 files changed, 378 insertions(+), 78 deletions(-) (limited to 'tensorflow/contrib') diff --git a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py index 48ac429701..b5099a0bf6 100644 --- a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py +++ b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py @@ -152,7 +152,8 @@ class SdcaModel(object): default_value=[0.0, 0.0, 0.0, 0.0], # SdcaFprint never returns 0 or 1 for the low64 bits, so this a safe # empty_key (that will never collide with actual payloads). - empty_key=[0, 0]) + empty_key=[0, 0], + deleted_key=[1, 1]) summary.scalar('approximate_duality_gap', self.approximate_duality_gap()) summary.scalar('examples_seen', self._hashtable.size()) diff --git a/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable.py b/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable.py index 5015fb0848..44a869f7c2 100644 --- a/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable.py +++ b/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable.py @@ -48,6 +48,7 @@ class ShardedMutableDenseHashTable(lookup.LookupInterface): value_dtype, default_value, empty_key, + deleted_key, num_shards=1, checkpoint=True, name='ShardedMutableHashTable'): @@ -62,6 +63,7 @@ class ShardedMutableDenseHashTable(lookup.LookupInterface): value_dtype=value_dtype, default_value=default_value, empty_key=empty_key, + deleted_key=deleted_key, checkpoint=checkpoint, name='%s-%d-of-%d' % (name, i + 1, num_shards))) self._table_shards = table_shards diff --git a/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable_test.py b/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable_test.py index 553b116a3b..2b56d0fa3a 100644 --- a/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable_test.py +++ b/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable_test.py @@ -33,6 +33,7 @@ class ShardedMutableDenseHashTableTest(TensorFlowTestCase): with self.cached_session(): default_val = -1 empty_key = 0 + deleted_key = -1 keys = constant_op.constant([11, 12, 13], dtypes.int64) values = constant_op.constant([0, 1, 2], dtypes.int64) table = ShardedMutableDenseHashTable( @@ -40,6 +41,7 @@ class ShardedMutableDenseHashTableTest(TensorFlowTestCase): dtypes.int64, default_val, empty_key, + deleted_key, num_shards=num_shards) self.assertAllEqual(0, table.size().eval()) @@ -56,6 +58,7 @@ class ShardedMutableDenseHashTableTest(TensorFlowTestCase): with self.cached_session(): default_val = [-0.1, 0.2] empty_key = [0, 1] + deleted_key = [1, 0] keys = constant_op.constant([[11, 12], [13, 14], [15, 16]], dtypes.int64) values = constant_op.constant([[0.5, 0.6], [1.5, 1.6], [2.5, 2.6]], @@ -65,6 +68,7 @@ class ShardedMutableDenseHashTableTest(TensorFlowTestCase): dtypes.float32, default_val, empty_key, + deleted_key, num_shards=num_shards) self.assertAllEqual(0, table.size().eval()) @@ -81,6 +85,7 @@ class ShardedMutableDenseHashTableTest(TensorFlowTestCase): def testExportSharded(self): with self.cached_session(): empty_key = -2 + deleted_key = -3 default_val = -1 num_shards = 2 keys = constant_op.constant([10, 11, 12], dtypes.int64) @@ -90,6 +95,7 @@ class ShardedMutableDenseHashTableTest(TensorFlowTestCase): dtypes.int64, default_val, empty_key, + deleted_key, num_shards=num_shards) self.assertAllEqual(0, table.size().eval()) diff --git a/tensorflow/contrib/lookup/lookup_ops.py b/tensorflow/contrib/lookup/lookup_ops.py index f83765a48d..5abef822e8 100644 --- a/tensorflow/contrib/lookup/lookup_ops.py +++ b/tensorflow/contrib/lookup/lookup_ops.py @@ -292,8 +292,8 @@ def index_to_string(tensor, mapping, default_value="UNK", name=None): class MutableHashTable(LookupInterface, checkpointable.CheckpointableBase): """A generic mutable hash table implementation. - Data can be inserted by calling the insert method. It does not support - initialization via the init method. + Data can be inserted by calling the insert method and removed by calling the + remove method. It does not support initialization via the init method. Example usage: @@ -391,6 +391,34 @@ class MutableHashTable(LookupInterface, checkpointable.CheckpointableBase): with ops.colocate_with(self._table_ref): return gen_lookup_ops.lookup_table_size_v2(self._table_ref, name=name) + def remove(self, keys, name=None): + """Removes `keys` and its associated values from the table. + + If a key is not present in the table, it is silently ignored. + + Args: + keys: Keys to remove. Can be a tensor of any shape. Must match the table's + key type. + name: A name for the operation (optional). + + Returns: + The created Operation. + + Raises: + TypeError: when `keys` do not match the table data types. + """ + if keys.dtype != self._key_dtype: + raise TypeError("Signature mismatch. Keys must be dtype %s, got %s." % + (self._key_dtype, keys.dtype)) + + with ops.name_scope(name, "%s_lookup_table_remove" % self._name, + (self._table_ref, keys, self._default_value)) as name: + # pylint: disable=protected-access + op = gen_lookup_ops.lookup_table_remove_v2( + self._table_ref, keys, name=name) + + return op + def lookup(self, keys, name=None): """Looks up `keys` in a table, outputs the corresponding values. @@ -487,11 +515,11 @@ class MutableHashTable(LookupInterface, checkpointable.CheckpointableBase): class MutableDenseHashTable(LookupInterface, checkpointable.CheckpointableBase): """A generic mutable hash table implementation using tensors as backing store. - Data can be inserted by calling the insert method. It does not support - initialization via the init method. + Data can be inserted by calling the insert method and removed by calling the + remove method. It does not support initialization via the init method. It uses "open addressing" with quadratic reprobing to resolve collisions. - Compared to `MutableHashTable` the insert and lookup operations in a + Compared to `MutableHashTable` the insert, remove and lookup operations in a `MutableDenseHashTable` are typically faster, but memory usage can be higher. However, `MutableDenseHashTable` does not require additional memory for temporary tensors created during checkpointing and restore operations. @@ -502,7 +530,9 @@ class MutableDenseHashTable(LookupInterface, checkpointable.CheckpointableBase): table = tf.contrib.lookup.MutableDenseHashTable(key_dtype=tf.int64, value_dtype=tf.int64, default_value=-1, - empty_key=0) + empty_key=0, + deleted_key=-1) + sess.run(table.insert(keys, values)) out = table.lookup(query_keys) print(out.eval()) @@ -516,6 +546,7 @@ class MutableDenseHashTable(LookupInterface, checkpointable.CheckpointableBase): value_dtype, default_value, empty_key, + deleted_key, initial_num_buckets=None, shared_name=None, name="MutableDenseHashTable", @@ -530,7 +561,7 @@ class MutableDenseHashTable(LookupInterface, checkpointable.CheckpointableBase): value_dtype: the type of the value tensors. default_value: The value to use if a key is missing in the table. empty_key: the key to use to represent empty buckets internally. Must not - be used in insert or lookup operations. + be used in insert, remove or lookup operations. initial_num_buckets: the initial number of buckets. shared_name: If non-empty, this table will be shared under the given name across multiple sessions. @@ -538,9 +569,12 @@ class MutableDenseHashTable(LookupInterface, checkpointable.CheckpointableBase): checkpoint: if True, the contents of the table are saved to and restored from checkpoints. If `shared_name` is empty for a checkpointed table, it is shared using the table node name. + deleted_key: the key to use to represent deleted buckets internally. Must + not be used in insert, remove or lookup operations and be different from + the empty_key. Returns: - A `MutableHashTable` object. + A `MutableDenseHashTable` object. Raises: ValueError: If checkpoint is True and no name was specified. @@ -555,6 +589,8 @@ class MutableDenseHashTable(LookupInterface, checkpointable.CheckpointableBase): use_node_name_sharing = checkpoint and shared_name is None empty_key = ops.convert_to_tensor( empty_key, dtype=key_dtype, name="empty_key") + deleted_key = ops.convert_to_tensor( + deleted_key, dtype=key_dtype, name="deleted_key") executing_eagerly = context.executing_eagerly() if executing_eagerly and shared_name is None: # TODO(allenl): This will leak memory due to kernel caching by the @@ -564,6 +600,7 @@ class MutableDenseHashTable(LookupInterface, checkpointable.CheckpointableBase): shared_name = "table_%d" % (ops.uid(),) self._table_ref = gen_lookup_ops.mutable_dense_hash_table_v2( empty_key=empty_key, + deleted_key=deleted_key, shared_name=shared_name, use_node_name_sharing=use_node_name_sharing, value_dtype=value_dtype, @@ -648,6 +685,34 @@ class MutableDenseHashTable(LookupInterface, checkpointable.CheckpointableBase): self._table_ref, keys, values, name=name) return op + def remove(self, keys, name=None): + """Removes `keys` and its associated values from the table. + + If a key is not present in the table, it is silently ignored. + + Args: + keys: Keys to remove. Can be a tensor of any shape. Must match the table's + key type. + name: A name for the operation (optional). + + Returns: + The created Operation. + + Raises: + TypeError: when `keys` do not match the table data types. + """ + if keys.dtype != self._key_dtype: + raise TypeError("Signature mismatch. Keys must be dtype %s, got %s." % + (self._key_dtype, keys.dtype)) + + with ops.name_scope(name, "%s_lookup_table_remove" % self._name, + (self._table_ref, keys, self._default_value)) as name: + # pylint: disable=protected-access + op = gen_lookup_ops.lookup_table_remove_v2( + self._table_ref, keys, name=name) + + return op + def export(self, name=None): """Returns tensors of all keys and values in the table. diff --git a/tensorflow/contrib/lookup/lookup_ops_test.py b/tensorflow/contrib/lookup/lookup_ops_test.py index 9e9345e875..35b0d1bc44 100644 --- a/tensorflow/contrib/lookup/lookup_ops_test.py +++ b/tensorflow/contrib/lookup/lookup_ops_test.py @@ -303,13 +303,17 @@ class MutableHashTableOpTest(test.TestCase): def testMutableHashTable(self): with self.cached_session(): default_val = -1 - keys = constant_op.constant(["brain", "salad", "surgery"]) - values = constant_op.constant([0, 1, 2], dtypes.int64) + keys = constant_op.constant(["brain", "salad", "surgery", "tarkus"]) + values = constant_op.constant([0, 1, 2, 3], dtypes.int64) table = lookup.MutableHashTable(dtypes.string, dtypes.int64, default_val) self.assertAllEqual(0, table.size().eval()) table.insert(keys, values).run() + self.assertAllEqual(4, table.size().eval()) + + remove_string = constant_op.constant(["tarkus", "tank"]) + table.remove(remove_string).run() self.assertAllEqual(3, table.size().eval()) input_string = constant_op.constant(["brain", "salad", "tank"]) @@ -472,13 +476,18 @@ class MutableHashTableOpTest(test.TestCase): def testMutableHashTableOfTensors(self): with self.cached_session(): default_val = constant_op.constant([-1, -1], dtypes.int64) - keys = constant_op.constant(["brain", "salad", "surgery"]) - values = constant_op.constant([[0, 1], [2, 3], [4, 5]], dtypes.int64) + keys = constant_op.constant(["brain", "salad", "surgery", "tarkus"]) + values = constant_op.constant([[0, 1], [2, 3], [4, 5], [6, 7]], + dtypes.int64) table = lookup.MutableHashTable(dtypes.string, dtypes.int64, default_val) self.assertAllEqual(0, table.size().eval()) table.insert(keys, values).run() + self.assertAllEqual(4, table.size().eval()) + + remove_string = constant_op.constant(["tarkus", "tank"]) + table.remove(remove_string).run() self.assertAllEqual(3, table.size().eval()) input_string = constant_op.constant(["brain", "salad", "tank"]) @@ -624,6 +633,26 @@ class MutableHashTableOpTest(test.TestCase): result = output.eval() self.assertAllEqual([0, 1, 3, -1], result) + def testMutableHashTableRemoveHighRank(self): + with self.test_session(): + default_val = -1 + keys = constant_op.constant([["brain", "salad"], ["surgery", "tank"]]) + values = constant_op.constant([[0, 1], [2, 3]], dtypes.int64) + table = lookup.MutableHashTable(dtypes.string, dtypes.int64, default_val) + + table.insert(keys, values).run() + self.assertAllEqual(4, table.size().eval()) + + remove_string = constant_op.constant(["salad", "tarkus"]) + table.remove(remove_string).run() + self.assertAllEqual(3, table.size().eval()) + + input_string = constant_op.constant(["brain", "salad", "tank", "tarkus"]) + output = table.lookup(input_string) + + result = output.eval() + self.assertAllEqual([0, -1, 3, -1], result) + def testMutableHashTableOfTensorsFindHighRank(self): with self.cached_session(): default_val = constant_op.constant([-1, -1, -1], dtypes.int64) @@ -645,6 +674,30 @@ class MutableHashTableOpTest(test.TestCase): self.assertAllEqual( [[[0, 1, 2], [2, 3, 4]], [[-1, -1, -1], [-1, -1, -1]]], result) + def testMutableHashTableOfTensorsRemoveHighRank(self): + with self.test_session(): + default_val = constant_op.constant([-1, -1, -1], dtypes.int64) + keys = constant_op.constant(["brain", "salad", "surgery"]) + values = constant_op.constant([[0, 1, 2], [2, 3, 4], [4, 5, 6]], + dtypes.int64) + table = lookup.MutableHashTable(dtypes.string, dtypes.int64, default_val) + + table.insert(keys, values).run() + self.assertAllEqual(3, table.size().eval()) + + remove_string = constant_op.constant([["brain", "tank"]]) + table.remove(remove_string).run() + self.assertAllEqual(2, table.size().eval()) + + input_string = constant_op.constant([["brain", "salad"], + ["surgery", "tank"]]) + output = table.lookup(input_string) + self.assertAllEqual([2, 2, 3], output.get_shape()) + + result = output.eval() + self.assertAllEqual( + [[[-1, -1, -1], [2, 3, 4]], [[4, 5, 6], [-1, -1, -1]]], result) + def testMultipleMutableHashTables(self): with self.cached_session() as sess: default_val = -1 @@ -792,13 +845,22 @@ class MutableDenseHashTableOpTest(test.TestCase): def testBasic(self): with self.cached_session(): - keys = constant_op.constant([11, 12, 13], dtypes.int64) - values = constant_op.constant([0, 1, 2], dtypes.int64) + + keys = constant_op.constant([11, 12, 13, 14], dtypes.int64) + values = constant_op.constant([0, 1, 2, 3], dtypes.int64) table = lookup.MutableDenseHashTable( - dtypes.int64, dtypes.int64, default_value=-1, empty_key=0) + dtypes.int64, + dtypes.int64, + default_value=-1, + empty_key=0, + deleted_key=-1) self.assertAllEqual(0, table.size().eval()) table.insert(keys, values).run() + self.assertAllEqual(4, table.size().eval()) + + remove_string = constant_op.constant([12, 15], dtypes.int64) + table.remove(remove_string).run() self.assertAllEqual(3, table.size().eval()) input_string = constant_op.constant([11, 12, 15], dtypes.int64) @@ -806,17 +868,26 @@ class MutableDenseHashTableOpTest(test.TestCase): self.assertAllEqual([3], output.get_shape()) result = output.eval() - self.assertAllEqual([0, 1, -1], result) + self.assertAllEqual([0, -1, -1], result) def testBasicBool(self): with self.cached_session(): - keys = constant_op.constant([11, 12, 13], dtypes.int64) - values = constant_op.constant([True, True, True], dtypes.bool) + + keys = constant_op.constant([11, 12, 13, 14], dtypes.int64) + values = constant_op.constant([True, True, True, True], dtypes.bool) table = lookup.MutableDenseHashTable( - dtypes.int64, dtypes.bool, default_value=False, empty_key=0) + dtypes.int64, + dtypes.bool, + default_value=False, + empty_key=0, + deleted_key=-1) self.assertAllEqual(0, table.size().eval()) table.insert(keys, values).run() + self.assertAllEqual(4, table.size().eval()) + + remove_string = constant_op.constant([11, 15], dtypes.int64) + table.remove(remove_string).run() self.assertAllEqual(3, table.size().eval()) input_string = constant_op.constant([11, 12, 15], dtypes.int64) @@ -824,14 +895,30 @@ class MutableDenseHashTableOpTest(test.TestCase): self.assertAllEqual([3], output.get_shape()) result = output.eval() - self.assertAllEqual([True, True, False], result) + self.assertAllEqual([False, True, False], result) + + def testSameEmptyAndDeletedKey(self): + with self.cached_session(): + with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, + "deleted_key"): + table = lookup.MutableDenseHashTable( + dtypes.int64, + dtypes.int64, + default_value=-1, + empty_key=42, + deleted_key=42) + self.assertAllEqual(0, table.size().eval()) def testLookupUnknownShape(self): with self.cached_session(): keys = constant_op.constant([11, 12, 13], dtypes.int64) values = constant_op.constant([0, 1, 2], dtypes.int64) table = lookup.MutableDenseHashTable( - dtypes.int64, dtypes.int64, default_value=-1, empty_key=0) + dtypes.int64, + dtypes.int64, + default_value=-1, + empty_key=0, + deleted_key=-1) table.insert(keys, values).run() self.assertAllEqual(3, table.size().eval()) @@ -844,45 +931,60 @@ class MutableDenseHashTableOpTest(test.TestCase): def testMapStringToFloat(self): with self.cached_session(): - keys = constant_op.constant(["a", "b", "c"], dtypes.string) - values = constant_op.constant([0.0, 1.1, 2.2], dtypes.float32) + + keys = constant_op.constant(["a", "b", "c", "d"], dtypes.string) + values = constant_op.constant([0.0, 1.1, 2.2, 3.3], dtypes.float32) default_value = constant_op.constant(-1.5, dtypes.float32) table = lookup.MutableDenseHashTable( dtypes.string, dtypes.float32, default_value=default_value, - empty_key="") + empty_key="", + deleted_key="$") self.assertAllEqual(0, table.size().eval()) table.insert(keys, values).run() + self.assertAllEqual(4, table.size().eval()) + + remove_string = constant_op.constant(["b", "e"]) + table.remove(remove_string).run() self.assertAllEqual(3, table.size().eval()) - input_string = constant_op.constant(["a", "b", "d"], dtypes.string) + input_string = constant_op.constant(["a", "b", "d", "e"], dtypes.string) output = table.lookup(input_string) - self.assertAllEqual([3], output.get_shape()) + self.assertAllEqual([4], output.get_shape()) result = output.eval() - self.assertAllClose([0, 1.1, -1.5], result) + self.assertAllClose([0, -1.5, 3.3, -1.5], result) def testMapInt64ToFloat(self): for float_dtype in [dtypes.float32, dtypes.float64]: with self.cached_session(): - keys = constant_op.constant([11, 12, 13], dtypes.int64) - values = constant_op.constant([0.0, 1.1, 2.2], float_dtype) + + keys = constant_op.constant([11, 12, 13, 14], dtypes.int64) + values = constant_op.constant([0.0, 1.1, 2.2, 3.3], float_dtype) default_value = constant_op.constant(-1.5, float_dtype) table = lookup.MutableDenseHashTable( - dtypes.int64, float_dtype, default_value=default_value, empty_key=0) + dtypes.int64, + float_dtype, + default_value=default_value, + empty_key=0, + deleted_key=-1) self.assertAllEqual(0, table.size().eval()) table.insert(keys, values).run() + self.assertAllEqual(4, table.size().eval()) + + remove_string = constant_op.constant([12, 15], dtypes.int64) + table.remove(remove_string).run() self.assertAllEqual(3, table.size().eval()) - input_string = constant_op.constant([11, 12, 15], dtypes.int64) + input_string = constant_op.constant([11, 12, 14, 15], dtypes.int64) output = table.lookup(input_string) - self.assertAllEqual([3], output.get_shape()) + self.assertAllEqual([4], output.get_shape()) result = output.eval() - self.assertAllClose([0, 1.1, -1.5], result) + self.assertAllClose([0, -1.5, 3.3, -1.5], result) def testVectorValues(self): with self.cached_session(): @@ -895,6 +997,7 @@ class MutableDenseHashTableOpTest(test.TestCase): dtypes.int64, default_value=default_value, empty_key=0, + deleted_key=-1, initial_num_buckets=4) self.assertAllEqual(0, table.size().eval()) @@ -908,26 +1011,35 @@ class MutableDenseHashTableOpTest(test.TestCase): self.assertAllEqual(4, table.size().eval()) self.assertAllEqual(8, len(table.export()[0].eval())) - input_string = constant_op.constant([11, 12, 15], dtypes.int64) + remove_string = constant_op.constant([12, 16], dtypes.int64) + table.remove(remove_string).run() + self.assertAllEqual(3, table.size().eval()) + self.assertAllEqual(8, len(table.export()[0].eval())) + + input_string = constant_op.constant([11, 12, 14, 15], dtypes.int64) output = table.lookup(input_string) - self.assertAllEqual( - [3, 4], output.shape, msg="Saw shape: %s" % output.shape) + self.assertAllEqual([4, 4], + output.shape, + msg="Saw shape: %s" % output.shape) result = output.eval() - self.assertAllEqual([[0, 1, 2, 3], [3, 4, 5, 6], [-1, -2, -3, -4]], - result) + self.assertAllEqual( + [[0, 1, 2, 3], [-1, -2, -3, -4], [2, 3, 4, 5], [-1, -2, -3, -4]], + result) def testVectorKeys(self): with self.cached_session(): keys = constant_op.constant([[0, 1], [1, 2], [1, 3]], dtypes.int64) values = constant_op.constant([10, 11, 12], dtypes.int64) empty_key = constant_op.constant([0, 3], dtypes.int64) + deleted_key = constant_op.constant([-1, -1], dtypes.int64) default_value = constant_op.constant(-1, dtypes.int64) table = lookup.MutableDenseHashTable( dtypes.int64, dtypes.int64, default_value=default_value, empty_key=empty_key, + deleted_key=deleted_key, initial_num_buckets=8) self.assertAllEqual(0, table.size().eval()) @@ -940,13 +1052,18 @@ class MutableDenseHashTableOpTest(test.TestCase): self.assertAllEqual(4, table.size().eval()) self.assertAllEqual(8, len(table.export()[0].eval())) - input_string = constant_op.constant([[0, 1], [1, 2], [0, 2]], + remove_string = constant_op.constant([[1, 2], [7, 8]], dtypes.int64) + table.remove(remove_string).run() + self.assertAllEqual(3, table.size().eval()) + self.assertAllEqual(8, len(table.export()[0].eval())) + + input_string = constant_op.constant([[0, 1], [1, 2], [1, 3], [0, 2]], dtypes.int64) output = table.lookup(input_string) - self.assertAllEqual([3], output.get_shape()) + self.assertAllEqual([4], output.get_shape()) result = output.eval() - self.assertAllEqual([10, 11, -1], result) + self.assertAllEqual([10, -1, 12, -1], result) def testResize(self): with self.cached_session(): @@ -957,6 +1074,7 @@ class MutableDenseHashTableOpTest(test.TestCase): dtypes.int64, default_value=-1, empty_key=0, + deleted_key=-1, initial_num_buckets=4) self.assertAllEqual(0, table.size().eval()) @@ -964,31 +1082,42 @@ class MutableDenseHashTableOpTest(test.TestCase): self.assertAllEqual(3, table.size().eval()) self.assertAllEqual(4, len(table.export()[0].eval())) - keys2 = constant_op.constant([13, 14, 15, 16, 17], dtypes.int64) - values2 = constant_op.constant([3, 4, 5, 6, 7], dtypes.int64) + keys2 = constant_op.constant([12, 99], dtypes.int64) + table.remove(keys2).run() + self.assertAllEqual(2, table.size().eval()) + self.assertAllEqual(4, len(table.export()[0].eval())) + + keys3 = constant_op.constant([13, 14, 15, 16, 17], dtypes.int64) + values3 = constant_op.constant([3, 4, 5, 6, 7], dtypes.int64) - table.insert(keys2, values2).run() - self.assertAllEqual(7, table.size().eval()) + table.insert(keys3, values3).run() + self.assertAllEqual(6, table.size().eval()) self.assertAllEqual(16, len(table.export()[0].eval())) - keys3 = constant_op.constant([10, 11, 12, 13, 14, 15, 16, 17, 18], + keys4 = constant_op.constant([10, 11, 12, 13, 14, 15, 16, 17, 18], dtypes.int64) - output = table.lookup(keys3) - self.assertAllEqual([-1, 0, 1, 3, 4, 5, 6, 7, -1], output.eval()) + output = table.lookup(keys4) + self.assertAllEqual([-1, 0, -1, 3, 4, 5, 6, 7, -1], output.eval()) def testExport(self): with self.cached_session(): - keys = constant_op.constant([11, 12, 13], dtypes.int64) - values = constant_op.constant([1, 2, 3], dtypes.int64) + + keys = constant_op.constant([11, 12, 13, 14], dtypes.int64) + values = constant_op.constant([1, 2, 3, 4], dtypes.int64) table = lookup.MutableDenseHashTable( dtypes.int64, dtypes.int64, default_value=-1, empty_key=100, + deleted_key=200, initial_num_buckets=8) self.assertAllEqual(0, table.size().eval()) table.insert(keys, values).run() + self.assertAllEqual(4, table.size().eval()) + + keys2 = constant_op.constant([12, 15], dtypes.int64) + table.remove(keys2).run() self.assertAllEqual(3, table.size().eval()) exported_keys, exported_values = table.export() @@ -1005,8 +1134,8 @@ class MutableDenseHashTableOpTest(test.TestCase): pairs = np.dstack((np_keys.flatten(), np_values.flatten()))[0] # sort by key pairs = pairs[pairs[:, 0].argsort()] - self.assertAllEqual([[11, 1], [12, 2], [13, 3], [100, 0], [100, 0], - [100, 0], [100, 0], [100, 0]], pairs) + self.assertAllEqual([[11, 1], [13, 3], [14, 4], [100, 0], [100, 0], + [100, 0], [100, 0], [200, 2]], pairs) def testSaveRestore(self): save_dir = os.path.join(self.get_temp_dir(), "save_restore") @@ -1015,13 +1144,15 @@ class MutableDenseHashTableOpTest(test.TestCase): with self.session(graph=ops.Graph()) as sess: default_value = -1 empty_key = 0 - keys = constant_op.constant([11, 12, 13], dtypes.int64) - values = constant_op.constant([0, 1, 2], dtypes.int64) + deleted_key = -1 + keys = constant_op.constant([11, 12, 13, 14], dtypes.int64) + values = constant_op.constant([0, 1, 2, 3], dtypes.int64) table = lookup.MutableDenseHashTable( dtypes.int64, dtypes.int64, default_value=default_value, empty_key=empty_key, + deleted_key=deleted_key, name="t1", checkpoint=True, initial_num_buckets=32) @@ -1030,6 +1161,11 @@ class MutableDenseHashTableOpTest(test.TestCase): self.assertAllEqual(0, table.size().eval()) table.insert(keys, values).run() + self.assertAllEqual(4, table.size().eval()) + self.assertAllEqual(32, len(table.export()[0].eval())) + + keys2 = constant_op.constant([12, 15], dtypes.int64) + table.remove(keys2).run() self.assertAllEqual(3, table.size().eval()) self.assertAllEqual(32, len(table.export()[0].eval())) @@ -1043,6 +1179,7 @@ class MutableDenseHashTableOpTest(test.TestCase): dtypes.int64, default_value=default_value, empty_key=empty_key, + deleted_key=deleted_key, name="t1", checkpoint=True, initial_num_buckets=64) @@ -1062,7 +1199,7 @@ class MutableDenseHashTableOpTest(test.TestCase): input_string = constant_op.constant([10, 11, 12, 13, 14], dtypes.int64) output = table.lookup(input_string) - self.assertAllEqual([-1, 0, 1, 2, -1], output.eval()) + self.assertAllEqual([-1, 0, -1, 2, 3], output.eval()) @test_util.run_in_graph_and_eager_modes def testObjectSaveRestore(self): @@ -1071,6 +1208,7 @@ class MutableDenseHashTableOpTest(test.TestCase): default_value = -1 empty_key = 0 + deleted_key = -1 keys = constant_op.constant([11, 12, 13], dtypes.int64) values = constant_op.constant([0, 1, 2], dtypes.int64) save_table = lookup.MutableDenseHashTable( @@ -1078,6 +1216,7 @@ class MutableDenseHashTableOpTest(test.TestCase): dtypes.int64, default_value=default_value, empty_key=empty_key, + deleted_key=deleted_key, name="t1", checkpoint=True, initial_num_buckets=32) @@ -1097,6 +1236,7 @@ class MutableDenseHashTableOpTest(test.TestCase): dtypes.int64, default_value=default_value, empty_key=empty_key, + deleted_key=deleted_key, name="t1", checkpoint=True, initial_num_buckets=64) @@ -1124,14 +1264,18 @@ class MutableDenseHashTableOpTest(test.TestCase): with self.session(graph=ops.Graph()) as sess: empty_key = constant_op.constant([11, 13], dtypes.int64) + deleted_key = constant_op.constant([-2, -3], dtypes.int64) default_value = constant_op.constant([-1, -2], dtypes.int64) - keys = constant_op.constant([[11, 12], [11, 14], [13, 14]], dtypes.int64) - values = constant_op.constant([[0, 1], [2, 3], [4, 5]], dtypes.int64) + keys = constant_op.constant([[11, 12], [11, 14], [12, 13], [13, 14]], + dtypes.int64) + values = constant_op.constant([[0, 1], [2, 3], [2, 4], [4, 5]], + dtypes.int64) table = lookup.MutableDenseHashTable( dtypes.int64, dtypes.int64, default_value=default_value, empty_key=empty_key, + deleted_key=deleted_key, name="t1", checkpoint=True, initial_num_buckets=32) @@ -1140,6 +1284,11 @@ class MutableDenseHashTableOpTest(test.TestCase): self.assertAllEqual(0, table.size().eval()) table.insert(keys, values).run() + self.assertAllEqual(4, table.size().eval()) + self.assertAllEqual(32, len(table.export()[0].eval())) + + keys2 = constant_op.constant([[12, 13], [16, 17]], dtypes.int64) + table.remove(keys2).run() self.assertAllEqual(3, table.size().eval()) self.assertAllEqual(32, len(table.export()[0].eval())) @@ -1149,12 +1298,14 @@ class MutableDenseHashTableOpTest(test.TestCase): with self.session(graph=ops.Graph()) as sess: empty_key = constant_op.constant([11, 13], dtypes.int64) + deleted_key = constant_op.constant([-2, -3], dtypes.int64) default_value = constant_op.constant([-1, -2], dtypes.int64) table = lookup.MutableDenseHashTable( dtypes.int64, dtypes.int64, default_value=default_value, empty_key=empty_key, + deleted_key=deleted_key, name="t1", checkpoint=True, initial_num_buckets=64) @@ -1184,14 +1335,17 @@ class MutableDenseHashTableOpTest(test.TestCase): with self.session(graph=ops.Graph()) as sess: empty_key = constant_op.constant([11, 13], dtypes.int64) + deleted_key = constant_op.constant([-1, -1], dtypes.int64) default_value = constant_op.constant(-1, dtypes.int64) - keys = constant_op.constant([[11, 12], [11, 14], [13, 14]], dtypes.int64) - values = constant_op.constant([0, 1, 2], dtypes.int64) + keys = constant_op.constant([[11, 12], [11, 14], [12, 13], [13, 14]], + dtypes.int64) + values = constant_op.constant([0, 1, 2, 3], dtypes.int64) table = lookup.MutableDenseHashTable( dtypes.int64, dtypes.int64, default_value=default_value, empty_key=empty_key, + deleted_key=deleted_key, name="t2", checkpoint=True, initial_num_buckets=32) @@ -1200,6 +1354,11 @@ class MutableDenseHashTableOpTest(test.TestCase): self.assertAllEqual(0, table.size().eval()) table.insert(keys, values).run() + self.assertAllEqual(4, table.size().eval()) + self.assertAllEqual(32, len(table.export()[0].eval())) + + keys2 = constant_op.constant([[12, 13], [15, 16]], dtypes.int64) + table.remove(keys2).run() self.assertAllEqual(3, table.size().eval()) self.assertAllEqual(32, len(table.export()[0].eval())) @@ -1209,12 +1368,14 @@ class MutableDenseHashTableOpTest(test.TestCase): with self.session(graph=ops.Graph()) as sess: empty_key = constant_op.constant([11, 13], dtypes.int64) + deleted_key = constant_op.constant([-1, -1], dtypes.int64) default_value = constant_op.constant(-1, dtypes.int64) table = lookup.MutableDenseHashTable( dtypes.int64, dtypes.int64, default_value=default_value, empty_key=empty_key, + deleted_key=deleted_key, name="t2", checkpoint=True, initial_num_buckets=64) @@ -1235,7 +1396,7 @@ class MutableDenseHashTableOpTest(test.TestCase): input_string = constant_op.constant( [[11, 12], [11, 14], [11, 15], [13, 14], [13, 15]], dtypes.int64) output = table.lookup(input_string) - self.assertAllEqual([0, 1, -1, 2, -1], output.eval()) + self.assertAllEqual([0, 1, -1, 3, -1], output.eval()) def testReprobe(self): with self.cached_session(): @@ -1248,6 +1409,7 @@ class MutableDenseHashTableOpTest(test.TestCase): dtypes.int64, default_value=-1, empty_key=0, + deleted_key=-1, initial_num_buckets=8) self.assertAllEqual(0, table.size().eval()) @@ -1267,7 +1429,11 @@ class MutableDenseHashTableOpTest(test.TestCase): keys = constant_op.constant([11, 0, 13], dtypes.int64) values = constant_op.constant([0, 1, 2], dtypes.int64) table = lookup.MutableDenseHashTable( - dtypes.int64, dtypes.int64, default_value=-1, empty_key=12) + dtypes.int64, + dtypes.int64, + default_value=-1, + empty_key=12, + deleted_key=-1) self.assertAllEqual(0, table.size().eval()) table.insert(keys, values).run() @@ -1283,19 +1449,35 @@ class MutableDenseHashTableOpTest(test.TestCase): def testErrors(self): with self.cached_session(): table = lookup.MutableDenseHashTable( - dtypes.int64, dtypes.int64, default_value=-1, empty_key=0) + dtypes.int64, + dtypes.int64, + default_value=-1, + empty_key=0, + deleted_key=-1) # Inserting the empty key returns an error - keys = constant_op.constant([11, 0], dtypes.int64) - values = constant_op.constant([0, 1], dtypes.int64) + keys1 = constant_op.constant([11, 0], dtypes.int64) + values1 = constant_op.constant([0, 1], dtypes.int64) with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, "empty_key"): - table.insert(keys, values).run() + table.insert(keys1, values1).run() # Looking up the empty key returns an error with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, "empty_key"): - table.lookup(keys).eval() + table.lookup(keys1).eval() + + # Inserting the deleted key returns an error + keys2 = constant_op.constant([11, -1], dtypes.int64) + values2 = constant_op.constant([0, 1], dtypes.int64) + with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, + "deleted_key"): + table.insert(keys2, values2).run() + + # Looking up the empty key returns an error + with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, + "deleted_key"): + table.lookup(keys2).eval() # Arbitrary tensors of keys are not supported keys = constant_op.constant([[11, 0], [12, 1]], dtypes.int64) @@ -1312,11 +1494,43 @@ class MutableDenseHashTableOpTest(test.TestCase): dtypes.int64, default_value=-1, empty_key=17, + deleted_key=-1, initial_num_buckets=12) with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, "Number of buckets must be"): self.assertAllEqual(0, table2.size().eval()) + with self.assertRaisesRegexp( + errors_impl.InvalidArgumentError, + "Empty and deleted keys must have same shape"): + table3 = lookup.MutableDenseHashTable( + dtypes.int64, + dtypes.int64, + default_value=-1, + empty_key=42, + deleted_key=[1, 2]) + self.assertAllEqual(0, table3.size().eval()) + + with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, + "Empty and deleted keys cannot be equal"): + table4 = lookup.MutableDenseHashTable( + dtypes.int64, + dtypes.int64, + default_value=-1, + empty_key=42, + deleted_key=42) + self.assertAllEqual(0, table4.size().eval()) + + with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, + "Empty and deleted keys cannot be equal"): + table5 = lookup.MutableDenseHashTable( + dtypes.int64, + dtypes.int64, + default_value=-1, + empty_key=[1, 2, 3], + deleted_key=[1, 2, 3]) + self.assertAllEqual(0, table5.size().eval()) + class IndexTableFromFile(test.TestCase): @@ -2558,7 +2772,11 @@ class MutableDenseHashTableBenchmark(MutableHashTableBenchmark): def _create_table(self): return lookup.MutableDenseHashTable( - dtypes.int64, dtypes.float32, default_value=0.0, empty_key=-1) + dtypes.int64, + dtypes.float32, + default_value=0.0, + empty_key=-1, + deleted_key=-2) if __name__ == "__main__": diff --git a/tensorflow/contrib/timeseries/python/timeseries/math_utils.py b/tensorflow/contrib/timeseries/python/timeseries/math_utils.py index 03da2b82e5..9c585fe6a7 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/math_utils.py +++ b/tensorflow/contrib/timeseries/python/timeseries/math_utils.py @@ -543,20 +543,25 @@ class TupleOfTensorsLookup(lookup.LookupInterface): overhead. """ - def __init__( - self, key_dtype, default_values, empty_key, name, checkpoint=True): + def __init__(self, + key_dtype, + default_values, + empty_key, + deleted_key, + name, + checkpoint=True): default_values_flat = nest.flatten(default_values) - self._hash_tables = nest.pack_sequence_as( - default_values, - [TensorValuedMutableDenseHashTable( + self._hash_tables = nest.pack_sequence_as(default_values, [ + TensorValuedMutableDenseHashTable( key_dtype=key_dtype, value_dtype=default_value.dtype.base_dtype, default_value=default_value, empty_key=empty_key, + deleted_key=deleted_key, name=name + "_{}".format(table_number), checkpoint=checkpoint) - for table_number, default_value - in enumerate(default_values_flat)]) + for table_number, default_value in enumerate(default_values_flat) + ]) self._name = name def lookup(self, keys): diff --git a/tensorflow/contrib/timeseries/python/timeseries/math_utils_test.py b/tensorflow/contrib/timeseries/python/timeseries/math_utils_test.py index c0de42b15b..91265b9b2e 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/math_utils_test.py +++ b/tensorflow/contrib/timeseries/python/timeseries/math_utils_test.py @@ -223,10 +223,12 @@ class TestLookupTable(test.TestCase): hash_table = math_utils.TupleOfTensorsLookup( key_dtype=dtypes.int64, default_values=[[ - array_ops.ones([3, 2], dtype=dtypes.float32), array_ops.zeros( - [5], dtype=dtypes.float64) - ], array_ops.ones([7, 7], dtype=dtypes.int64)], + array_ops.ones([3, 2], dtype=dtypes.float32), + array_ops.zeros([5], dtype=dtypes.float64) + ], + array_ops.ones([7, 7], dtype=dtypes.int64)], empty_key=-1, + deleted_key=-2, name="test_lookup") def stack_tensor(base_tensor): return array_ops.stack([base_tensor + 1, base_tensor + 2]) diff --git a/tensorflow/contrib/timeseries/python/timeseries/state_management.py b/tensorflow/contrib/timeseries/python/timeseries/state_management.py index 13eecd4d82..138406c616 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/state_management.py +++ b/tensorflow/contrib/timeseries/python/timeseries/state_management.py @@ -149,6 +149,7 @@ class ChainingStateManager(_OverridableStateManager): key_dtype=dtypes.int64, default_values=self._start_state, empty_key=-1, + deleted_key=-2, name="cached_states", checkpoint=self._checkpoint_state) -- cgit v1.2.3