aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar A. Unique TensorFlower <nobody@tensorflow.org>2016-03-15 13:49:34 -0800
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2016-03-15 15:16:03 -0700
commit0d18da7bdfae192896e87adb8a69199f9b1386b5 (patch)
tree75d1cad727060b2d1c215305945a2907fd5f8bd4
parente42c2af56b054194cd72f46a717fb9a5506071a4 (diff)
Removing the check for duplicate ids.
Change: 117280722
-rw-r--r--tensorflow/contrib/linear_optimizer/kernels/sdca_ops.cc14
-rw-r--r--tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py29
2 files changed, 2 insertions, 41 deletions
diff --git a/tensorflow/contrib/linear_optimizer/kernels/sdca_ops.cc b/tensorflow/contrib/linear_optimizer/kernels/sdca_ops.cc
index c2b403dbe0..852ae2a04f 100644
--- a/tensorflow/contrib/linear_optimizer/kernels/sdca_ops.cc
+++ b/tensorflow/contrib/linear_optimizer/kernels/sdca_ops.cc
@@ -510,20 +510,6 @@ class SdcaSolver : public OpKernel {
"The number of example ids (%ld) should match the number "
"of example weights (%d).",
example_ids.size(), num_examples)));
- const int64 num_duplicate_example_ids = [&] {
- // TODO(katsiapis): Benchmark and/or optimize.
- std::unordered_set<StringPiece, StringPiece::Hasher> unique_ids(
- example_ids.size());
- for (size_t i = 0; i < example_ids.size(); ++i) {
- unique_ids.emplace(example_ids(i));
- }
- return example_ids.size() - unique_ids.size();
- }();
- OP_REQUIRES(context, num_duplicate_example_ids == 0,
- errors::InvalidArgument(strings::Printf(
- "Detected %lld duplicates in example_ids, which usually "
- "indicates a bug in the input data.",
- num_duplicate_example_ids)));
OpMutableInputList sparse_weights_inputs;
OP_REQUIRES_OK(context, context->mutable_input_list(
diff --git a/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py b/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py
index 3d812a2cbf..e973a88bb7 100644
--- a/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py
+++ b/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py
@@ -172,33 +172,6 @@ class SdcaOptimizerTest(TensorFlowTestCase):
'No examples found or all examples have zero weight.'):
lr.approximate_duality_gap().eval()
- def testDuplicateExampleIds(self):
- # Setup test data with 1 positive, and 1 negative example.
- example_protos = [
- make_example_proto(
- {'age': [0],
- 'gender': [0]}, 0),
- make_example_proto(
- {'age': [1],
- 'gender': [1]}, 1),
- ]
- example_weights = [1.0, 1.0]
- with self._single_threaded_test_session():
- examples = make_example_dict(example_protos, example_weights)
- examples['example_ids'] = ['duplicate_id'
- for _ in examples['example_ids']]
- variables = make_variable_dict(1, 1)
- options = dict(symmetric_l2_regularization=0.5,
- symmetric_l1_regularization=0,
- loss_type='squared_loss')
-
- lr = SdcaModel(CONTAINER, examples, variables, options)
- tf.initialize_all_variables().run()
- self.assertAllClose([0.0, 0.0], lr.predictions(examples).eval())
- with self.assertRaisesOpError('Detected 1 duplicates in example_ids'):
- lr.minimize().run()
- self.assertAllClose([0.0, 0.0], lr.predictions(examples).eval())
-
class SdcaWithLogisticLossTest(SdcaOptimizerTest):
"""SDCA optimizer test class for logistic loss."""
@@ -464,6 +437,8 @@ class SdcaWithLogisticLossTest(SdcaOptimizerTest):
rtol=1e-2,
atol=1e-2)
+ # TODO(katsiapis): add a test for the case when examples at the end of an
+ # epoch are repeated, since example ids may be duplicated.
class SdcaWithLinearLossTest(SdcaOptimizerTest):
"""SDCA optimizer test class for linear (squared) loss."""