diff options
author | 2016-03-15 13:49:34 -0800 | |
---|---|---|
committer | 2016-03-15 15:16:03 -0700 | |
commit | 0d18da7bdfae192896e87adb8a69199f9b1386b5 (patch) | |
tree | 75d1cad727060b2d1c215305945a2907fd5f8bd4 | |
parent | e42c2af56b054194cd72f46a717fb9a5506071a4 (diff) |
Removing the check for duplicate ids.
Change: 117280722
-rw-r--r-- | tensorflow/contrib/linear_optimizer/kernels/sdca_ops.cc | 14 | ||||
-rw-r--r-- | tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py | 29 |
2 files changed, 2 insertions, 41 deletions
diff --git a/tensorflow/contrib/linear_optimizer/kernels/sdca_ops.cc b/tensorflow/contrib/linear_optimizer/kernels/sdca_ops.cc index c2b403dbe0..852ae2a04f 100644 --- a/tensorflow/contrib/linear_optimizer/kernels/sdca_ops.cc +++ b/tensorflow/contrib/linear_optimizer/kernels/sdca_ops.cc @@ -510,20 +510,6 @@ class SdcaSolver : public OpKernel { "The number of example ids (%ld) should match the number " "of example weights (%d).", example_ids.size(), num_examples))); - const int64 num_duplicate_example_ids = [&] { - // TODO(katsiapis): Benchmark and/or optimize. - std::unordered_set<StringPiece, StringPiece::Hasher> unique_ids( - example_ids.size()); - for (size_t i = 0; i < example_ids.size(); ++i) { - unique_ids.emplace(example_ids(i)); - } - return example_ids.size() - unique_ids.size(); - }(); - OP_REQUIRES(context, num_duplicate_example_ids == 0, - errors::InvalidArgument(strings::Printf( - "Detected %lld duplicates in example_ids, which usually " - "indicates a bug in the input data.", - num_duplicate_example_ids))); OpMutableInputList sparse_weights_inputs; OP_REQUIRES_OK(context, context->mutable_input_list( diff --git a/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py b/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py index 3d812a2cbf..e973a88bb7 100644 --- a/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py +++ b/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py @@ -172,33 +172,6 @@ class SdcaOptimizerTest(TensorFlowTestCase): 'No examples found or all examples have zero weight.'): lr.approximate_duality_gap().eval() - def testDuplicateExampleIds(self): - # Setup test data with 1 positive, and 1 negative example. 
- example_protos = [ - make_example_proto( - {'age': [0], - 'gender': [0]}, 0), - make_example_proto( - {'age': [1], - 'gender': [1]}, 1), - ] - example_weights = [1.0, 1.0] - with self._single_threaded_test_session(): - examples = make_example_dict(example_protos, example_weights) - examples['example_ids'] = ['duplicate_id' - for _ in examples['example_ids']] - variables = make_variable_dict(1, 1) - options = dict(symmetric_l2_regularization=0.5, - symmetric_l1_regularization=0, - loss_type='squared_loss') - - lr = SdcaModel(CONTAINER, examples, variables, options) - tf.initialize_all_variables().run() - self.assertAllClose([0.0, 0.0], lr.predictions(examples).eval()) - with self.assertRaisesOpError('Detected 1 duplicates in example_ids'): - lr.minimize().run() - self.assertAllClose([0.0, 0.0], lr.predictions(examples).eval()) - class SdcaWithLogisticLossTest(SdcaOptimizerTest): """SDCA optimizer test class for logistic loss.""" @@ -464,6 +437,8 @@ class SdcaWithLogisticLossTest(SdcaOptimizerTest): rtol=1e-2, atol=1e-2) + # TODO(katsiapis): add a test for the case when examples at the end of an + # epoch are repeated, since example id may be duplicated. class SdcaWithLinearLossTest(SdcaOptimizerTest): """SDCA optimizer test class for linear (squared) loss.""" |