From 01fb3ef3652d1040778c45c40b5517c4c474771d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 30 Jun 2016 14:59:48 -0800 Subject: Cleanup SDCA op based on additional review comments. Change: 126354256 --- .../contrib/linear_optimizer/kernels/sdca_ops.cc | 26 +++++++++++++--------- .../python/kernel_tests/sdca_ops_test.py | 9 +++++++- .../linear_optimizer/python/ops/sdca_ops.py | 3 +-- 3 files changed, 25 insertions(+), 13 deletions(-) diff --git a/tensorflow/contrib/linear_optimizer/kernels/sdca_ops.cc b/tensorflow/contrib/linear_optimizer/kernels/sdca_ops.cc index 921dad614c..15deca87e6 100644 --- a/tensorflow/contrib/linear_optimizer/kernels/sdca_ops.cc +++ b/tensorflow/contrib/linear_optimizer/kernels/sdca_ops.cc @@ -600,13 +600,13 @@ Status RunTrainStepsForMiniBatch( const DeviceBase::CpuWorkerThreads& worker_threads, const Regularizations& regularizations, const DualLossUpdater& loss_updater, FeaturesAndWeights* const features_and_weights, - TTypes::Matrix example_state_data) { + TTypes::Matrix* const example_state_data) { // Process examples in parallel, in a partitioned fashion. mutex mu; Status train_step_status GUARDED_BY(mu); auto train_step = [&](const int64 begin, const int64 end) { for (int64 example_index = begin; example_index < end; ++example_index) { - const float dual = example_state_data(example_index, 0); + const float dual = (*example_state_data)(example_index, 0); const float example_weight = example_weights(example_index); float example_label = example_labels(example_index); const Status conversion_status = @@ -641,10 +641,10 @@ Status RunTrainStepsForMiniBatch( example_index, bounded_dual_delta, regularizations.symmetric_l2()); // Update example data. - example_state_data(example_index, 0) = new_dual; - example_state_data(example_index, 1) = primal_loss; - example_state_data(example_index, 2) = dual_loss; - example_state_data(example_index, 3) = example_weight; + (*example_state_data)(example_index, 0) = new_dual; + (*example_state_data)(example_index, 1) = primal_loss; + (*example_state_data)(example_index, 2) = dual_loss; + (*example_state_data)(example_index, 3) = example_weight; } }; // TODO(sibyl-Aix6ihai): Current multiplier 100000 works well empirically @@ -737,11 +737,11 @@ class SdcaSolver : public OpKernel { num_examples, example_labels, example_weights, *context->device()->tensorflow_cpu_worker_threads(), regularizations_, *loss_updater_, &features_and_weights, - example_state_data)); + &example_state_data)); } features_and_weights.AddDeltaWeights(); - context->set_output(0, mutable_example_state_data_t); + context->set_output("example_data_data_out", mutable_example_state_data_t); } private: @@ -775,6 +775,12 @@ class SdcaShrinkL1 : public OpKernel { }; REGISTER_KERNEL_BUILDER(Name("SdcaShrinkL1").Device(DEVICE_CPU), SdcaShrinkL1); +// Computes platform independent, compact and unique (with very high +// probability) representation of an example id. It shouldn't be put in +// persistent storage, as its implementation may change in the future. +// +// The current probability of at least one collision for 1B example_ids is +// approximately 10^-21 (ie 2^60 / 2^129). class SdcaFprint : public OpKernel { public: explicit SdcaFprint(OpKernelConstruction* context) : OpKernel(context) {} @@ -788,8 +794,8 @@ class SdcaFprint : public OpKernel { auto out_values = out->flat(); for (int64 i = 0; i < in_values.size(); ++i) { - const string& s = in_values(i); - Fprint128 fprint = Fingerprint128(s); + const Fprint128 fprint = Fingerprint128(in_values(i)); + // Hex encode the fprint as a string (33 characters). out_values(i) = strings::StrCat(strings::FpToString(fprint.high64), "-", strings::FpToString(fprint.low64)); } diff --git a/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py b/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py index 8ecf4bfe89..2b9a95a1d6 100644 --- a/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py +++ b/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py @@ -781,7 +781,14 @@ class SdcaWithHingeLossTest(SdcaOptimizerTest): class SdcaFprintTest(TensorFlowTestCase): - """Tests for the SdcaFprint op.""" + """Tests for the SdcaFprint op. + + This is one way of enforcing the platform-agnostic nature of SdcaFprint. + Basically we are checking against exact values and this test could be running + across different platforms. Note that it is fine for expected values to change + in the future, if the implementation of SdcaFprint changes (ie this is *not* a + frozen test). + """ def testFprint(self): with self.test_session(): diff --git a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py index f669407c17..b4e9e5b23d 100644 --- a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py +++ b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py @@ -95,8 +95,7 @@ class SdcaModel(object): ``` In the training program you will just have to run the returned Op from - minimize(). You should also eventually cleanup the temporary state used by - the model, by resetting its (possibly shared) container. + minimize(). ```python # Execute opt_op and train for num_steps. -- cgit v1.2.3