Cleanup SDCA op based on additional review comments.

Change: 126354256
author: A. Unique TensorFlower <gardener@tensorflow.org> 2016-06-30 14:59:48 -0800
committer: TensorFlower Gardener <gardener@tensorflow.org> 2016-06-30 16:03:27 -0700
commit: 01fb3ef3652d1040778c45c40b5517c4c474771d (patch)
tree: 95213bf331ec586b1717f6f90f8ae1865ea6bc17
parent: b70103502b41df370906e8988b6593e55caf69cf (diff)
3 files changed, 25 insertions, 13 deletions
diff --git a/tensorflow/contrib/linear_optimizer/kernels/sdca_ops.cc b/tensorflow/contrib/linear_optimizer/kernels/sdca_ops.cc
index 921dad614c..15deca87e6 100644
--- a/tensorflow/contrib/linear_optimizer/kernels/sdca_ops.cc
+++ b/tensorflow/contrib/linear_optimizer/kernels/sdca_ops.cc
@@ -600,13 +600,13 @@ Status RunTrainStepsForMiniBatch(
     const DeviceBase::CpuWorkerThreads& worker_threads,
     const Regularizations& regularizations, const DualLossUpdater& loss_updater,
     FeaturesAndWeights* const features_and_weights,
-    TTypes<float>::Matrix example_state_data) {
+    TTypes<float>::Matrix* const example_state_data) {
   // Process examples in parallel, in a partitioned fashion.
   mutex mu;
   Status train_step_status GUARDED_BY(mu);
   auto train_step = [&](const int64 begin, const int64 end) {
     for (int64 example_index = begin; example_index < end; ++example_index) {
-      const float dual = example_state_data(example_index, 0);
+      const float dual = (*example_state_data)(example_index, 0);
       const float example_weight = example_weights(example_index);
       float example_label = example_labels(example_index);
       const Status conversion_status =
@@ -641,10 +641,10 @@ Status RunTrainStepsForMiniBatch(
           example_index, bounded_dual_delta, regularizations.symmetric_l2());
 
       // Update example data.
-      example_state_data(example_index, 0) = new_dual;
-      example_state_data(example_index, 1) = primal_loss;
-      example_state_data(example_index, 2) = dual_loss;
-      example_state_data(example_index, 3) = example_weight;
+      (*example_state_data)(example_index, 0) = new_dual;
+      (*example_state_data)(example_index, 1) = primal_loss;
+      (*example_state_data)(example_index, 2) = dual_loss;
+      (*example_state_data)(example_index, 3) = example_weight;
     }
   };
   // TODO(sibyl-Aix6ihai): Current multiplier 100000 works well empirically
@@ -737,11 +737,11 @@ class SdcaSolver : public OpKernel {
                        num_examples, example_labels, example_weights,
                        *context->device()->tensorflow_cpu_worker_threads(),
                        regularizations_, *loss_updater_, &features_and_weights,
-                       example_state_data));
+                       &example_state_data));
     }
     features_and_weights.AddDeltaWeights();
 
-    context->set_output(0, mutable_example_state_data_t);
+    context->set_output("example_data_data_out", mutable_example_state_data_t);
   }
 
  private:
@@ -775,6 +775,12 @@ class SdcaShrinkL1 : public OpKernel {
 };
 REGISTER_KERNEL_BUILDER(Name("SdcaShrinkL1").Device(DEVICE_CPU), SdcaShrinkL1);
 
+// Computes platform independent, compact and unique (with very high
+// probability) representation of an example id. It shouldn't be put in
+// persistent storage, as its implementation may change in the future.
+//
+// The current probability of at least one collision for 1B example_ids is
+// approximately 10^-21 (ie 2^60 / 2^129).
 class SdcaFprint : public OpKernel {
  public:
   explicit SdcaFprint(OpKernelConstruction* context) : OpKernel(context) {}
@@ -788,8 +794,8 @@ class SdcaFprint : public OpKernel {
     auto out_values = out->flat<string>();
 
     for (int64 i = 0; i < in_values.size(); ++i) {
-      const string& s = in_values(i);
-      Fprint128 fprint = Fingerprint128(s);
+      const Fprint128 fprint = Fingerprint128(in_values(i));
+      // Hex encode the fprint as a string (33 characters).
       out_values(i) = strings::StrCat(strings::FpToString(fprint.high64), "-",
                                       strings::FpToString(fprint.low64));
     }
diff --git a/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py b/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py
index 8ecf4bfe89..2b9a95a1d6 100644
--- a/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py
+++ b/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py
@@ -781,7 +781,14 @@ class SdcaWithHingeLossTest(SdcaOptimizerTest):
 
 
 class SdcaFprintTest(TensorFlowTestCase):
-  """Tests for the SdcaFprint op."""
+  """Tests for the SdcaFprint op.
+
+  This is one way of enforcing the platform-agnostic nature of SdcaFprint.
+  Basically we are checking against exact values and this test could be running
+  across different platforms. Note that it is fine for expected values to change
+  in the future, if the implementation of SdcaFprint changes (ie this is *not* a
+  frozen test).
+  """
 
   def testFprint(self):
     with self.test_session():
diff --git a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py
index f669407c17..b4e9e5b23d 100644
--- a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py
+++ b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py
@@ -95,8 +95,7 @@ class SdcaModel(object):
     ```
 
     In the training program you will just have to run the returned Op from
-    minimize(). You should also eventually cleanup the temporary state used by
-    the model, by resetting its (possibly shared) container.
+    minimize().
 
     ```python
     # Execute opt_op and train for num_steps.
author	A. Unique TensorFlower <gardener@tensorflow.org>	2016-06-30 14:59:48 -0800
committer	TensorFlower Gardener <gardener@tensorflow.org>	2016-06-30 16:03:27 -0700
commit	01fb3ef3652d1040778c45c40b5517c4c474771d (patch)
tree	95213bf331ec586b1717f6f90f8ae1865ea6bc17
parent	b70103502b41df370906e8988b6593e55caf69cf (diff)