Allow mixing V1 and V2 feature columns in canned estimators. This is required for TF Hub use cases where users might pass new (V2) feature columns to old model code. The support is implemented by making V2 feature columns support the V1 API. It is needed only temporarily and will definitely be removed by TF 2.0, possibly earlier depending on what guarantees TF Hub provides.

The only case we don't allow here is mixing V2 shared embedding columns with V1 feature columns. V2 shared feature columns depend on a SharedEmbeddingState manager that would have to be passed in to the various APIs, and there wasn't a clean way to make that work. Mixing V2 feature columns with V1 shared embedding columns is fine, though, as are all other combinations.

PiperOrigin-RevId: 216359041
author: Rohan Jain <rohanj@google.com> 2018-10-09 08:16:49 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> 2018-10-09 08:21:39 -0700
commit: cadcacc6224bcbb8a05bf3b70d625d9024a9c0f3 (patch)
tree: fe73a2d1ed500dbd1e5b0f6f20229e534f813d90 /tensorflow/python/estimator
parent: a0ed9452d5c7f897e26788d8dca5164cb6fba023 (diff)
3 files changed, 263 insertions, 17 deletions
diff --git a/tensorflow/python/estimator/canned/dnn_linear_combined_test.py b/tensorflow/python/estimator/canned/dnn_linear_combined_test.py
index ae968e717a..ab945d7b1a 100644
--- a/tensorflow/python/estimator/canned/dnn_linear_combined_test.py
+++ b/tensorflow/python/estimator/canned/dnn_linear_combined_test.py
@@ -317,16 +317,10 @@ class DNNLinearCombinedRegressorIntegrationTest(test.TestCase):
       writer_cache.FileWriterCache.clear()
       shutil.rmtree(self._model_dir)
 
-  def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
-                          input_dimension, label_dimension, batch_size,
-                          fc_impl):
-    linear_feature_columns = [
-        fc_impl.numeric_column('x', shape=(input_dimension,))
-    ]
-    dnn_feature_columns = [
-        fc_impl.numeric_column('x', shape=(input_dimension,))
-    ]
-    feature_columns = linear_feature_columns + dnn_feature_columns
+  def _test_complete_flow_helper(
+      self, linear_feature_columns, dnn_feature_columns, feature_spec,
+      train_input_fn, eval_input_fn, predict_input_fn, input_dimension,
+      label_dimension, batch_size):
     est = dnn_linear_combined.DNNLinearCombinedRegressor(
         linear_feature_columns=linear_feature_columns,
         dnn_hidden_units=(2, 2),
@@ -351,14 +345,63 @@ class DNNLinearCombinedRegressorIntegrationTest(test.TestCase):
     self.assertAllEqual((batch_size, label_dimension), predictions.shape)
 
     # EXPORT
-    feature_spec = fc_impl.make_parse_example_spec(feature_columns)
     serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
         feature_spec)
     export_dir = est.export_savedmodel(tempfile.mkdtemp(),
                                        serving_input_receiver_fn)
     self.assertTrue(gfile.Exists(export_dir))
 
-  def test_numpy_input_fn(self, fc_impl):
+  def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
+                          input_dimension, label_dimension, batch_size,
+                          fc_impl):
+    linear_feature_columns = [
+        fc_impl.numeric_column('x', shape=(input_dimension,))
+    ]
+    dnn_feature_columns = [
+        fc_impl.numeric_column('x', shape=(input_dimension,))
+    ]
+    feature_columns = linear_feature_columns + dnn_feature_columns
+    feature_spec = fc_impl.make_parse_example_spec(feature_columns)
+    self._test_complete_flow_helper(linear_feature_columns, dnn_feature_columns,
+                                    feature_spec, train_input_fn, eval_input_fn,
+                                    predict_input_fn, input_dimension,
+                                    label_dimension, batch_size)
+
+  def _test_complete_flow_mix1(self, train_input_fn, eval_input_fn,
+                               predict_input_fn, input_dimension,
+                               label_dimension, batch_size, fc_impl):
+    del fc_impl
+    linear_feature_columns = [
+        feature_column.numeric_column('x', shape=(input_dimension,))
+    ]
+    dnn_feature_columns = [
+        feature_column_v2.numeric_column('x', shape=(input_dimension,))
+    ]
+    feature_columns = linear_feature_columns + dnn_feature_columns
+    feature_spec = feature_column.make_parse_example_spec(feature_columns)
+    self._test_complete_flow_helper(linear_feature_columns, dnn_feature_columns,
+                                    feature_spec, train_input_fn, eval_input_fn,
+                                    predict_input_fn, input_dimension,
+                                    label_dimension, batch_size)
+
+  def _test_complete_flow_mix2(self, train_input_fn, eval_input_fn,
+                               predict_input_fn, input_dimension,
+                               label_dimension, batch_size, fc_impl):
+    del fc_impl
+    linear_feature_columns = [
+        feature_column_v2.numeric_column('x', shape=(input_dimension,))
+    ]
+    dnn_feature_columns = [
+        feature_column.numeric_column('x', shape=(input_dimension,))
+    ]
+    feature_columns = linear_feature_columns + dnn_feature_columns
+    feature_spec = feature_column.make_parse_example_spec(feature_columns)
+    self._test_complete_flow_helper(linear_feature_columns, dnn_feature_columns,
+                                    feature_spec, train_input_fn, eval_input_fn,
+                                    predict_input_fn, input_dimension,
+                                    label_dimension, batch_size)
+
+  def _test_numpy_input_fn_helper(self, fc_impl, fn_to_run):
     """Tests complete flow with numpy_input_fn."""
     label_dimension = 2
     batch_size = 10
@@ -381,7 +424,7 @@ class DNNLinearCombinedRegressorIntegrationTest(test.TestCase):
         batch_size=batch_size,
         shuffle=False)
 
-    self._test_complete_flow(
+    fn_to_run(
         train_input_fn=train_input_fn,
         eval_input_fn=eval_input_fn,
         predict_input_fn=predict_input_fn,
@@ -390,7 +433,16 @@ class DNNLinearCombinedRegressorIntegrationTest(test.TestCase):
         batch_size=batch_size,
         fc_impl=fc_impl)
 
-  def test_pandas_input_fn(self, fc_impl):
+  def test_numpy_input_fn_basic(self, fc_impl):
+    self._test_numpy_input_fn_helper(fc_impl, self._test_complete_flow)
+
+  def test_numpy_input_fn_mix1(self, fc_impl):
+    self._test_numpy_input_fn_helper(fc_impl, self._test_complete_flow_mix1)
+
+  def test_numpy_input_fn_mix2(self, fc_impl):
+    self._test_numpy_input_fn_helper(fc_impl, self._test_complete_flow_mix2)
+
+  def _test_pandas_input_fn_helper(self, fc_impl, fn_to_run):
     """Tests complete flow with pandas_input_fn."""
     if not HAS_PANDAS:
       return
@@ -415,7 +467,7 @@ class DNNLinearCombinedRegressorIntegrationTest(test.TestCase):
         batch_size=batch_size,
         shuffle=False)
 
-    self._test_complete_flow(
+    fn_to_run(
         train_input_fn=train_input_fn,
         eval_input_fn=eval_input_fn,
         predict_input_fn=predict_input_fn,
@@ -424,7 +476,16 @@ class DNNLinearCombinedRegressorIntegrationTest(test.TestCase):
         batch_size=batch_size,
         fc_impl=fc_impl)
 
-  def test_input_fn_from_parse_example(self, fc_impl):
+  def test_pandas_input_fn_basic(self, fc_impl):
+    self._test_pandas_input_fn_helper(fc_impl, self._test_complete_flow)
+
+  def test_pandas_input_fn_mix1(self, fc_impl):
+    self._test_pandas_input_fn_helper(fc_impl, self._test_complete_flow_mix1)
+
+  def test_pandas_input_fn_mix2(self, fc_impl):
+    self._test_pandas_input_fn_helper(fc_impl, self._test_complete_flow_mix2)
+
+  def _test_input_fn_from_parse_example_helper(self, fc_impl, fn_to_run):
     """Tests complete flow with input_fn constructed from parse_example."""
     label_dimension = 2
     batch_size = 10
@@ -466,7 +527,7 @@ class DNNLinearCombinedRegressorIntegrationTest(test.TestCase):
       features.pop('y')
       return features, None
 
-    self._test_complete_flow(
+    fn_to_run(
         train_input_fn=_train_input_fn,
         eval_input_fn=_eval_input_fn,
         predict_input_fn=_predict_input_fn,
@@ -475,6 +536,18 @@ class DNNLinearCombinedRegressorIntegrationTest(test.TestCase):
         batch_size=batch_size,
         fc_impl=fc_impl)
 
+  def test_input_fn_from_parse_example_basic(self, fc_impl):
+    self._test_input_fn_from_parse_example_helper(fc_impl,
+                                                  self._test_complete_flow)
+
+  def test_input_fn_from_parse_example_mix1(self, fc_impl):
+    self._test_input_fn_from_parse_example_helper(fc_impl,
+                                                  self._test_complete_flow_mix1)
+
+  def test_input_fn_from_parse_example_mix2(self, fc_impl):
+    self._test_input_fn_from_parse_example_helper(fc_impl,
+                                                  self._test_complete_flow_mix2)
+
 
 # A function to mimic dnn-classifier init reuse same tests.
 def _dnn_classifier_fn(hidden_units,
diff --git a/tensorflow/python/estimator/canned/dnn_testing_utils.py b/tensorflow/python/estimator/canned/dnn_testing_utils.py
index cd66d0a3bd..71d7e54783 100644
--- a/tensorflow/python/estimator/canned/dnn_testing_utils.py
+++ b/tensorflow/python/estimator/canned/dnn_testing_utils.py
@@ -34,6 +34,7 @@ from tensorflow.python.estimator.canned import metric_keys
 from tensorflow.python.estimator.canned import prediction_keys
 from tensorflow.python.estimator.inputs import numpy_io
 from tensorflow.python.feature_column import feature_column
+from tensorflow.python.feature_column import feature_column_v2
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
@@ -479,6 +480,60 @@ class BaseDNNModelFnTest(object):
           else:
             self.fail('Invalid mode: {}'.format(mode))
 
+  def test_multi_feature_column_mix_multi_dim_logits(self):
+    """Tests multiple feature columns and multi-dimensional logits.
+
+    All numbers are the same as test_multi_dim_input_multi_dim_logits. The only
+    difference is that the input consists of two 1D feature columns, instead of
+    one 2D feature column.
+    """
+    base_global_step = 100
+    create_checkpoint((
+        ([[.6, .5], [-.6, -.5]], [.1, -.1]),
+        ([[1., .8], [-.8, -1.]], [.2, -.2]),
+        ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),
+    ), base_global_step, self._model_dir)
+    hidden_units = (2, 2)
+    logits_dimension = 3
+    inputs = ([[10.]], [[8.]])
+    expected_logits = [[-0.48, 0.48, 0.39]]
+
+    for mode in [
+        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
+        model_fn.ModeKeys.PREDICT
+    ]:
+      with ops.Graph().as_default():
+        training_util.create_global_step()
+        head = mock_head(
+            self,
+            hidden_units=hidden_units,
+            logits_dimension=logits_dimension,
+            expected_logits=expected_logits)
+        estimator_spec = self._dnn_model_fn(
+            features={
+                'age': constant_op.constant(inputs[0]),
+                'height': constant_op.constant(inputs[1])
+            },
+            labels=constant_op.constant([[1]]),
+            mode=mode,
+            head=head,
+            hidden_units=hidden_units,
+            feature_columns=[
+                feature_column.numeric_column('age'),
+                feature_column_v2.numeric_column('height')
+            ],
+            optimizer=mock_optimizer(self, hidden_units))
+        with monitored_session.MonitoredTrainingSession(
+            checkpoint_dir=self._model_dir) as sess:
+          if mode == model_fn.ModeKeys.TRAIN:
+            sess.run(estimator_spec.train_op)
+          elif mode == model_fn.ModeKeys.EVAL:
+            sess.run(estimator_spec.loss)
+          elif mode == model_fn.ModeKeys.PREDICT:
+            sess.run(estimator_spec.predictions)
+          else:
+            self.fail('Invalid mode: {}'.format(mode))
+
   def test_features_tensor_raises_value_error(self):
     """Tests that passing a Tensor for features raises a ValueError."""
     hidden_units = (2, 2)
@@ -806,6 +861,60 @@ class BaseDNNLogitFnTest(object):
               checkpoint_dir=self._model_dir) as sess:
             self.assertAllClose(expected_logits, sess.run(logits))
 
+  def test_multi_feature_column_mix_multi_dim_logits(self):
+    """Tests multiple feature columns and multi-dimensional logits.
+
+    All numbers are the same as test_multi_dim_input_multi_dim_logits. The only
+    difference is that the input consists of two 1D feature columns, instead of
+    one 2D feature column.
+    """
+    base_global_step = 100
+    create_checkpoint((
+        ([[.6, .5], [-.6, -.5]], [.1, -.1]),
+        ([[1., .8], [-.8, -1.]], [.2, -.2]),
+        ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),
+    ), base_global_step, self._model_dir)
+
+    hidden_units = (2, 2)
+    logits_dimension = 3
+    inputs = ([[10.]], [[8.]])
+    expected_logits = [[-0.48, 0.48, 0.39]]
+
+    for mode in [
+        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
+        model_fn.ModeKeys.PREDICT
+    ]:
+      with ops.Graph().as_default():
+        # Global step needed for MonitoredSession, which is in turn used to
+        # explicitly set variable weights through a checkpoint.
+        training_util.create_global_step()
+        # Use a variable scope here with 'dnn', emulating the dnn model_fn, so
+        # the checkpoint naming is shared.
+        with variable_scope.variable_scope('dnn'):
+          input_layer_partitioner = (
+              partitioned_variables.min_max_variable_partitioner(
+                  max_partitions=0, min_slice_size=64 << 20))
+          logit_fn = self._dnn_logit_fn_builder(
+              units=logits_dimension,
+              hidden_units=hidden_units,
+              feature_columns=[
+                  feature_column.numeric_column('age'),
+                  feature_column_v2.numeric_column('height')
+              ],
+              activation_fn=nn.relu,
+              dropout=None,
+              input_layer_partitioner=input_layer_partitioner,
+              batch_norm=False)
+          logits = logit_fn(
+              features={
+                  'age': constant_op.constant(inputs[0]),
+                  'height': constant_op.constant(inputs[1])
+              },
+              mode=mode)
+          with monitored_session.MonitoredTrainingSession(
+              checkpoint_dir=self._model_dir) as sess:
+            self.assertAllClose(expected_logits, sess.run(logits))
+
 
 class BaseDNNWarmStartingTest(object):
 
diff --git a/tensorflow/python/estimator/canned/linear_testing_utils.py b/tensorflow/python/estimator/canned/linear_testing_utils.py
index 827352a70b..2cfa2a8e15 100644
--- a/tensorflow/python/estimator/canned/linear_testing_utils.py
+++ b/tensorflow/python/estimator/canned/linear_testing_utils.py
@@ -400,6 +400,45 @@ class BaseLinearRegressorEvaluationTest(object):
     # [213.0, 421.0], while label is [213., 421.]. Loss = 0.
     self.assertAlmostEqual(0, eval_metrics[metric_keys.MetricKeys.LOSS])
 
+  def test_evaluation_for_multiple_feature_columns_mix(self):
+    with ops.Graph().as_default():
+      variables_lib.Variable([[10.0]], name=AGE_WEIGHT_NAME)
+      variables_lib.Variable([[2.0]], name=HEIGHT_WEIGHT_NAME)
+      variables_lib.Variable([5.0], name=BIAS_NAME)
+      variables_lib.Variable(
+          100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64)
+      save_variables_to_ckpt(self._model_dir)
+
+    batch_size = 2
+    feature_columns = [
+        feature_column.numeric_column('age'),
+        feature_column_v2.numeric_column('height')
+    ]
+
+    def _input_fn():
+      features_ds = dataset_ops.Dataset.from_tensor_slices({
+          'age': np.array([20, 40]),
+          'height': np.array([4, 8])
+      })
+      labels_ds = dataset_ops.Dataset.from_tensor_slices(
+          np.array([[213.], [421.]]))
+      return (dataset_ops.Dataset.zip((features_ds, labels_ds))
+              .batch(batch_size).repeat(None))
+
+    est = self._linear_regressor_fn(
+        feature_columns=feature_columns, model_dir=self._model_dir)
+
+    eval_metrics = est.evaluate(input_fn=_input_fn, steps=1)
+    self.assertItemsEqual(
+        (metric_keys.MetricKeys.LOSS, metric_keys.MetricKeys.LOSS_MEAN,
+         metric_keys.MetricKeys.PREDICTION_MEAN,
+         metric_keys.MetricKeys.LABEL_MEAN, ops.GraphKeys.GLOBAL_STEP),
+        eval_metrics.keys())
+
+    # Logit is [(20. * 10.0 + 4 * 2.0 + 5.0), (40. * 10.0 + 8 * 2.0 + 5.0)] =
+    # [213.0, 421.0], while label is [213., 421.]. Loss = 0.
+    self.assertAlmostEqual(0, eval_metrics[metric_keys.MetricKeys.LOSS])
+
 
 class BaseLinearRegressorPredictTest(object):
 
@@ -497,6 +536,31 @@ class BaseLinearRegressorPredictTest(object):
     # x0 * weight0 + x1 * weight1 + bias = 2. * 10. + 3. * 20 + .2 = 80.2
     self.assertAllClose([[80.2]], predicted_scores)
 
+  def testTwoFeatureColumnsMix(self):
+    """Tests predict with two feature columns."""
+    with ops.Graph().as_default():
+      variables_lib.Variable([[10.]], name='linear/linear_model/x0/weights')
+      variables_lib.Variable([[20.]], name='linear/linear_model/x1/weights')
+      variables_lib.Variable([.2], name=BIAS_NAME)
+      variables_lib.Variable(100, name='global_step', dtype=dtypes.int64)
+      save_variables_to_ckpt(self._model_dir)
+
+    linear_regressor = self._linear_regressor_fn(
+        feature_columns=(feature_column.numeric_column('x0'),
+                         feature_column_v2.numeric_column('x1')),
+        model_dir=self._model_dir)
+
+    def _predict_input_fn():
+      return dataset_ops.Dataset.from_tensor_slices({
+          'x0': np.array([[2.]]),
+          'x1': np.array([[3.]])
+      }).batch(1)
+
+    predictions = linear_regressor.predict(input_fn=_predict_input_fn)
+    predicted_scores = list([x['predictions'] for x in predictions])
+    # x0 * weight0 + x1 * weight1 + bias = 2. * 10. + 3. * 20 + .2 = 80.2
+    self.assertAllClose([[80.2]], predicted_scores)
+
   def testSparseCombiner(self):
     w_a = 2.0
     w_b = 3.0
author	Rohan Jain <rohanj@google.com>	2018-10-09 08:16:49 -0700
committer	TensorFlower Gardener <gardener@tensorflow.org>	2018-10-09 08:21:39 -0700
commit	cadcacc6224bcbb8a05bf3b70d625d9024a9c0f3 (patch)
tree	fe73a2d1ed500dbd1e5b0f6f20229e534f813d90 /tensorflow/python/estimator
parent	a0ed9452d5c7f897e26788d8dca5164cb6fba023 (diff)