diff options
author | Mehdi Sharifzadeh <mehdisharif@google.com> | 2018-08-17 15:41:04 -0700 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2018-08-17 15:45:37 -0700 |
commit | 4cb780bb01aa6a07f49d8db82ec4d42e314e3301 (patch) | |
tree | 6b83655af1b92bb49a6aab353169e79da5c1f556 /tensorflow/contrib/estimator | |
parent | 53c765465cf8ac1ea77e2c5f062a56b5b94dfa9b (diff) |
New Class StepsExporter added which exports the serving graph and checkpoints of the model in user specified steps.
PiperOrigin-RevId: 209218489
Diffstat (limited to 'tensorflow/contrib/estimator')
-rw-r--r-- | tensorflow/contrib/estimator/BUILD | 28 | ||||
-rw-r--r-- | tensorflow/contrib/estimator/python/estimator/exporter.py | 280 | ||||
-rw-r--r-- | tensorflow/contrib/estimator/python/estimator/exporter_test.py | 206 |
3 files changed, 514 insertions, 0 deletions
diff --git a/tensorflow/contrib/estimator/BUILD b/tensorflow/contrib/estimator/BUILD index 82272bf120..77f62df99d 100644 --- a/tensorflow/contrib/estimator/BUILD +++ b/tensorflow/contrib/estimator/BUILD @@ -20,6 +20,7 @@ py_library( ":dnn_linear_combined", ":early_stopping", ":export", + ":exporter", ":extenders", ":head", ":hooks", @@ -220,6 +221,33 @@ py_test( ) py_library( + name = "exporter", + srcs = [ + "python/estimator/exporter.py", + ], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow/python:framework_ops", + "//tensorflow/python:platform", + "//tensorflow/python:summary", + "//tensorflow/python/estimator:exporter", + ], +) + +py_test( + name = "exporter_test", + size = "medium", + srcs = ["python/estimator/exporter_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":exporter", + "//tensorflow/python:platform", + "//tensorflow/python/estimator", + "//tensorflow/python/estimator:exporter", + ], +) + +py_library( name = "head", srcs = [ "python/estimator/head.py", diff --git a/tensorflow/contrib/estimator/python/estimator/exporter.py b/tensorflow/contrib/estimator/python/estimator/exporter.py new file mode 100644 index 0000000000..09d7440605 --- /dev/null +++ b/tensorflow/contrib/estimator/python/estimator/exporter.py @@ -0,0 +1,280 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Implements StepsExporter to export the model in user specified steps.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os + +from tensorflow.python.estimator import exporter +from tensorflow.python.framework import ops +from tensorflow.python.platform import gfile +from tensorflow.python.platform import tf_logging +from tensorflow.python.summary import summary_iterator + +DEFAULT_GLOBAL_STEP_KEY = ops.GraphKeys.GLOBAL_STEP + + +class StepsExporter(exporter.Exporter): + """This class exports the model in user specified steps. + + This class exports the model at the steps given by the `steps_to_keep` + argument. Each number in the list is treated as a lower bound for model + exports, to handle the case when evaluation is performed at different steps. + + Consider this example: + + ``` + steps_to_keep = [1, 2, 3, 6, 7, 10, 12, 25] + ``` + + The model is evaluated at step increments of 5: `[5, 10, 15, 20, 25, 30]`. + The `StepsExporter` will export the model when it has reached steps + `[5, 10, 15, 25]`. + + This example illustrates the two cases when the model is exported: + + 1. Model is evaluated on a step defined in the list `steps_to_keep`. + + In the example, the model is exported on step `10` and `25`. + + 2. Model is evaluated on a step not defined in the list `steps_to_keep`, but + is still exported because a step in `steps_to_keep` was missed. + + In the example, when the model reaches step `5`, the model is exported even + though `steps_to_keep` does not contain `5`. Step `5` is exported to make + up for step `3`, which was missed. Steps `1` and `2` in `steps_to_keep` are + skipped completely (e.g. say the model is evaluated at step `6`. It will + **not** be exported to make up for step `2`). + + Using the `steps_to_keep` list as a lower bound allows users to define + approximate step boundaries for exporting their models, and avoid frustrating + off-by-one calculation errors. + + Sample Use Cases: + There are specific points during the training when having a saved version of + the model would be useful. One example is at the end of each training phase + when the set of freezed weights is changed. + Another good use case is saving the model at the end of each epoch for + visualization or retraining. + """ + + def __init__(self, + steps_to_keep, + name='steps_exporter', + serving_input_receiver_fn=None, + event_file_pattern='eval/*.tfevents.*', + assets_extra=None, + as_text=False): + """Create an `StepsExporter` to use with `tf.estimator.EvalSpec`. + + Example of creating a StepsExporter for training and evaluation: + + ```python + categorical_feature_a = categorical_column_with_hash_bucket(...) + categorical_feature_b = categorical_column_with_hash_bucket(...) + + categorical_feature_a_emb = embedding_column( + categorical_column=categorical_feature_a, ...) + categorical_feature_b_emb = embedding_column( + categorical_column=categorical_feature_b, ...) + + estimator = tf.estimator.DNNClassifier( + feature_columns=[categorical_feature_a_emb, categorical_feature_b_emb], + hidden_units=[1024, 512, 256]) + + # Input pipeline for train and evaluate. + def train_input_fn: # returns x, y + # please shuffle the data. + pass + def eval_input_fn_eval: # returns x, y + pass + + exporter = tf.contrib.estimator.exporter.StepsExporter( + name="steps_exporter", + serving_input_receiver_fn=serving_input_receiver_fn, + event_file_pattern='eval/*.tfevents.*' + steps_to_keep=[...]) + + train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn, max_steps=1000) + + eval_spec = [tf.estimator.EvalSpec( + input_fn=eval_input_fn, + steps=1, + exporters=exporter, + start_delay_secs=0, + throttle_secs=5)] + + tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec) + + # Models will be exported to estimator.model_dir in timestamped directories, + # which can be used for serving, analysis with TFMA, or directly loaded in. + # For example: + export_dir = os.path.join(estimator.model_dir, + <timestamped directory name>) + + with ops.Graph().as_default() as graph: + with session.Session(graph=graph) as sess: + tf.saved_model.loader.load( + sess, [tf.saved_model.tag_constants.SERVING], export_dir) + + ``` + + Args: + steps_to_keep: Non-empty list of positive integers containing + the step numbers at which the model should be exported. All the exports + will be kept, so there is no garbage collection. + name: Unique name of this `Exporter` that is going to be used in the + export path. + serving_input_receiver_fn: A function that takes no arguments and returns + a `ServingInputReceiver`. + event_file_pattern: Event file name pattern relative to model_dir. If + None, however, the exporter would not be preemption-safe. To be + preemption-safe, event_file_pattern should be specified. + assets_extra: An optional dict specifying how to populate the assets.extra + directory within the exported SavedModel. Each key should give the + destination path (including the filename) relative to the assets.extra + directory. The corresponding value gives the full path of the source + file to be copied. For example, the simple case of copying a single + file without renaming it is specified as `{'my_asset_file.txt': + '/path/to/my_asset_file.txt'}`. + as_text: Whether to write the SavedModel proto in text format. Defaults to + `False`. + + Raises: + ValueError: If any arguments is invalid. + """ + # pylint: disable=protected-access + self._saved_model_exporter = exporter._SavedModelExporter( + name, serving_input_receiver_fn, assets_extra, as_text) + # pylint: enable=protected-access + + self._event_file_pattern = event_file_pattern + self._model_dir = None + + self._input_steps_to_keep = steps_to_keep + steps_to_keep = [step for step in steps_to_keep if isinstance(step, int)] + steps_to_keep = [step for step in steps_to_keep if step > 0] + if not steps_to_keep: + raise ValueError( + '`steps_to_keep` list must have at least one positive integer') + elif self._input_steps_to_keep != steps_to_keep: + tf_logging.warn('Changed `steps_to_keep`, by omitting non-integer or' + ' less than 1 elements, to [%s]', + ', '.join(str(step) for step in steps_to_keep)) + self._steps_to_keep = sorted(steps_to_keep) + self._steps_kept = [] + + @property + def name(self): + return self._saved_model_exporter.name + + def export(self, estimator, export_path, checkpoint_path, eval_result, + is_the_final_export): + """Exports the given Estimator to a specific format. + + Args: + estimator: A `tf.estimator.Estimator` instance to export. + export_path: A string containing a directory where to write the export. + checkpoint_path: The checkpoint path to export. + eval_result: The output of Estimator.evaluate on this checkpoint. + is_the_final_export: This boolean is True when this is an export in the + end of training. It is False for the intermediate exports during the + training. When passing Exporter to tf.estimator.train_and_evaluate + is_the_final_export is always False if TrainSpec.max_steps is None. + + Returns: + The string path to the exported directory or None if export is skipped. + + Raises: + ValueError: If `eval_result` is None or doesn't have + `ops.GraphKeys.GLOBAL_STEP` as a key. + """ + export_result = None + + if not eval_result or DEFAULT_GLOBAL_STEP_KEY not in eval_result: + raise ValueError( + '`eval_result` is empty, or does not have global step. This' + ' should never happen as Estimator always sets the global step in ' + '`eval_result`. Please file a bug report. Got eval_result: %s' + % str(eval_result)) + + if self._model_dir != estimator.model_dir and self._event_file_pattern: + tf_logging.info('Loads the steps that the model was already evaluated at,' + 'from event files') + self._model_dir = estimator.model_dir + full_event_file_pattern = os.path.join(self._model_dir, + self._event_file_pattern) + self._steps_kept = self._get_kept_steps(full_event_file_pattern) + + if self._steps_kept: + self._steps_kept = sorted(self._steps_kept) + self._steps_to_keep = [step for step in self._steps_to_keep if + step > self._steps_kept[-1]] + # It is assumed that the model is exported at any evaluated step 'n' if + # there is any `steps_missed` lower than 'n'. As a result, all the steps in + # `_steps_to_keep` lower than the last evaluated step will be removed. + steps_missed = [step for step in self._steps_to_keep + if step <= eval_result[DEFAULT_GLOBAL_STEP_KEY]] + + if steps_missed: + # update the `_steps_to_keep` list by omitting all steps smaller than the + # current global step which are missed to be exported + export_result = self._saved_model_exporter.export(estimator, export_path, + checkpoint_path, + eval_result, + is_the_final_export) + self._steps_to_keep = [step for step in self._steps_to_keep if step + not in steps_missed] + # contains all the steps in which export has happened. + self._steps_kept.append(eval_result[DEFAULT_GLOBAL_STEP_KEY]) + # Show warning for all the missed steps except the last one + if steps_missed[:-1]: + tf_logging.warn('Missed steps [%s] for exporting, as no evaluation' + ' took place at them.', ', '.join(str(step) for step in + steps_missed[:-1])) + # Log model export if the last missed step is the same as the current step + if steps_missed[-1] == eval_result[DEFAULT_GLOBAL_STEP_KEY]: + tf_logging.info('Performing model export at step %d.', + eval_result[DEFAULT_GLOBAL_STEP_KEY]) + # Show warning for exporting model at another step instead of the user + # specified one + else: + tf_logging.warn('Performing model export at step %d instead of %d, as' + ' no evaluation took place at step %d.', + eval_result[DEFAULT_GLOBAL_STEP_KEY], steps_missed[-1], + steps_missed[-1]) + return export_result + + def _get_kept_steps(self, event_files): + """Get the steps that the model was evaluated at, from event files. + + Args: + event_files: Absolute pattern of event files. + + Returns: + steps_kept: A list of steps in which the model was evaluated. + """ + if not event_files: + return None + + steps_kept = [] + for event_file in gfile.Glob(os.path.join(event_files)): + for event in summary_iterator.summary_iterator(event_file): + if event.step not in steps_kept: + steps_kept.append(event.step) + return steps_kept diff --git a/tensorflow/contrib/estimator/python/estimator/exporter_test.py b/tensorflow/contrib/estimator/python/estimator/exporter_test.py new file mode 100644 index 0000000000..0d009b945e --- /dev/null +++ b/tensorflow/contrib/estimator/python/estimator/exporter_test.py @@ -0,0 +1,206 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for `StepsExporter`.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import shutil +import tempfile + +from tensorflow.contrib.estimator.python.estimator import exporter as exporter_lib +from tensorflow.python.estimator import estimator as estimator_lib +from tensorflow.python.platform import gfile +from tensorflow.python.platform import test + + +class StepsExporterTest(test.TestCase): + + def test_error_out_if_steps_to_keep_has_no_positive_integers(self): + + def _serving_input_receiver_fn(): + pass + + with self.assertRaisesRegexp(ValueError, "positive integer"): + exporter = exporter_lib.StepsExporter( + name="specified_steps_exporter", + serving_input_receiver_fn=_serving_input_receiver_fn, + steps_to_keep=[-1, 0, 1.1]) + self.assertEqual("specified_steps_exporter", exporter.name) + + def test_steps_exporter(self): + + def _serving_input_receiver_fn(): + pass + + export_dir_base = tempfile.mkdtemp() + gfile.MkDir(export_dir_base) + gfile.MkDir(export_dir_base + "/export") + gfile.MkDir(export_dir_base + "/eval") + + exporter = exporter_lib.StepsExporter( + name="steps_exporter", + serving_input_receiver_fn=_serving_input_receiver_fn, + assets_extra={"from/path": "to/path"}, + as_text=False, + steps_to_keep=[1]) + estimator = test.mock.Mock(spec=estimator_lib.Estimator) + estimator.export_savedmodel.return_value = "export_result_path" + estimator.model_dir = export_dir_base + + export_result = exporter.export(estimator, export_dir_base, + "checkpoint_path", {"global_step": 1}, + False) + + self.assertEqual("export_result_path", export_result) + estimator.export_savedmodel.assert_called_with( + export_dir_base, + _serving_input_receiver_fn, + assets_extra={"from/path": "to/path"}, + as_text=False, + checkpoint_path="checkpoint_path", + strip_default_attrs=True) + + shutil.rmtree(export_dir_base, ignore_errors=True) + + def test_steps_exporter_with_preemption(self): + + def _serving_input_receiver_fn(): + pass + + export_dir_base = tempfile.mkdtemp() + gfile.MkDir(export_dir_base) + gfile.MkDir(export_dir_base + "/export") + gfile.MkDir(export_dir_base + "/eval") + + eval_dir_base = os.path.join(export_dir_base, "eval_continuous") + estimator_lib._write_dict_to_summary(eval_dir_base, {}, 1) + estimator_lib._write_dict_to_summary(eval_dir_base, {}, 2) + + exporter = exporter_lib.StepsExporter( + name="steps_exporter", + serving_input_receiver_fn=_serving_input_receiver_fn, + event_file_pattern="eval_continuous/*.tfevents.*", + assets_extra={"from/path": "to/path"}, + as_text=False, + steps_to_keep=[1, 2, 6, 8]) + + estimator = test.mock.Mock(spec=estimator_lib.Estimator) + estimator.model_dir = export_dir_base + estimator.export_savedmodel.return_value = "export_result_path" + + export_result = exporter.export(estimator, export_dir_base, + "checkpoint_path", {"global_step": 3}, + False) + self.assertEqual(None, export_result) + + export_result = exporter.export(estimator, export_dir_base, + "checkpoint_path", {"global_step": 6}, + False) + self.assertEqual("export_result_path", export_result) + + export_result = exporter.export(estimator, export_dir_base, + "checkpoint_path", {"global_step": 7}, + False) + self.assertEqual(None, export_result) + + shutil.rmtree(export_dir_base, ignore_errors=True) + + def test_specified_step_is_saved(self): + + def _serving_input_receiver_fn(): + pass + + export_dir_base = tempfile.mkdtemp() + gfile.MkDir(export_dir_base) + gfile.MkDir(export_dir_base + "/export") + gfile.MkDir(export_dir_base + "/eval") + + exporter = exporter_lib.StepsExporter( + name="steps_exporter", + serving_input_receiver_fn=_serving_input_receiver_fn, + assets_extra={"from/path": "to/path"}, + as_text=False, + steps_to_keep=[1, 5, 8, 10, 11]) + estimator = test.mock.Mock(spec=estimator_lib.Estimator) + estimator.export_savedmodel.return_value = "export_result_path" + estimator.model_dir = export_dir_base + + export_result = exporter.export(estimator, export_dir_base, + "checkpoint_path", {"global_step": 1}, + False) + + self.assertTrue(estimator.export_savedmodel.called) + self.assertEqual("export_result_path", export_result) + + export_result = exporter.export(estimator, export_dir_base, + "checkpoint_path", {"global_step": 2}, + False) + self.assertEqual(None, export_result) + + export_result = exporter.export(estimator, export_dir_base, + "checkpoint_path", {"global_step": 5}, + False) + self.assertTrue(estimator.export_savedmodel.called) + self.assertEqual("export_result_path", export_result) + + export_result = exporter.export(estimator, export_dir_base, + "checkpoint_path", {"global_step": 10}, + False) + self.assertTrue(estimator.export_savedmodel.called) + self.assertEqual("export_result_path", export_result) + + export_result = exporter.export(estimator, export_dir_base, + "checkpoint_path", {"global_step": 15}, + False) + self.assertTrue(estimator.export_savedmodel.called) + self.assertEqual("export_result_path", export_result) + + export_result = exporter.export(estimator, export_dir_base, + "checkpoint_path", {"global_step": 20}, + False) + self.assertEqual(None, export_result) + + shutil.rmtree(export_dir_base, ignore_errors=True) + + def test_steps_exporter_with_no_global_step_key(self): + + def _serving_input_receiver_fn(): + pass + + export_dir_base = tempfile.mkdtemp() + gfile.MkDir(export_dir_base) + gfile.MkDir(export_dir_base + "/export") + gfile.MkDir(export_dir_base + "/eval") + + exporter = exporter_lib.StepsExporter( + name="steps_exporter", + serving_input_receiver_fn=_serving_input_receiver_fn, + assets_extra={"from/path": "to/path"}, + as_text=False, + steps_to_keep=[1]) + estimator = test.mock.Mock(spec=estimator_lib.Estimator) + estimator.export_savedmodel.return_value = "export_result_path" + estimator.model_dir = export_dir_base + + with self.assertRaisesRegexp(ValueError, "does not have global step"): + exporter.export(estimator, export_dir_base, "checkpoint_path", {}, False) + + shutil.rmtree(export_dir_base, ignore_errors=True) + + +if __name__ == "__main__": + test.main() |