diff options
author | Pete Warden <petewarden@google.com> | 2017-07-05 13:12:07 -0700 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2017-07-05 13:20:42 -0700 |
commit | 055500bbcea60513c0160d213a10a7055f079312 (patch) | |
tree | 794a2f2f3340d80070c9ce743ec2858bc7f36a09 /tensorflow/examples/image_retraining | |
parent | e4351ac3f8d4c4134dbd90ec9f8f03fdef31c20a (diff) |
Added Mobilenet support to TensorFlow for Poets training script.
PiperOrigin-RevId: 160995543
Diffstat (limited to 'tensorflow/examples/image_retraining')
-rw-r--r-- | tensorflow/examples/image_retraining/retrain.py | 480 | ||||
-rw-r--r-- | tensorflow/examples/image_retraining/retrain_test.py | 29 |
2 files changed, 357 insertions, 152 deletions
diff --git a/tensorflow/examples/image_retraining/retrain.py b/tensorflow/examples/image_retraining/retrain.py index 44a3097d80..ac39639af1 100644 --- a/tensorflow/examples/image_retraining/retrain.py +++ b/tensorflow/examples/image_retraining/retrain.py @@ -12,18 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Simple transfer learning with an Inception v3 architecture model. +r"""Simple transfer learning with Inception v3 or Mobilenet models. With support for TensorBoard. -This example shows how to take a Inception v3 architecture model trained on +This example shows how to take a Inception v3 or Mobilenet model trained on ImageNet images, and train a new top layer that can recognize other classes of images. -The top layer receives as input a 2048-dimensional vector for each image. We -train a softmax layer on top of this representation. Assuming the softmax layer -contains N labels, this corresponds to learning N + 2048*N model parameters -corresponding to the learned biases and weights. +The top layer receives as input a 2048-dimensional vector (1001-dimensional for +Mobilenet) for each image. We train a softmax layer on top of this +representation. Assuming the softmax layer contains N labels, this corresponds +to learning N + 2048*N (or 1001*N) model parameters corresponding to the +learned biases and weights. Here's an example, which assumes you have a folder containing class-named subfolders, each full of images for each label. The example folder flower_photos @@ -62,6 +63,23 @@ in. This produces a new model file that can be loaded and run by any TensorFlow program, for example the label_image sample code. +By default this script will use the high accuracy, but comparatively large and +slow Inception v3 model architecture. 
It's recommended that you start with this +to validate that you have gathered good training data, but if you want to deploy +on resource-limited platforms, you can try the `--architecture` flag with a +Mobilenet model. For example: + +```bash +python tensorflow/examples/image_retraining/retrain.py \ + --image_dir ~/flower_photos --architecture mobilenet_1.0_224 +``` + +There are 32 different Mobilenet models to choose from, with a variety of file +size and latency options. The first number can be '1.0', '0.75', '0.50', or +'0.25' to control the size, and the second controls the input image size, either +'224', '192', '160', or '128', with smaller sizes running faster. See +https://research.googleblog.com/2017/06/mobilenets-open-source-models-for.html +for more information on Mobilenet. To use with TensorBoard: @@ -82,7 +100,6 @@ import hashlib import os.path import random import re -import struct import sys import tarfile @@ -101,16 +118,6 @@ FLAGS = None # we're using for Inception v3. These include things like tensor names and their # sizes. If you want to adapt this script to work with another model, you will # need to update these to reflect the values in the network you're using. -# pylint: disable=line-too-long -DATA_URL = 'http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz' -# pylint: enable=line-too-long -BOTTLENECK_TENSOR_NAME = 'pool_3/_reshape:0' -BOTTLENECK_TENSOR_SIZE = 2048 -MODEL_INPUT_WIDTH = 299 -MODEL_INPUT_HEIGHT = 299 -MODEL_INPUT_DEPTH = 3 -JPEG_DATA_TENSOR_NAME = 'DecodeJpeg/contents:0' -RESIZED_INPUT_TENSOR_NAME = 'ResizeBilinear:0' MAX_NUM_IMAGES_PER_CLASS = 2 ** 27 - 1 # ~134M @@ -131,7 +138,7 @@ def create_image_lists(image_dir, testing_percentage, validation_percentage): into training, testing, and validation sets within each label. 
""" if not gfile.Exists(image_dir): - print("Image directory '" + image_dir + "' not found.") + tf.logging.error("Image directory '" + image_dir + "' not found.") return None result = {} sub_dirs = [x[0] for x in gfile.Walk(image_dir)] @@ -146,18 +153,20 @@ def create_image_lists(image_dir, testing_percentage, validation_percentage): dir_name = os.path.basename(sub_dir) if dir_name == image_dir: continue - print("Looking for images in '" + dir_name + "'") + tf.logging.info("Looking for images in '" + dir_name + "'") for extension in extensions: file_glob = os.path.join(image_dir, dir_name, '*.' + extension) file_list.extend(gfile.Glob(file_glob)) if not file_list: - print('No files found') + tf.logging.warning('No files found') continue if len(file_list) < 20: - print('WARNING: Folder has less than 20 images, which may cause issues.') + tf.logging.warning( + 'WARNING: Folder has less than 20 images, which may cause issues.') elif len(file_list) > MAX_NUM_IMAGES_PER_CLASS: - print('WARNING: Folder {} has more than {} images. Some images will ' - 'never be selected.'.format(dir_name, MAX_NUM_IMAGES_PER_CLASS)) + tf.logging.warning( + 'WARNING: Folder {} has more than {} images. Some images will ' + 'never be selected.'.format(dir_name, MAX_NUM_IMAGES_PER_CLASS)) label_name = re.sub(r'[^a-z0-9]+', ' ', dir_name.lower()) training_images = [] testing_images = [] @@ -230,7 +239,7 @@ def get_image_path(image_lists, label_name, index, image_dir, category): def get_bottleneck_path(image_lists, label_name, index, bottleneck_dir, - category): + category, architecture): """"Returns a path to a bottleneck file for a label at the given index. Args: @@ -241,35 +250,42 @@ def get_bottleneck_path(image_lists, label_name, index, bottleneck_dir, bottleneck_dir: Folder string holding cached files of bottleneck values. category: Name string of set to pull images from - training, testing, or validation. + architecture: The name of the model architecture. 
Returns: File system path string to an image that meets the requested parameters. """ return get_image_path(image_lists, label_name, index, bottleneck_dir, - category) + '.txt' + category) + '_' + architecture + '.txt' -def create_inception_graph(): +def create_model_graph(model_info): """"Creates a graph from saved GraphDef file and returns a Graph object. + Args: + model_info: Dictionary containing information about the model architecture. + Returns: Graph holding the trained Inception network, and various tensors we'll be manipulating. """ with tf.Graph().as_default() as graph: - model_filename = os.path.join( - FLAGS.model_dir, 'classify_image_graph_def.pb') - with gfile.FastGFile(model_filename, 'rb') as f: + model_path = os.path.join(FLAGS.model_dir, model_info['model_file_name']) + with gfile.FastGFile(model_path, 'rb') as f: graph_def = tf.GraphDef() graph_def.ParseFromString(f.read()) - bottleneck_tensor, jpeg_data_tensor, resized_input_tensor = ( - tf.import_graph_def(graph_def, name='', return_elements=[ - BOTTLENECK_TENSOR_NAME, JPEG_DATA_TENSOR_NAME, - RESIZED_INPUT_TENSOR_NAME])) - return graph, bottleneck_tensor, jpeg_data_tensor, resized_input_tensor + bottleneck_tensor, resized_input_tensor = (tf.import_graph_def( + graph_def, + name='', + return_elements=[ + model_info['bottleneck_tensor_name'], + model_info['resized_input_tensor_name'], + ])) + return graph, bottleneck_tensor, resized_input_tensor def run_bottleneck_on_image(sess, image_data, image_data_tensor, + decoded_image_tensor, resized_input_tensor, bottleneck_tensor): """Runs inference on an image to extract the 'bottleneck' summary layer. @@ -277,28 +293,36 @@ def run_bottleneck_on_image(sess, image_data, image_data_tensor, sess: Current active TensorFlow Session. image_data: String of raw JPEG data. image_data_tensor: Input data layer in the graph. + decoded_image_tensor: Output of initial image resizing and preprocessing. + resized_input_tensor: The input node of the recognition graph. 
bottleneck_tensor: Layer before the final softmax. Returns: Numpy array of bottleneck values. """ - bottleneck_values = sess.run( - bottleneck_tensor, - {image_data_tensor: image_data}) + # First decode the JPEG image, resize it, and rescale the pixel values. + resized_input_values = sess.run(decoded_image_tensor, + {image_data_tensor: image_data}) + # Then run it through the recognition network. + bottleneck_values = sess.run(bottleneck_tensor, + {resized_input_tensor: resized_input_values}) bottleneck_values = np.squeeze(bottleneck_values) return bottleneck_values -def maybe_download_and_extract(): +def maybe_download_and_extract(data_url): """Download and extract model tar file. If the pretrained model we're using doesn't already exist, this function downloads it from the TensorFlow.org website and unpacks it into a directory. + + Args: + data_url: Web location of the tar file containing the pretrained model. """ dest_directory = FLAGS.model_dir if not os.path.exists(dest_directory): os.makedirs(dest_directory) - filename = DATA_URL.split('/')[-1] + filename = data_url.split('/')[-1] filepath = os.path.join(dest_directory, filename) if not os.path.exists(filepath): @@ -308,12 +332,11 @@ def maybe_download_and_extract(): float(count * block_size) / float(total_size) * 100.0)) sys.stdout.flush() - filepath, _ = urllib.request.urlretrieve(DATA_URL, - filepath, - _progress) + filepath, _ = urllib.request.urlretrieve(data_url, filepath, _progress) print() statinfo = os.stat(filepath) - print('Successfully downloaded', filename, statinfo.st_size, 'bytes.') + tf.logging.info('Successfully downloaded', filename, statinfo.st_size, + 'bytes.') tarfile.open(filepath, 'r:gz').extractall(dest_directory) @@ -327,43 +350,15 @@ def ensure_dir_exists(dir_name): os.makedirs(dir_name) -def write_list_of_floats_to_file(list_of_floats, file_path): - """Writes a given list of floats to a binary file. - - Args: - list_of_floats: List of floats we want to write to a file. 
- file_path: Path to a file where list of floats will be stored. - - """ - - s = struct.pack('d' * BOTTLENECK_TENSOR_SIZE, *list_of_floats) - with open(file_path, 'wb') as f: - f.write(s) - - -def read_list_of_floats_from_file(file_path): - """Reads list of floats from a given file. - - Args: - file_path: Path to a file where list of floats was stored. - Returns: - Array of bottleneck values (list of floats). - - """ - - with open(file_path, 'rb') as f: - s = struct.unpack('d' * BOTTLENECK_TENSOR_SIZE, f.read()) - return list(s) - - bottleneck_path_2_bottleneck_values = {} def create_bottleneck_file(bottleneck_path, image_lists, label_name, index, image_dir, category, sess, jpeg_data_tensor, + decoded_image_tensor, resized_input_tensor, bottleneck_tensor): """Create a single bottleneck file.""" - print('Creating bottleneck at ' + bottleneck_path) + tf.logging.info('Creating bottleneck at ' + bottleneck_path) image_path = get_image_path(image_lists, label_name, index, image_dir, category) if not gfile.Exists(image_path): @@ -371,10 +366,11 @@ def create_bottleneck_file(bottleneck_path, image_lists, label_name, index, image_data = gfile.FastGFile(image_path, 'rb').read() try: bottleneck_values = run_bottleneck_on_image( - sess, image_data, jpeg_data_tensor, bottleneck_tensor) - except: - raise RuntimeError('Error during processing file %s' % image_path) - + sess, image_data, jpeg_data_tensor, decoded_image_tensor, + resized_input_tensor, bottleneck_tensor) + except Exception as e: + raise RuntimeError('Error during processing file %s (%s)' % (image_path, + str(e))) bottleneck_string = ','.join(str(x) for x in bottleneck_values) with open(bottleneck_path, 'w') as bottleneck_file: bottleneck_file.write(bottleneck_string) @@ -382,7 +378,8 @@ def create_bottleneck_file(bottleneck_path, image_lists, label_name, index, def get_or_create_bottleneck(sess, image_lists, label_name, index, image_dir, category, bottleneck_dir, jpeg_data_tensor, - bottleneck_tensor): + 
decoded_image_tensor, resized_input_tensor, + bottleneck_tensor, architecture): """Retrieves or calculates bottleneck values for an image. If a cached version of the bottleneck data exists on-disk, return that, @@ -400,7 +397,10 @@ def get_or_create_bottleneck(sess, image_lists, label_name, index, image_dir, or validation. bottleneck_dir: Folder string holding cached files of bottleneck values. jpeg_data_tensor: The tensor to feed loaded jpeg data into. + decoded_image_tensor: The output of decoding and resizing the image. + resized_input_tensor: The input node of the recognition graph. bottleneck_tensor: The output tensor for the bottleneck values. + architecture: The name of the model architecture. Returns: Numpy array of values produced by the bottleneck layer for the image. @@ -410,10 +410,11 @@ def get_or_create_bottleneck(sess, image_lists, label_name, index, image_dir, sub_dir_path = os.path.join(bottleneck_dir, sub_dir) ensure_dir_exists(sub_dir_path) bottleneck_path = get_bottleneck_path(image_lists, label_name, index, - bottleneck_dir, category) + bottleneck_dir, category, architecture) if not os.path.exists(bottleneck_path): create_bottleneck_file(bottleneck_path, image_lists, label_name, index, image_dir, category, sess, jpeg_data_tensor, + decoded_image_tensor, resized_input_tensor, bottleneck_tensor) with open(bottleneck_path, 'r') as bottleneck_file: bottleneck_string = bottleneck_file.read() @@ -421,11 +422,12 @@ def get_or_create_bottleneck(sess, image_lists, label_name, index, image_dir, try: bottleneck_values = [float(x) for x in bottleneck_string.split(',')] except ValueError: - print('Invalid float found, recreating bottleneck') + tf.logging.warning('Invalid float found, recreating bottleneck') did_hit_error = True if did_hit_error: create_bottleneck_file(bottleneck_path, image_lists, label_name, index, image_dir, category, sess, jpeg_data_tensor, + decoded_image_tensor, resized_input_tensor, bottleneck_tensor) with open(bottleneck_path, 'r') 
as bottleneck_file: bottleneck_string = bottleneck_file.read() @@ -436,7 +438,8 @@ def get_or_create_bottleneck(sess, image_lists, label_name, index, image_dir, def cache_bottlenecks(sess, image_lists, image_dir, bottleneck_dir, - jpeg_data_tensor, bottleneck_tensor): + jpeg_data_tensor, decoded_image_tensor, + resized_input_tensor, bottleneck_tensor, architecture): """Ensures all the training, testing, and validation bottlenecks are cached. Because we're likely to read the same image multiple times (if there are no @@ -453,7 +456,10 @@ def cache_bottlenecks(sess, image_lists, image_dir, bottleneck_dir, images. bottleneck_dir: Folder string holding cached files of bottleneck values. jpeg_data_tensor: Input tensor for jpeg data from file. + decoded_image_tensor: The output of decoding and resizing the image. + resized_input_tensor: The input node of the recognition graph. bottleneck_tensor: The penultimate output layer of the graph. + architecture: The name of the model architecture. Returns: Nothing. 
@@ -464,18 +470,21 @@ def cache_bottlenecks(sess, image_lists, image_dir, bottleneck_dir, for category in ['training', 'testing', 'validation']: category_list = label_lists[category] for index, unused_base_name in enumerate(category_list): - get_or_create_bottleneck(sess, image_lists, label_name, index, - image_dir, category, bottleneck_dir, - jpeg_data_tensor, bottleneck_tensor) + get_or_create_bottleneck( + sess, image_lists, label_name, index, image_dir, category, + bottleneck_dir, jpeg_data_tensor, decoded_image_tensor, + resized_input_tensor, bottleneck_tensor, architecture) how_many_bottlenecks += 1 if how_many_bottlenecks % 100 == 0: - print(str(how_many_bottlenecks) + ' bottleneck files created.') + tf.logging.info( + str(how_many_bottlenecks) + ' bottleneck files created.') def get_random_cached_bottlenecks(sess, image_lists, how_many, category, bottleneck_dir, image_dir, jpeg_data_tensor, - bottleneck_tensor): + decoded_image_tensor, resized_input_tensor, + bottleneck_tensor, architecture): """Retrieves bottleneck values for cached images. If no distortions are being applied, this function can retrieve the cached @@ -493,7 +502,10 @@ def get_random_cached_bottlenecks(sess, image_lists, how_many, category, image_dir: Root folder string of the subfolders containing the training images. jpeg_data_tensor: The layer to feed jpeg image data into. + decoded_image_tensor: The output of decoding and resizing the image. + resized_input_tensor: The input node of the recognition graph. bottleneck_tensor: The bottleneck output layer of the CNN graph. + architecture: The name of the model architecture. 
Returns: List of bottleneck arrays, their corresponding ground truths, and the @@ -511,10 +523,10 @@ def get_random_cached_bottlenecks(sess, image_lists, how_many, category, image_index = random.randrange(MAX_NUM_IMAGES_PER_CLASS + 1) image_name = get_image_path(image_lists, label_name, image_index, image_dir, category) - bottleneck = get_or_create_bottleneck(sess, image_lists, label_name, - image_index, image_dir, category, - bottleneck_dir, jpeg_data_tensor, - bottleneck_tensor) + bottleneck = get_or_create_bottleneck( + sess, image_lists, label_name, image_index, image_dir, category, + bottleneck_dir, jpeg_data_tensor, decoded_image_tensor, + resized_input_tensor, bottleneck_tensor, architecture) ground_truth = np.zeros(class_count, dtype=np.float32) ground_truth[label_index] = 1.0 bottlenecks.append(bottleneck) @@ -527,10 +539,10 @@ def get_random_cached_bottlenecks(sess, image_lists, how_many, category, image_lists[label_name][category]): image_name = get_image_path(image_lists, label_name, image_index, image_dir, category) - bottleneck = get_or_create_bottleneck(sess, image_lists, label_name, - image_index, image_dir, category, - bottleneck_dir, jpeg_data_tensor, - bottleneck_tensor) + bottleneck = get_or_create_bottleneck( + sess, image_lists, label_name, image_index, image_dir, category, + bottleneck_dir, jpeg_data_tensor, decoded_image_tensor, + resized_input_tensor, bottleneck_tensor, architecture) ground_truth = np.zeros(class_count, dtype=np.float32) ground_truth[label_index] = 1.0 bottlenecks.append(bottleneck) @@ -583,12 +595,12 @@ def get_random_distorted_bottlenecks( # might be optimized in other implementations. 
distorted_image_data = sess.run(distorted_image, {input_jpeg_tensor: jpeg_data}) - bottleneck = run_bottleneck_on_image(sess, distorted_image_data, - resized_input_tensor, - bottleneck_tensor) + bottleneck_values = sess.run(bottleneck_tensor, + {resized_input_tensor: distorted_image_data}) + bottleneck_values = np.squeeze(bottleneck_values) ground_truth = np.zeros(class_count, dtype=np.float32) ground_truth[label_index] = 1.0 - bottlenecks.append(bottleneck) + bottlenecks.append(bottleneck_values) ground_truths.append(ground_truth) return bottlenecks, ground_truths @@ -612,7 +624,8 @@ def should_distort_images(flip_left_right, random_crop, random_scale, def add_input_distortions(flip_left_right, random_crop, random_scale, - random_brightness): + random_brightness, input_width, input_height, + input_depth, input_mean, input_std): """Creates the operations to apply the specified distortions. During training it can help to improve the results if we run the images @@ -660,13 +673,18 @@ def add_input_distortions(flip_left_right, random_crop, random_scale, random_scale: Integer percentage of how much to vary the scale by. random_brightness: Integer range to randomly multiply the pixel values by. graph. + input_width: Horizontal size of expected input image to model. + input_height: Vertical size of expected input image to model. + input_depth: How many channels the expected input image should have. + input_mean: Pixel value that should be zero in the image for the graph. + input_std: How much to divide the pixel values by before recognition. Returns: The jpeg input layer and the distorted result tensor. 
""" jpeg_data = tf.placeholder(tf.string, name='DistortJPGInput') - decoded_image = tf.image.decode_jpeg(jpeg_data, channels=MODEL_INPUT_DEPTH) + decoded_image = tf.image.decode_jpeg(jpeg_data, channels=input_depth) decoded_image_as_float = tf.cast(decoded_image, dtype=tf.float32) decoded_image_4d = tf.expand_dims(decoded_image_as_float, 0) margin_scale = 1.0 + (random_crop / 100.0) @@ -676,16 +694,15 @@ def add_input_distortions(flip_left_right, random_crop, random_scale, minval=1.0, maxval=resize_scale) scale_value = tf.multiply(margin_scale_value, resize_scale_value) - precrop_width = tf.multiply(scale_value, MODEL_INPUT_WIDTH) - precrop_height = tf.multiply(scale_value, MODEL_INPUT_HEIGHT) + precrop_width = tf.multiply(scale_value, input_width) + precrop_height = tf.multiply(scale_value, input_height) precrop_shape = tf.stack([precrop_height, precrop_width]) precrop_shape_as_int = tf.cast(precrop_shape, dtype=tf.int32) precropped_image = tf.image.resize_bilinear(decoded_image_4d, precrop_shape_as_int) precropped_image_3d = tf.squeeze(precropped_image, squeeze_dims=[0]) cropped_image = tf.random_crop(precropped_image_3d, - [MODEL_INPUT_HEIGHT, MODEL_INPUT_WIDTH, - MODEL_INPUT_DEPTH]) + [input_height, input_width, input_depth]) if flip_left_right: flipped_image = tf.image.random_flip_left_right(cropped_image) else: @@ -696,7 +713,9 @@ def add_input_distortions(flip_left_right, random_crop, random_scale, minval=brightness_min, maxval=brightness_max) brightened_image = tf.multiply(flipped_image, brightness_value) - distort_result = tf.expand_dims(brightened_image, 0, name='DistortResult') + offset_image = tf.subtract(brightened_image, input_mean) + mul_image = tf.multiply(offset_image, 1.0 / input_std) + distort_result = tf.expand_dims(mul_image, 0, name='DistortResult') return jpeg_data, distort_result @@ -713,7 +732,8 @@ def variable_summaries(var): tf.summary.histogram('histogram', var) -def add_final_training_ops(class_count, final_tensor_name, 
bottleneck_tensor): +def add_final_training_ops(class_count, final_tensor_name, bottleneck_tensor, + bottleneck_tensor_size): """Adds a new softmax and fully-connected layer for training. We need to retrain the top layer to identify our new classes, so this function @@ -728,6 +748,7 @@ def add_final_training_ops(class_count, final_tensor_name, bottleneck_tensor): recognize. final_tensor_name: Name string for the new final node that produces results. bottleneck_tensor: The output of the main CNN graph. + bottleneck_tensor_size: How many entries in the bottleneck vector. Returns: The tensors for the training and cross entropy results, and tensors for the @@ -735,7 +756,8 @@ def add_final_training_ops(class_count, final_tensor_name, bottleneck_tensor): """ with tf.name_scope('input'): bottleneck_input = tf.placeholder_with_default( - bottleneck_tensor, shape=[None, BOTTLENECK_TENSOR_SIZE], + bottleneck_tensor, + shape=[None, bottleneck_tensor_size], name='BottleneckInputPlaceholder') ground_truth_input = tf.placeholder(tf.float32, @@ -747,8 +769,8 @@ def add_final_training_ops(class_count, final_tensor_name, bottleneck_tensor): layer_name = 'final_training_ops' with tf.name_scope(layer_name): with tf.name_scope('weights'): - initial_value = tf.truncated_normal([BOTTLENECK_TENSOR_SIZE, class_count], - stddev=0.001) + initial_value = tf.truncated_normal( + [bottleneck_tensor_size, class_count], stddev=0.001) layer_weights = tf.Variable(initial_value, name='final_weights') @@ -802,7 +824,7 @@ def add_evaluation_step(result_tensor, ground_truth_tensor): def save_graph_to_file(sess, graph, graph_file_name): output_graph_def = graph_util.convert_variables_to_constants( - sess, graph.as_graph_def(), [FLAGS.final_tensor_name]) + sess, graph.as_graph_def(), [FLAGS.final_tensor_name]) with gfile.FastGFile(graph_file_name, 'wb') as f: f.write(output_graph_def.SerializeToString()) return @@ -818,25 +840,160 @@ def prepare_file_system(): return +def 
create_model_info(architecture): + """Given the name of a model architecture, returns information about it. + + There are different base image recognition pretrained models that can be + retrained using transfer learning, and this function translates from the name + of a model to the attributes that are needed to download and train with it. + + Args: + architecture: Name of a model architecture. + + Returns: + Dictionary of information about the model, or None if the name isn't + recognized + + Raises: + ValueError: If architecture name is unknown. + """ + architecture = architecture.lower() + if architecture == 'inception_v3': + # pylint: disable=line-too-long + data_url = 'http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz' + # pylint: enable=line-too-long + bottleneck_tensor_name = 'pool_3/_reshape:0' + bottleneck_tensor_size = 2048 + input_width = 299 + input_height = 299 + input_depth = 3 + resized_input_tensor_name = 'Mul:0' + model_file_name = 'classify_image_graph_def.pb' + input_mean = 128 + input_std = 128 + elif architecture.startswith('mobilenet_'): + parts = architecture.split('_') + if len(parts) != 3 and len(parts) != 4: + tf.logging.error("Couldn't understand architecture name '%s'", + architecture) + return None + version_string = parts[1] + if (version_string != '1.0' and version_string != '0.75' and + version_string != '0.50' and version_string != '0.25'): + tf.logging.error( + """"The Mobilenet version should be '1.0', '0.75', '0.50', or '0.25', + but found '%s' for architecture '%s'""", + version_string, architecture) + return None + size_string = parts[2] + if (size_string != '224' and size_string != '192' and + size_string != '160' and size_string != '128'): + tf.logging.error( + """The Mobilenet input size should be '224', '192', '160', or '128', + but found '%s' for architecture '%s'""", + size_string, architecture) + return None + if len(parts) == 3: + is_quantized = False + else: + if parts[3] != 'quantized': + 
tf.logging.error( + "Couldn't understand architecture suffix '%s' for '%s'", parts[3], + architecture) + return None + is_quantized = True + data_url = 'http://download.tensorflow.org/models/mobilenet_v1_' + data_url += version_string + '_' + size_string + '_frozen.tgz' + bottleneck_tensor_name = 'MobilenetV1/Predictions/Reshape:0' + bottleneck_tensor_size = 1001 + input_width = int(size_string) + input_height = int(size_string) + input_depth = 3 + resized_input_tensor_name = 'input:0' + if is_quantized: + model_base_name = 'quantized_graph.pb' + else: + model_base_name = 'frozen_graph.pb' + model_dir_name = 'mobilenet_v1_' + version_string + '_' + size_string + model_file_name = os.path.join(model_dir_name, model_base_name) + input_mean = 127.5 + input_std = 127.5 + else: + tf.logging.error("Couldn't understand architecture name '%s'", architecture) + raise ValueError('Unknown architecture', architecture) + + return { + 'data_url': data_url, + 'bottleneck_tensor_name': bottleneck_tensor_name, + 'bottleneck_tensor_size': bottleneck_tensor_size, + 'input_width': input_width, + 'input_height': input_height, + 'input_depth': input_depth, + 'resized_input_tensor_name': resized_input_tensor_name, + 'model_file_name': model_file_name, + 'input_mean': input_mean, + 'input_std': input_std, + } + + +def add_jpeg_decoding(input_width, input_height, input_depth, input_mean, + input_std): + """Adds operations that perform JPEG decoding and resizing to the graph.. + + Args: + input_width: Desired width of the image fed into the recognizer graph. + input_height: Desired width of the image fed into the recognizer graph. + input_depth: Desired channels of the image fed into the recognizer graph. + input_mean: Pixel value that should be zero in the image for the graph. + input_std: How much to divide the pixel values by before recognition. + + Returns: + Tensors for the node to feed JPEG data into, and the output of the + preprocessing steps. 
+ """ + jpeg_data = tf.placeholder(tf.string, name='DecodeJPGInput') + decoded_image = tf.image.decode_jpeg(jpeg_data, channels=input_depth) + decoded_image_as_float = tf.cast(decoded_image, dtype=tf.float32) + decoded_image_4d = tf.expand_dims(decoded_image_as_float, 0) + resize_shape = tf.stack([input_height, input_width]) + resize_shape_as_int = tf.cast(resize_shape, dtype=tf.int32) + resized_image = tf.image.resize_bilinear(decoded_image_4d, + resize_shape_as_int) + offset_image = tf.subtract(resized_image, input_mean) + mul_image = tf.multiply(offset_image, 1.0 / input_std) + return jpeg_data, mul_image + + def main(_): + # Needed to make sure the logging output is visible. + # See https://github.com/tensorflow/tensorflow/issues/3047 + tf.logging.set_verbosity(tf.logging.INFO) + # Prepare necessary directories that can be used during training prepare_file_system() + # Gather information about the model architecture we'll be using. + model_info = create_model_info(FLAGS.architecture) + if not model_info: + tf.logging.error('Did not recognize architecture flag') + return -1 + # Set up the pre-trained graph. - maybe_download_and_extract() - graph, bottleneck_tensor, jpeg_data_tensor, resized_image_tensor = ( - create_inception_graph()) + maybe_download_and_extract(model_info['data_url']) + graph, bottleneck_tensor, resized_image_tensor = ( + create_model_graph(model_info)) # Look at the folder structure, and create lists of all the images. 
image_lists = create_image_lists(FLAGS.image_dir, FLAGS.testing_percentage, FLAGS.validation_percentage) class_count = len(image_lists.keys()) if class_count == 0: - print('No valid folders of images found at ' + FLAGS.image_dir) + tf.logging.error('No valid folders of images found at ' + FLAGS.image_dir) return -1 if class_count == 1: - print('Only one valid folder of images found at ' + FLAGS.image_dir + - ' - multiple classes are needed for classification.') + tf.logging.error('Only one valid folder of images found at ' + + FLAGS.image_dir + + ' - multiple classes are needed for classification.') return -1 # See if the command-line flags mean we're applying any distortions. @@ -845,25 +1002,33 @@ def main(_): FLAGS.random_brightness) with tf.Session(graph=graph) as sess: + # Set up the image decoding sub-graph. + jpeg_data_tensor, decoded_image_tensor = add_jpeg_decoding( + model_info['input_width'], model_info['input_height'], + model_info['input_depth'], model_info['input_mean'], + model_info['input_std']) if do_distort_images: # We will be applying distortions, so setup the operations we'll need. (distorted_jpeg_data_tensor, distorted_image_tensor) = add_input_distortions( - FLAGS.flip_left_right, FLAGS.random_crop, - FLAGS.random_scale, FLAGS.random_brightness) + FLAGS.flip_left_right, FLAGS.random_crop, FLAGS.random_scale, + FLAGS.random_brightness, model_info['input_width'], + model_info['input_height'], model_info['input_depth'], + model_info['input_mean'], model_info['input_std']) else: # We'll make sure we've calculated the 'bottleneck' image summaries and # cached them on disk. cache_bottlenecks(sess, image_lists, FLAGS.image_dir, FLAGS.bottleneck_dir, jpeg_data_tensor, - bottleneck_tensor) + decoded_image_tensor, resized_image_tensor, + bottleneck_tensor, FLAGS.architecture) # Add the new layer that we'll be training. 
(train_step, cross_entropy, bottleneck_input, ground_truth_input, - final_tensor) = add_final_training_ops(len(image_lists.keys()), - FLAGS.final_tensor_name, - bottleneck_tensor) + final_tensor) = add_final_training_ops( + len(image_lists.keys()), FLAGS.final_tensor_name, bottleneck_tensor, + model_info['bottleneck_tensor_size']) # Create the operations we need to evaluate the accuracy of our new layer. evaluation_step, prediction = add_evaluation_step( @@ -896,10 +1061,10 @@ def main(_): train_ground_truth, _) = get_random_cached_bottlenecks( sess, image_lists, FLAGS.train_batch_size, 'training', FLAGS.bottleneck_dir, FLAGS.image_dir, jpeg_data_tensor, - bottleneck_tensor) + decoded_image_tensor, resized_image_tensor, bottleneck_tensor, + FLAGS.architecture) # Feed the bottlenecks and ground truth into the graph, and run a training # step. Capture training summaries for TensorBoard with the `merged` op. - train_summary, _ = sess.run( [merged, train_step], feed_dict={bottleneck_input: train_bottlenecks, @@ -913,15 +1078,16 @@ def main(_): [evaluation_step, cross_entropy], feed_dict={bottleneck_input: train_bottlenecks, ground_truth_input: train_ground_truth}) - print('%s: Step %d: Train accuracy = %.1f%%' % (datetime.now(), i, - train_accuracy * 100)) - print('%s: Step %d: Cross entropy = %f' % (datetime.now(), i, - cross_entropy_value)) + tf.logging.info('%s: Step %d: Train accuracy = %.1f%%' % + (datetime.now(), i, train_accuracy * 100)) + tf.logging.info('%s: Step %d: Cross entropy = %f' % + (datetime.now(), i, cross_entropy_value)) validation_bottlenecks, validation_ground_truth, _ = ( get_random_cached_bottlenecks( sess, image_lists, FLAGS.validation_batch_size, 'validation', FLAGS.bottleneck_dir, FLAGS.image_dir, jpeg_data_tensor, - bottleneck_tensor)) + decoded_image_tensor, resized_image_tensor, bottleneck_tensor, + FLAGS.architecture)) # Run a validation step and capture training summaries for TensorBoard # with the `merged` op. 
validation_summary, validation_accuracy = sess.run( @@ -929,38 +1095,43 @@ def main(_): feed_dict={bottleneck_input: validation_bottlenecks, ground_truth_input: validation_ground_truth}) validation_writer.add_summary(validation_summary, i) - print('%s: Step %d: Validation accuracy = %.1f%% (N=%d)' % - (datetime.now(), i, validation_accuracy * 100, - len(validation_bottlenecks))) + tf.logging.info('%s: Step %d: Validation accuracy = %.1f%% (N=%d)' % + (datetime.now(), i, validation_accuracy * 100, + len(validation_bottlenecks))) # Store intermediate results intermediate_frequency = FLAGS.intermediate_store_frequency - if intermediate_frequency > 0 and (i % intermediate_frequency == 0) and i > 0: - intermediate_file_name = FLAGS.intermediate_output_graphs_dir + 'intermediate_' + str(i) + '.pb' - print('Save intermediate result to : ' + intermediate_file_name) + if (intermediate_frequency > 0 and (i % intermediate_frequency == 0) + and i > 0): + intermediate_file_name = (FLAGS.intermediate_output_graphs_dir + + 'intermediate_' + str(i) + '.pb') + tf.logging.info('Save intermediate result to : ' + + intermediate_file_name) save_graph_to_file(sess, graph, intermediate_file_name) - + # We've completed all our training, so run a final test evaluation on # some new images we haven't used before. 
test_bottlenecks, test_ground_truth, test_filenames = ( - get_random_cached_bottlenecks(sess, image_lists, FLAGS.test_batch_size, - 'testing', FLAGS.bottleneck_dir, - FLAGS.image_dir, jpeg_data_tensor, - bottleneck_tensor)) + get_random_cached_bottlenecks( + sess, image_lists, FLAGS.test_batch_size, 'testing', + FLAGS.bottleneck_dir, FLAGS.image_dir, jpeg_data_tensor, + decoded_image_tensor, resized_image_tensor, bottleneck_tensor, + FLAGS.architecture)) test_accuracy, predictions = sess.run( [evaluation_step, prediction], feed_dict={bottleneck_input: test_bottlenecks, ground_truth_input: test_ground_truth}) - print('Final test accuracy = %.1f%% (N=%d)' % ( - test_accuracy * 100, len(test_bottlenecks))) + tf.logging.info('Final test accuracy = %.1f%% (N=%d)' % + (test_accuracy * 100, len(test_bottlenecks))) if FLAGS.print_misclassified_test_images: - print('=== MISCLASSIFIED TEST IMAGES ===') + tf.logging.info('=== MISCLASSIFIED TEST IMAGES ===') for i, test_filename in enumerate(test_filenames): if predictions[i] != test_ground_truth[i].argmax(): - print('%70s %s' % (test_filename, - list(image_lists.keys())[predictions[i]])) + tf.logging.info('%70s %s' % + (test_filename, + list(image_lists.keys())[predictions[i]])) # Write out the trained graph and labels with the weights stored as # constants. @@ -993,7 +1164,10 @@ if __name__ == '__main__': '--intermediate_store_frequency', type=int, default=0, - help='How many steps to store intermediate graph. If "0" then will not store.' + help="""\ + How many steps to store intermediate graph. If "0" then will not + store.\ + """ ) parser.add_argument( '--output_labels', @@ -1134,5 +1308,19 @@ if __name__ == '__main__': input pixels up or down by.\ """ ) + parser.add_argument( + '--architecture', + type=str, + default='inception_v3', + help="""\ + Which model architecture to use. 'inception_v3' is the most accurate, but + also the slowest. 
For faster or smaller models, choose a MobileNet with the + form 'mobilenet_<parameter size>_<input_size>[_quantized]'. For example, + 'mobilenet_1.0_224' will pick a model that is 17 MB in size and takes 224 + pixel input images, while 'mobilenet_0.25_128_quantized' will choose a much + less accurate, but smaller and faster network that's 920 KB on disk and + takes 128x128 images. See https://research.googleblog.com/2017/06/mobilenets-open-source-models-for.html + for more information on Mobilenet.\ + """) FLAGS, unparsed = parser.parse_known_args() tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) diff --git a/tensorflow/examples/image_retraining/retrain_test.py b/tensorflow/examples/image_retraining/retrain_test.py index 8af5cc7114..467c15d0de 100644 --- a/tensorflow/examples/image_retraining/retrain_test.py +++ b/tensorflow/examples/image_retraining/retrain_test.py @@ -48,10 +48,10 @@ class ImageRetrainingTest(test_util.TensorFlowTestCase): def testGetBottleneckPath(self): image_lists = self.dummyImageLists() - self.assertEqual('bottleneck_dir/somedir/image_five.jpg.txt', + self.assertEqual('bottleneck_dir/somedir/image_five.jpg_imagenet_v3.txt', retrain.get_bottleneck_path( image_lists, 'label_one', 0, 'bottleneck_dir', - 'validation')) + 'validation', 'imagenet_v3')) def testShouldDistortImage(self): self.assertEqual(False, retrain.should_distort_images(False, 0, 0, 0)) @@ -63,7 +63,7 @@ class ImageRetrainingTest(test_util.TensorFlowTestCase): def testAddInputDistortions(self): with tf.Graph().as_default(): with tf.Session() as sess: - retrain.add_input_distortions(True, 10, 10, 10) + retrain.add_input_distortions(True, 10, 10, 10, 299, 299, 3, 128, 128) self.assertIsNotNone(sess.graph.get_tensor_by_name('DistortJPGInput:0')) self.assertIsNotNone(sess.graph.get_tensor_by_name('DistortResult:0')) @@ -72,9 +72,9 @@ class ImageRetrainingTest(test_util.TensorFlowTestCase): with tf.Graph().as_default(): with tf.Session() as sess: bottleneck = tf.placeholder( - 
tf.float32, [1, retrain.BOTTLENECK_TENSOR_SIZE], - name=retrain.BOTTLENECK_TENSOR_NAME.split(':')[0]) - retrain.add_final_training_ops(5, 'final', bottleneck) + tf.float32, [1, 1024], + name='bottleneck') + retrain.add_final_training_ops(5, 'final', bottleneck, 1024) self.assertIsNotNone(sess.graph.get_tensor_by_name('final:0')) def testAddEvaluationStep(self): @@ -113,5 +113,22 @@ class ImageRetrainingTest(test_util.TensorFlowTestCase): result = label_image.run_graph(image, labels, jpeg, 'final:0', 3) self.assertEqual(result, 0) + def testAddJpegDecoding(self): + with tf.Graph().as_default(): + jpeg_data, mul_image = retrain.add_jpeg_decoding(10, 10, 3, 0, 255) + self.assertIsNotNone(jpeg_data) + self.assertIsNotNone(mul_image) + + def testCreateModelInfo(self): + did_raise_value_error = False + try: + retrain.create_model_info('no_such_model_name') + except ValueError: + did_raise_value_error = True + self.assertTrue(did_raise_value_error) + model_info = retrain.create_model_info('inception_v3') + self.assertIsNotNone(model_info) + self.assertEqual(299, model_info['input_width']) + if __name__ == '__main__': tf.test.main() |