author    | Vijay Vasudevan <vrv@google.com>      | 2016-02-01 16:39:16 -0800
committer | Manjunath Kudlur <keveman@gmail.com>  | 2016-02-02 08:35:00 -0800
commit    | a8d2f0983ecdea8ff2526c717d6a9b2f06f403d8 (patch)
tree      | 485daa70344be4240e68f89c016231bfdab242d1 /tensorflow/examples/udacity
parent    | 5ff6d34a05b2eb49f7a79a4d0b78ada9a6842b6c (diff)
Minor formatting fixes.
Change: 113582098
Diffstat (limited to 'tensorflow/examples/udacity')
-rw-r--r-- | tensorflow/examples/udacity/1_notmnist.ipynb     | 305
-rw-r--r-- | tensorflow/examples/udacity/4_convolutions.ipynb |   4
-rw-r--r-- | tensorflow/examples/udacity/5_word2vec.ipynb     |  54
-rw-r--r-- | tensorflow/examples/udacity/README.md            |  12
4 files changed, 243 insertions, 132 deletions
diff --git a/tensorflow/examples/udacity/1_notmnist.ipynb b/tensorflow/examples/udacity/1_notmnist.ipynb
index b2a06ba115..661ea4df92 100644
--- a/tensorflow/examples/udacity/1_notmnist.ipynb
+++ b/tensorflow/examples/udacity/1_notmnist.ipynb
@@ -113,7 +113,7 @@
  " filename, _ = urlretrieve(url + filename, filename)\n",
  " statinfo = os.stat(filename)\n",
  " if statinfo.st_size == expected_bytes:\n",
- " print 'Found and verified', filename\n",
+ " print('Found and verified', filename)\n",
  " else:\n",
  " raise Exception(\n",
  " 'Failed to verify' + filename + '. Can you get to it with a browser?')\n",
@@ -237,9 +237,9 @@ "colab_type": "text"
  },
  "source": [
- "Now let's load the data in a more manageable format.\n",
+ "Now let's load the data in a more manageable format. Since, depending on your computer setup you might not be able to fit it all in memory, we'll load each class into a separate dataset, store them on disk and curate them independently. Later we'll merge them into a single dataset of manageable size.\n",
  "\n",
- "We'll convert the entire dataset into a 3D array (image index, x, y) of floating point values, normalized to have approximately zero mean and standard deviation ~0.5 to make training easier down the road. The labels will be stored into a separate array of integers 0 through 9.\n",
+ "We'll convert the entire dataset into a 3D array (image index, x, y) of floating point values, normalized to have approximately zero mean and standard deviation ~0.5 to make training easier down the road. \n",
  "\n",
  "A few images might not be readable, we'll just skip them."
  ]
 },
@@ -283,83 +283,143 @@
  "image_size = 28 # Pixel width and height.\n",
  "pixel_depth = 255.0 # Number of levels per pixel.\n",
  "\n",
- "def load(data_folders, min_num_images, max_num_images):\n",
- " dataset = np.ndarray(\n",
- " shape=(max_num_images, image_size, image_size), dtype=np.float32)\n",
- " labels = np.ndarray(shape=(max_num_images), dtype=np.int32)\n",
- " label_index = 0\n",
- " image_index = 0\n",
- " for folder in data_folders:\n",
- " print(folder)\n",
+ "def load_letter(folder, min_num_images):\n",
+ " image_files = os.listdir(folder)\n",
+ " dataset = np.ndarray(shape=(len(image_files), image_size, image_size),\n",
+ " dtype=np.float32)\n",
+ " image_index = 0\n",
+ " print(folder)\n",
  " for image in os.listdir(folder):\n",
- " if image_index >= max_num_images:\n",
- " raise Exception('More images than expected: %d >= %d' % (\n",
- " image_index, max_num_images))\n",
  " image_file = os.path.join(folder, image)\n",
  " try:\n",
- " image_data = (ndimage.imread(image_file).astype(float) -\n",
+ " image_data = (ndimage.imread(image_file).astype(float) - \n",
  " pixel_depth / 2) / pixel_depth\n",
  " if image_data.shape != (image_size, image_size):\n",
  " raise Exception('Unexpected image shape: %s' % str(image_data.shape))\n",
  " dataset[image_index, :, :] = image_data\n",
- " labels[image_index] = label_index\n",
  " image_index += 1\n",
  " except IOError as e:\n",
  " print('Could not read:', image_file, ':', e, '- it\'s ok, skipping.')\n",
- " label_index += 1\n",
- " num_images = image_index\n",
- " dataset = dataset[0:num_images, :, :]\n",
- " labels = labels[0:num_images]\n",
- " if num_images < min_num_images:\n",
- " raise Exception('Many fewer images than expected: %d < %d' % (\n",
- " num_images, min_num_images))\n",
- " print('Full dataset tensor:', dataset.shape)\n",
- " print('Mean:', np.mean(dataset))\n",
- " print('Standard deviation:', np.std(dataset))\n",
- " print('Labels:', labels.shape)\n",
- " return dataset, labels\n",
- "train_dataset, train_labels = load(train_folders, 450000, 550000)\n",
- "test_dataset, test_labels = load(test_folders, 18000, 20000)"
+ " \n",
+ " num_images = image_index\n",
+ " dataset = dataset[0:num_images, :, :]\n",
+ " if num_images < min_num_images:\n",
+ " raise Exception('Many fewer images than expected: %d < %d' % \n",
+ " (num_images, min_num_images))\n",
+ " \n",
+ " print('Full dataset tensor:', dataset.shape)\n",
+ " print('Mean:', np.mean(dataset))\n",
+ " print('Standard deviation:', np.std(dataset))\n",
+ " return dataset\n",
+ " \n",
+ "def load(data_folders, min_num_images_per_class):\n",
+ " dataset_names = []\n",
+ " for folder in data_folders:\n",
+ " dataset = load_letter(folder, min_num_images_per_class)\n",
+ " set_filename = folder + '.pickle'\n",
+ " try:\n",
+ " with open(set_filename, 'wb') as f:\n",
+ " pickle.dump(dataset, f, pickle.HIGHEST_PROTOCOL)\n",
+ " dataset_names.append(set_filename)\n",
+ " except Exception as e:\n",
+ " print('Unable to save data to', set_filename, ':', e)\n",
+ " \n",
+ " return dataset_names\n",
+ "\n",
+ "train_datasets = load(train_folders, 45000)\n",
+ "test_datasets = load(test_folders, 1800)"
  ],
  "outputs": [
  {
  "output_type": "stream",
  "text": [
  "notMNIST_large/A\n",
- "Could not read: notMNIST_large/A/SG90IE11c3RhcmQgQlROIFBvc3Rlci50dGY=.png : cannot identify image file - it's ok, skipping.\n",
- "Could not read: notMNIST_large/A/RnJlaWdodERpc3BCb29rSXRhbGljLnR0Zg==.png : cannot identify image file - it's ok, skipping.\n",
  "Could not read: notMNIST_large/A/Um9tYW5hIEJvbGQucGZi.png : cannot identify image file - it's ok, skipping.\n",
+ "Could not read: notMNIST_large/A/RnJlaWdodERpc3BCb29rSXRhbGljLnR0Zg==.png : cannot identify image file - it's ok, skipping.\n",
+ "Could not read: notMNIST_large/A/SG90IE11c3RhcmQgQlROIFBvc3Rlci50dGY=.png : cannot identify image file - it's ok, skipping.\n",
+ "Full dataset tensor: (52909, 28, 28)\n",
+ "Mean: -0.12848\n",
+ "Standard deviation: 0.425576\n",
  "notMNIST_large/B\n",
  "Could not read: notMNIST_large/B/TmlraXNFRi1TZW1pQm9sZEl0YWxpYy5vdGY=.png : cannot identify image file - it's ok, skipping.\n",
+ "Full dataset tensor: (52911, 28, 28)\n",
+ "Mean: -0.00755947\n",
+ "Standard deviation: 0.417272\n",
  "notMNIST_large/C\n",
+ "Full dataset tensor: (52912, 28, 28)\n",
+ "Mean: -0.142321\n",
+ "Standard deviation: 0.421305\n",
  "notMNIST_large/D\n",
  "Could not read: notMNIST_large/D/VHJhbnNpdCBCb2xkLnR0Zg==.png : cannot identify image file - it's ok, skipping.\n",
+ "Full dataset tensor: (52911, 28, 28)\n",
+ "Mean: -0.0574553\n",
+ "Standard deviation: 0.434072\n",
  "notMNIST_large/E\n",
+ "Full dataset tensor: (52912, 28, 28)\n",
+ "Mean: -0.0701406\n",
+ "Standard deviation: 0.42882\n",
  "notMNIST_large/F\n",
+ "Full dataset tensor: (52912, 28, 28)\n",
+ "Mean: -0.125914\n",
+ "Standard deviation: 0.429645\n",
  "notMNIST_large/G\n",
+ "Full dataset tensor: (52912, 28, 28)\n",
+ "Mean: -0.0947771\n",
+ "Standard deviation: 0.421674\n",
  "notMNIST_large/H\n",
+ "Full dataset tensor: (52912, 28, 28)\n",
+ "Mean: -0.0687667\n",
+ "Standard deviation: 0.430344\n",
  "notMNIST_large/I\n",
+ "Full dataset tensor: (52912, 28, 28)\n",
+ "Mean: 0.0307405\n",
+ "Standard deviation: 0.449686\n",
  "notMNIST_large/J\n",
- "Full dataset tensor: (529114, 28, 28)\n",
- "Mean: -0.0816593\n",
- "Standard deviation: 0.454232\n",
- "Labels: (529114,)\n",
+ "Full dataset tensor: (52911, 28, 28)\n",
+ "Mean: -0.153479\n",
+ "Standard deviation: 0.397169\n",
  "notMNIST_small/A\n",
"Could not read: notMNIST_small/A/RGVtb2NyYXRpY2FCb2xkT2xkc3R5bGUgQm9sZC50dGY=.png : cannot identify image file - it's ok, skipping.\n", + "Full dataset tensor: (1872, 28, 28)\n", + "Mean: -0.132588\n", + "Standard deviation: 0.445923\n", "notMNIST_small/B\n", + "Full dataset tensor: (1873, 28, 28)\n", + "Mean: 0.00535619\n", + "Standard deviation: 0.457054\n", "notMNIST_small/C\n", + "Full dataset tensor: (1873, 28, 28)\n", + "Mean: -0.141489\n", + "Standard deviation: 0.441056\n", "notMNIST_small/D\n", + "Full dataset tensor: (1873, 28, 28)\n", + "Mean: -0.0492094\n", + "Standard deviation: 0.460477\n", "notMNIST_small/E\n", + "Full dataset tensor: (1873, 28, 28)\n", + "Mean: -0.0598952\n", + "Standard deviation: 0.456146\n", "notMNIST_small/F\n", "Could not read: notMNIST_small/F/Q3Jvc3NvdmVyIEJvbGRPYmxpcXVlLnR0Zg==.png : cannot identify image file - it's ok, skipping.\n", + "Full dataset tensor: (1872, 28, 28)\n", + "Mean: -0.118148\n", + "Standard deviation: 0.451134\n", "notMNIST_small/G\n", + "Full dataset tensor: (1872, 28, 28)\n", + "Mean: -0.092519\n", + "Standard deviation: 0.448468\n", "notMNIST_small/H\n", + "Full dataset tensor: (1872, 28, 28)\n", + "Mean: -0.0586729\n", + "Standard deviation: 0.457387\n", "notMNIST_small/I\n", + "Full dataset tensor: (1872, 28, 28)\n", + "Mean: 0.0526481\n", + "Standard deviation: 0.472657\n", "notMNIST_small/J\n", - "Full dataset tensor: (18724, 28, 28)\n", - "Mean: -0.0746364\n", - "Standard deviation: 0.458622\n", - "Labels: (18724,)\n" + "Full dataset tensor: (1872, 28, 28)\n", + "Mean: -0.15167\n", + "Standard deviation: 0.449521\n" ], "name": "stdout" } @@ -385,63 +445,12 @@ { "cell_type": "markdown", "metadata": { - "id": "GPTCnjIcyuKN", - "colab_type": "text" - }, - "source": [ - "Next, we'll randomize the data. It's important to have the labels well shuffled for the training and test distributions to match." - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "6WZ2l2tN2zOL", - "colab_type": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - } - }, - "cellView": "both" - }, - "source": [ - "np.random.seed(133)\n", - "def randomize(dataset, labels):\n", - " permutation = np.random.permutation(labels.shape[0])\n", - " shuffled_dataset = dataset[permutation,:,:]\n", - " shuffled_labels = labels[permutation]\n", - " return shuffled_dataset, shuffled_labels\n", - "train_dataset, train_labels = randomize(train_dataset, train_labels)\n", - "test_dataset, test_labels = randomize(test_dataset, test_labels)" - ], - "outputs": [], - "execution_count": 0 - }, - { - "cell_type": "markdown", - "metadata": { - "id": "puDUTe6t6USl", - "colab_type": "text" - }, - "source": [ - "---\n", - "Problem 3\n", - "---------\n", - "Convince yourself that the data is still good after shuffling!\n", - "\n", - "---" - ] - }, - { - "cell_type": "markdown", - "metadata": { "id": "cYznx5jUwzoO", "colab_type": "text" }, "source": [ "---\n", - "Problem 4\n", + "Problem 3\n", "---------\n", "Another check: we expect the data to be balanced across classes. Verify that.\n", "\n", @@ -455,7 +464,7 @@ "colab_type": "text" }, "source": [ - "Prune the training data as needed. Depending on your computer setup, you might not be able to fit it all in memory, and you can tune train_size as needed.\n", + "Merge and prune the training data as needed. Depending on your computer setup, you might not be able to fit it all in memory, and you can tune `train_size` as needed. 
  "\n",
  "Also create a validation dataset for hyperparameter tuning."
  ]
 },
@@ -496,22 +505,65 @@
  "outputId": "8af66da6-902d-4719-bedc-7c9fb7ae7948"
  },
  "source": [
+ "def make_arrays(nb_rows, img_size):\n",
+ " if nb_rows:\n",
+ " dataset = np.ndarray((nb_rows, img_size, img_size), dtype=np.float32)\n",
+ " labels = np.ndarray(nb_rows, dtype=np.int32)\n",
+ " else:\n",
+ " dataset, labels = None, None\n",
+ " return dataset, labels\n",
+ "\n",
+ "def merge_datasets(pickle_files, train_size, valid_size=0):\n",
+ " num_classes = len(pickle_files)\n",
+ " valid_dataset, valid_labels = make_arrays(valid_size, image_size)\n",
+ " train_dataset, train_labels = make_arrays(train_size, image_size)\n",
+ " vsize_per_class = valid_size // num_classes\n",
+ " tsize_per_class = train_size // num_classes\n",
+ " \n",
+ " start_v, start_t = 0, 0\n",
+ " end_v, end_t = vsize_per_class, tsize_per_class\n",
+ " end_l = vsize_per_class+tsize_per_class\n",
+ " for label, pickle_file in enumerate(pickle_files): \n",
+ " try:\n",
+ " with open(pickle_file, 'rb') as f:\n",
+ " letter_set = pickle.load(f)\n",
+ " if valid_dataset is not None:\n",
+ " valid_letter = letter_set[:vsize_per_class, :, :]\n",
+ " valid_dataset[start_v:end_v, :, :] = valid_letter\n",
+ " valid_labels[start_v:end_v] = label\n",
+ " start_v += vsize_per_class\n",
+ " end_v += vsize_per_class\n",
+ " \n",
+ " train_letter = letter_set[vsize_per_class:end_l, :, :]\n",
+ " train_dataset[start_t:end_t, :, :] = train_letter\n",
+ " train_labels[start_t:end_t] = label\n",
+ " start_t += tsize_per_class\n",
+ " end_t += tsize_per_class\n",
+ " except Exception as e:\n",
+ " print('Unable to process data from', pickle_file, ':', e)\n",
+ " raise\n",
+ " \n",
+ " return valid_dataset, valid_labels, train_dataset, train_labels\n",
+ " \n",
+ " \n",
  "train_size = 200000\n",
  "valid_size = 10000\n",
+ "test_size = 10000\n",
+ "\n",
+ "valid_dataset, valid_labels, train_dataset, train_labels = merge_datasets(train_datasets, train_size, valid_size)\n",
+ "__, __, test_dataset, test_labels = merge_datasets(test_datasets, test_size)\n",
  "\n",
- "valid_dataset = train_dataset[:valid_size,:,:]\n",
- "valid_labels = train_labels[:valid_size]\n",
- "train_dataset = train_dataset[valid_size:valid_size+train_size,:,:]\n",
- "train_labels = train_labels[valid_size:valid_size+train_size]\n",
- "print('Training', train_dataset.shape, train_labels.shape)\n",
- "print('Validation', valid_dataset.shape, valid_labels.shape)"
+ "print('Training:', train_dataset.shape, train_labels.shape)\n",
+ "print('Validation:', valid_dataset.shape, valid_labels.shape)\n",
+ "print('Testing:', test_dataset.shape, test_labels.shape)"
  ],
  "outputs": [
  {
  "output_type": "stream",
  "text": [
  "Training (200000, 28, 28) (200000,)\n",
- "Validation (10000, 28, 28) (10000,)\n"
+ "Validation (10000, 28, 28) (10000,)\n",
+ "Testing (10000, 28, 28) (10000,)\n"
  ],
  "name": "stdout"
  }
@@ -521,6 +573,57 @@
  {
  "cell_type": "markdown",
  "metadata": {
+ "id": "GPTCnjIcyuKN",
+ "colab_type": "text"
+ },
+ "source": [
+ "Next, we'll randomize the data. It's important to have the labels well shuffled for the training and test distributions to match."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "6WZ2l2tN2zOL",
+ "colab_type": "code",
+ "colab": {
+ "autoexec": {
+ "startup": false,
+ "wait_interval": 0
+ }
+ },
+ "cellView": "both"
+ },
+ "source": [
+ "np.random.seed(133)\n",
+ "def randomize(dataset, labels):\n",
+ " permutation = np.random.permutation(labels.shape[0])\n",
+ " shuffled_dataset = dataset[permutation,:,:]\n",
+ " shuffled_labels = labels[permutation]\n",
+ " return shuffled_dataset, shuffled_labels\n",
+ "train_dataset, train_labels = randomize(train_dataset, train_labels)\n",
+ "test_dataset, test_labels = randomize(test_dataset, test_labels)"
+ ],
+ "outputs": [],
+ "execution_count": 0
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "puDUTe6t6USl",
+ "colab_type": "text"
+ },
+ "source": [
+ "---\n",
+ "Problem 4\n",
+ "---------\n",
+ "Convince yourself that the data is still good after shuffling!\n",
+ "\n",
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
  "id": "tIQJaJuwg5Hw",
  "colab_type": "text"
  },
@@ -654,4 +757,4 @@
  ]
  }
  ]
-}
\ No newline at end of file
+}
diff --git a/tensorflow/examples/udacity/4_convolutions.ipynb b/tensorflow/examples/udacity/4_convolutions.ipynb
index 151aa25ce8..9ad41acb0c 100644
--- a/tensorflow/examples/udacity/4_convolutions.ipynb
+++ b/tensorflow/examples/udacity/4_convolutions.ipynb
@@ -265,7 +265,7 @@
  " [patch_size, patch_size, depth, depth], stddev=0.1))\n",
  " layer2_biases = tf.Variable(tf.constant(1.0, shape=[depth]))\n",
  " layer3_weights = tf.Variable(tf.truncated_normal(\n",
- " [image_size / 4 * image_size / 4 * depth, num_hidden], stddev=0.1))\n",
+ " [image_size // 4 * image_size // 4 * depth, num_hidden], stddev=0.1))\n",
  " layer3_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))\n",
  " layer4_weights = tf.Variable(tf.truncated_normal(\n",
  " [num_hidden, num_labels], stddev=0.1))\n",
@@ -461,4 +461,4 @@
  ]
  }
  ]
-}
\ No newline at end of file
+}
diff --git a/tensorflow/examples/udacity/5_word2vec.ipynb b/tensorflow/examples/udacity/5_word2vec.ipynb
index 1b7f5e2005..b3a7a71e2c 100644
--- a/tensorflow/examples/udacity/5_word2vec.ipynb
+++ b/tensorflow/examples/udacity/5_word2vec.ipynb
@@ -114,7 +114,7 @@
  " if statinfo.st_size == expected_bytes:\n",
  " print('Found and verified %s' % filename)\n",
  " else:\n",
- " print statinfo.st_size\n",
+ " print(statinfo.st_size)\n",
  " raise Exception(\n",
  " 'Failed to verify ' + filename + '. Can you get to it with a browser?')\n",
  " return filename\n",
@@ -354,35 +354,31 @@
  " data_index = (data_index + 1) % len(data)\n",
  " return batch, labels\n",
  "\n",
- "batch, labels = generate_batch(batch_size=8, num_skips=2, skip_window=1)\n",
- "for i in range(8):\n",
- " print('%d -> %d' % (batch[i], labels[i, 0]))\n",
- " print('%s -> %s' % (reverse_dictionary[batch[i]],\n",
- " reverse_dictionary[labels[i, 0]]))"
+ "print('data:', [reverse_dictionary[di] for di in data[:8]])\n",
+ "\n",
+ "for num_skips, skip_window in [(2, 1), (4, 2)]:\n",
+ " data_index = 0\n",
+ " batch, labels = generate_batch(batch_size=8, num_skips=num_skips, skip_window=skip_window)\n",
+ " print('\\nwith num_skips = %d and skip_window = %d:' % (num_skips, skip_window))\n",
+ " print(' batch:', [reverse_dictionary[bi] for bi in batch])\n",
+ " print(' labels:', [reverse_dictionary[li] for li in labels.reshape(8)])"
  ],
  "outputs": [
- {
- "output_type": "stream",
- "text": [
- " 3083 -> 5243\n",
- "originated -> anarchism\n",
- "3083 -> 12\n",
- "originated -> as\n",
- "12 -> 3083\n",
- "as -> originated\n",
- "12 -> 6\n",
- "as -> a\n",
- "6 -> 12\n",
- "a -> as\n",
- "6 -> 195\n",
- "a -> term\n",
- "195 -> 6\n",
- "term -> a\n",
- "195 -> 2\n",
- "term -> of\n"
- ],
- "name": "stdout"
- }
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "data: ['anarchism', 'originated', 'as', 'a', 'term', 'of', 'abuse', 'first']\n",
+ "\n",
+ "with num_skips = 2 and skip_window = 1:\n",
+ " batch: ['originated', 'originated', 'as', 'as', 'a', 'a', 'term', 'term']\n",
+ " labels: ['as', 'anarchism', 'a', 'originated', 'term', 'as', 'a', 'of']\n",
+ "\n",
+ "with num_skips = 4 and skip_window = 2:\n",
+ " batch: ['as', 'as', 'as', 'as', 'a', 'a', 'a', 'a']\n",
+ " labels: ['anarchism', 'originated', 'term', 'a', 'as', 'of', 'originated', 'term']\n"
+ ]
+ }
  ],
  "execution_count": 0
  },
@@ -890,4 +886,4 @@
  ]
  }
  ]
-}
\ No newline at end of file
+}
diff --git a/tensorflow/examples/udacity/README.md b/tensorflow/examples/udacity/README.md
index a3adc5f155..65b1664882 100644
--- a/tensorflow/examples/udacity/README.md
+++ b/tensorflow/examples/udacity/README.md
@@ -1,6 +1,8 @@
 Assignments for Udacity Deep Learning class with TensorFlow
 ===========================================================
 
+Course information can be found at https://www.udacity.com/course/deep-learning--ud730
+
 Running the Docker container from the Google Cloud repository
 -------------------------------------------------------------
 
@@ -44,8 +46,18 @@ Building a local Docker container
 Running the local container
 ---------------------------
 
+To run a disposable container:
+
     docker run -p 8888:8888 -it --rm $USER/assignments
 
+Note the above command will create an ephemeral container and all data stored in the container will be lost when the container stops.
+
+To avoid losing work between sessions in the container, it is recommended that you mount the `tensorflow/examples/udacity` directory into the container:
+
+    docker run -p 8888:8888 -v </path/to/tensorflow/examples/udacity>:/notebooks -it --rm $USER/assignments
+
+This will allow you to save work and have access to generated files on the host filesystem.
+
 Pushing a Google Cloud release
 ------------------------------
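A few notes for readers working through the notebooks touched by this change. Problem 3 in `1_notmnist.ipynb` asks you to verify that the merged data is balanced across classes. One minimal sketch, assuming the `train_labels` and `test_labels` arrays produced by `merge_datasets` above (the `class_counts` helper is ours, not part of the notebook):

```python
import numpy as np

def class_counts(labels, num_classes=10):
    # np.bincount tallies how many samples carry each label 0..num_classes-1.
    return np.bincount(labels, minlength=num_classes)

# With train_size = 200000, each of the ten letter classes should hold
# roughly 20000 samples; a large deviation would suggest a merge bug.
print('train:', class_counts(train_labels))
print('test: ', class_counts(test_labels))
```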
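For Problem 4 ("convince yourself that the data is still good after shuffling"), one approach is to eyeball a few samples after `randomize` and check that each image still matches its label. A sketch, assuming `matplotlib` is available as in the rest of the course notebooks:

```python
import matplotlib.pyplot as plt

# Plot five shuffled training images with their letter labels as titles.
# If shuffling had misaligned images and labels, the titles would not
# match the glyphs drawn beneath them.
for i in range(5):
    plt.subplot(1, 5, i + 1)
    plt.imshow(train_dataset[i], cmap='gray')
    plt.title(chr(ord('A') + train_labels[i]))
    plt.axis('off')
plt.show()
```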
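The one-character `4_convolutions.ipynb` change is a Python 3 fix: `/` is true division and yields a float, which is invalid in a TensorFlow shape argument, while `//` keeps the arithmetic integral. A quick illustration in plain Python:

```python
image_size, depth = 28, 16

# True division propagates floats through the whole expression...
print(image_size / 4 * image_size / 4 * depth)    # 784.0 (float)

# ...whereas floor division keeps every intermediate an int, which is
# what a shape dimension such as tf.truncated_normal's expects.
print(image_size // 4 * image_size // 4 * depth)  # 784 (int)
```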
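Finally, the reworked `5_word2vec.ipynb` demo prints skip-gram batches for two settings: `skip_window` is how many words of context to keep on each side of the center word, and `num_skips` is how many (center, context) pairs to sample from that window. A toy recomputation, independent of the notebook's `generate_batch` and its random sampling, that enumerates every pair for `skip_window = 1` over the same eight words:

```python
# Enumerate all (center, context) pairs for skip_window = 1; the notebook's
# generate_batch draws num_skips of these per center word, in random order.
words = ['anarchism', 'originated', 'as', 'a', 'term', 'of', 'abuse', 'first']
skip_window = 1
pairs = []
for i in range(skip_window, len(words) - skip_window):
    for offset in range(-skip_window, skip_window + 1):
        if offset != 0:
            pairs.append((words[i], words[i + offset]))
print(pairs[:8])  # centers 'originated', 'as', 'a', 'term', two pairs each
```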