diff options
| | | |
|---|---|---|
| author | 2016-04-18 17:56:51 -0800 | |
| committer | 2016-04-18 19:03:29 -0700 | |
| commit | 5c9bc51857bc0c330d3ab976871ee3509647d1e7 (patch) | |
| tree | a58def7cbf316c6e091b3b36657f120f1388ec54 /tensorflow/examples/tutorials | |
| parent | fc432e37a7ddd408ff09a7b90b1c4cd5af1b134e (diff) | |
Merge changes from github.
Change: 120185825
Diffstat (limited to 'tensorflow/examples/tutorials')
-rw-r--r-- | tensorflow/examples/tutorials/mnist/input_data.py | 193 | ||||
-rw-r--r-- | tensorflow/examples/tutorials/mnist/mnist_softmax.py | 4 | ||||
-rw-r--r-- | tensorflow/examples/tutorials/word2vec/word2vec_basic.py | 6 |
3 files changed, 6 insertions, 197 deletions
diff --git a/tensorflow/examples/tutorials/mnist/input_data.py b/tensorflow/examples/tutorials/mnist/input_data.py index 07ed2c4f1c..f1081b1052 100644 --- a/tensorflow/examples/tutorials/mnist/input_data.py +++ b/tensorflow/examples/tutorials/mnist/input_data.py @@ -26,195 +26,4 @@ import numpy from six.moves import urllib from six.moves import xrange # pylint: disable=redefined-builtin import tensorflow as tf - -SOURCE_URL = 'http://yann.lecun.com/exdb/mnist/' - - -def maybe_download(filename, work_directory): - """Download the data from Yann's website, unless it's already here.""" - if not tf.gfile.Exists(work_directory): - tf.gfile.MakeDirs(work_directory) - filepath = os.path.join(work_directory, filename) - if not tf.gfile.Exists(filepath): - with tempfile.NamedTemporaryFile() as tmpfile: - temp_file_name = tmpfile.name - urllib.request.urlretrieve(SOURCE_URL + filename, temp_file_name) - tf.gfile.Copy(temp_file_name, filepath) - with tf.gfile.GFile(filepath) as f: - size = f.Size() - print('Successfully downloaded', filename, size, 'bytes.') - return filepath - - -def _read32(bytestream): - dt = numpy.dtype(numpy.uint32).newbyteorder('>') - return numpy.frombuffer(bytestream.read(4), dtype=dt)[0] - - -def extract_images(filename): - """Extract the images into a 4D uint8 numpy array [index, y, x, depth].""" - print('Extracting', filename) - with tf.gfile.Open(filename, 'rb') as f, gzip.GzipFile(fileobj=f) as bytestream: - magic = _read32(bytestream) - if magic != 2051: - raise ValueError( - 'Invalid magic number %d in MNIST image file: %s' % - (magic, filename)) - num_images = _read32(bytestream) - rows = _read32(bytestream) - cols = _read32(bytestream) - buf = bytestream.read(rows * cols * num_images) - data = numpy.frombuffer(buf, dtype=numpy.uint8) - data = data.reshape(num_images, rows, cols, 1) - return data - - -def dense_to_one_hot(labels_dense, num_classes): - """Convert class labels from scalars to one-hot vectors.""" - num_labels = 
labels_dense.shape[0] - index_offset = numpy.arange(num_labels) * num_classes - labels_one_hot = numpy.zeros((num_labels, num_classes)) - labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1 - return labels_one_hot - - -def extract_labels(filename, one_hot=False, num_classes=10): - """Extract the labels into a 1D uint8 numpy array [index].""" - print('Extracting', filename) - with tf.gfile.Open(filename, 'rb') as f, gzip.GzipFile(fileobj=f) as bytestream: - magic = _read32(bytestream) - if magic != 2049: - raise ValueError( - 'Invalid magic number %d in MNIST label file: %s' % - (magic, filename)) - num_items = _read32(bytestream) - buf = bytestream.read(num_items) - labels = numpy.frombuffer(buf, dtype=numpy.uint8) - if one_hot: - return dense_to_one_hot(labels, num_classes) - return labels - - -class DataSet(object): - - def __init__(self, images, labels, fake_data=False, one_hot=False, - dtype=tf.float32): - """Construct a DataSet. - - one_hot arg is used only if fake_data is true. `dtype` can be either - `uint8` to leave the input as `[0, 255]`, or `float32` to rescale into - `[0, 1]`. - """ - dtype = tf.as_dtype(dtype).base_dtype - if dtype not in (tf.uint8, tf.float32): - raise TypeError('Invalid image dtype %r, expected uint8 or float32' % - dtype) - if fake_data: - self._num_examples = 10000 - self.one_hot = one_hot - else: - assert images.shape[0] == labels.shape[0], ( - 'images.shape: %s labels.shape: %s' % (images.shape, - labels.shape)) - self._num_examples = images.shape[0] - - # Convert shape from [num examples, rows, columns, depth] - # to [num examples, rows*columns] (assuming depth == 1) - assert images.shape[3] == 1 - images = images.reshape(images.shape[0], - images.shape[1] * images.shape[2]) - if dtype == tf.float32: - # Convert from [0, 255] -> [0.0, 1.0]. 
- images = images.astype(numpy.float32) - images = numpy.multiply(images, 1.0 / 255.0) - self._images = images - self._labels = labels - self._epochs_completed = 0 - self._index_in_epoch = 0 - - @property - def images(self): - return self._images - - @property - def labels(self): - return self._labels - - @property - def num_examples(self): - return self._num_examples - - @property - def epochs_completed(self): - return self._epochs_completed - - def next_batch(self, batch_size, fake_data=False): - """Return the next `batch_size` examples from this data set.""" - if fake_data: - fake_image = [1] * 784 - if self.one_hot: - fake_label = [1] + [0] * 9 - else: - fake_label = 0 - return [fake_image for _ in xrange(batch_size)], [ - fake_label for _ in xrange(batch_size)] - start = self._index_in_epoch - self._index_in_epoch += batch_size - if self._index_in_epoch > self._num_examples: - # Finished epoch - self._epochs_completed += 1 - # Shuffle the data - perm = numpy.arange(self._num_examples) - numpy.random.shuffle(perm) - self._images = self._images[perm] - self._labels = self._labels[perm] - # Start next epoch - start = 0 - self._index_in_epoch = batch_size - assert batch_size <= self._num_examples - end = self._index_in_epoch - return self._images[start:end], self._labels[start:end] - - -def read_data_sets(train_dir, fake_data=False, one_hot=False, dtype=tf.float32): - class DataSets(object): - pass - data_sets = DataSets() - - if fake_data: - def fake(): - return DataSet([], [], fake_data=True, one_hot=one_hot, dtype=dtype) - data_sets.train = fake() - data_sets.validation = fake() - data_sets.test = fake() - return data_sets - - TRAIN_IMAGES = 'train-images-idx3-ubyte.gz' - TRAIN_LABELS = 'train-labels-idx1-ubyte.gz' - TEST_IMAGES = 't10k-images-idx3-ubyte.gz' - TEST_LABELS = 't10k-labels-idx1-ubyte.gz' - VALIDATION_SIZE = 5000 - - local_file = maybe_download(TRAIN_IMAGES, train_dir) - train_images = extract_images(local_file) - - local_file = 
maybe_download(TRAIN_LABELS, train_dir) - train_labels = extract_labels(local_file, one_hot=one_hot) - - local_file = maybe_download(TEST_IMAGES, train_dir) - test_images = extract_images(local_file) - - local_file = maybe_download(TEST_LABELS, train_dir) - test_labels = extract_labels(local_file, one_hot=one_hot) - - validation_images = train_images[:VALIDATION_SIZE] - validation_labels = train_labels[:VALIDATION_SIZE] - train_images = train_images[VALIDATION_SIZE:] - train_labels = train_labels[VALIDATION_SIZE:] - - data_sets.train = DataSet(train_images, train_labels, dtype=dtype) - data_sets.validation = DataSet(validation_images, validation_labels, - dtype=dtype) - data_sets.test = DataSet(test_images, test_labels, dtype=dtype) - - return data_sets +from tensorflow.contrib.learn.python.learn.datasets.mnist import read_data_sets diff --git a/tensorflow/examples/tutorials/mnist/mnist_softmax.py b/tensorflow/examples/tutorials/mnist/mnist_softmax.py index 5aaca6bbac..81bce1a01b 100644 --- a/tensorflow/examples/tutorials/mnist/mnist_softmax.py +++ b/tensorflow/examples/tutorials/mnist/mnist_softmax.py @@ -43,8 +43,8 @@ y = tf.nn.softmax(tf.matmul(x, W) + b) # Define loss and optimizer y_ = tf.placeholder(tf.float32, [None, 10]) -cross_entropy = -tf.reduce_sum(y_ * tf.log(y)) -train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy) +cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1])) +train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy) # Train tf.initialize_all_variables().run() diff --git a/tensorflow/examples/tutorials/word2vec/word2vec_basic.py b/tensorflow/examples/tutorials/word2vec/word2vec_basic.py index 6cbe6a0f7e..83bb5dd165 100644 --- a/tensorflow/examples/tutorials/word2vec/word2vec_basic.py +++ b/tensorflow/examples/tutorials/word2vec/word2vec_basic.py @@ -81,7 +81,7 @@ def build_dataset(words): data, count, dictionary, reverse_dictionary = build_dataset(words) del words # 
Hint to reduce memory. print('Most common words (+UNK)', count[:5]) -print('Sample data', data[:10]) +print('Sample data', data[:10], [reverse_dictionary[i] for i in data[:10]]) data_index = 0 @@ -113,8 +113,8 @@ def generate_batch(batch_size, num_skips, skip_window): batch, labels = generate_batch(batch_size=8, num_skips=2, skip_window=1) for i in range(8): - print(batch[i], '->', labels[i, 0]) - print(reverse_dictionary[batch[i]], '->', reverse_dictionary[labels[i, 0]]) + print(batch[i], reverse_dictionary[batch[i]], + '->', labels[i, 0], reverse_dictionary[labels[i, 0]]) # Step 4: Build and train a skip-gram model. |