Diffstat (limited to 'tensorflow/examples/tutorials')
 tensorflow/examples/tutorials/estimators/BUILD           |  12
 tensorflow/examples/tutorials/estimators/__init__.py     |   0
 tensorflow/examples/tutorials/input_fn/__init__.py       |   0
 tensorflow/examples/tutorials/layers/BUILD               |  12
 tensorflow/examples/tutorials/layers/__init__.py         |   0
 tensorflow/examples/tutorials/mnist/BUILD                |  15
 tensorflow/examples/tutorials/mnist/input_data.py        |   2
 tensorflow/examples/tutorials/mnist/mnist_softmax.py     |  16
 tensorflow/examples/tutorials/monitors/BUILD             |  12
 tensorflow/examples/tutorials/monitors/__init__.py       |   0
 tensorflow/examples/tutorials/monitors/iris_monitors.py  |   6
 tensorflow/examples/tutorials/word2vec/BUILD             |  14
 tensorflow/examples/tutorials/word2vec/word2vec_basic.py | 161
 13 files changed, 140 insertions(+), 110 deletions(-)
diff --git a/tensorflow/examples/tutorials/estimators/BUILD b/tensorflow/examples/tutorials/estimators/BUILD
index ecbc1a431d..bab609f208 100644
--- a/tensorflow/examples/tutorials/estimators/BUILD
+++ b/tensorflow/examples/tutorials/estimators/BUILD
@@ -20,15 +20,3 @@ py_binary(
"//third_party/py/numpy",
],
)
-
-filegroup(
- name = "all_files",
- srcs = glob(
- ["**/*"],
- exclude = [
- "**/METADATA",
- "**/OWNERS",
- ],
- ),
- visibility = ["//tensorflow:__subpackages__"],
-)
diff --git a/tensorflow/examples/tutorials/estimators/__init__.py b/tensorflow/examples/tutorials/estimators/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/tensorflow/examples/tutorials/estimators/__init__.py
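
The empty __init__.py files added in this change (here and, below, for input_fn, layers, and monitors) mark each tutorial directory as an importable Python package, which the PY2AND3 targets need under Python 2. A minimal sketch of what they enable:

    # With the empty __init__.py in place, the directory imports as a package:
    from tensorflow.examples.tutorials import estimators
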
diff --git a/tensorflow/examples/tutorials/input_fn/__init__.py b/tensorflow/examples/tutorials/input_fn/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/tensorflow/examples/tutorials/input_fn/__init__.py
diff --git a/tensorflow/examples/tutorials/layers/BUILD b/tensorflow/examples/tutorials/layers/BUILD
index f8a29c79c6..aad78b1840 100644
--- a/tensorflow/examples/tutorials/layers/BUILD
+++ b/tensorflow/examples/tutorials/layers/BUILD
@@ -19,15 +19,3 @@ py_binary(
"//third_party/py/numpy",
],
)
-
-filegroup(
- name = "all_files",
- srcs = glob(
- ["**/*"],
- exclude = [
- "**/METADATA",
- "**/OWNERS",
- ],
- ),
- visibility = ["//tensorflow:__subpackages__"],
-)
diff --git a/tensorflow/examples/tutorials/layers/__init__.py b/tensorflow/examples/tutorials/layers/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/tensorflow/examples/tutorials/layers/__init__.py
diff --git a/tensorflow/examples/tutorials/mnist/BUILD b/tensorflow/examples/tutorials/mnist/BUILD
index 6d4e67063d..d4070fdd1e 100644
--- a/tensorflow/examples/tutorials/mnist/BUILD
+++ b/tensorflow/examples/tutorials/mnist/BUILD
@@ -51,6 +51,7 @@ py_binary(
"fully_connected_feed.py",
],
srcs_version = "PY2AND3",
+ tags = ["optonly"],
deps = [
":input_data",
":mnist",
@@ -96,7 +97,7 @@ py_binary(
py_test(
name = "fully_connected_feed_test",
- size = "small",
+ size = "medium",
srcs = [
"fully_connected_feed.py",
],
@@ -132,15 +133,3 @@ py_test(
"//tensorflow:tensorflow_py",
],
)
-
-filegroup(
- name = "all_files",
- srcs = glob(
- ["**/*"],
- exclude = [
- "**/METADATA",
- "**/OWNERS",
- ],
- ),
- visibility = ["//tensorflow:__subpackages__"],
-)
diff --git a/tensorflow/examples/tutorials/mnist/input_data.py b/tensorflow/examples/tutorials/mnist/input_data.py
index f1a7e1c4af..fa148ae3e6 100644
--- a/tensorflow/examples/tutorials/mnist/input_data.py
+++ b/tensorflow/examples/tutorials/mnist/input_data.py
@@ -18,6 +18,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
+# pylint: disable=unused-import
import gzip
import os
import tempfile
@@ -27,3 +28,4 @@ from six.moves import urllib
from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow as tf
from tensorflow.contrib.learn.python.learn.datasets.mnist import read_data_sets
+# pylint: enable=unused-import
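
input_data.py is deliberately a re-export shim: every import in it is unused locally (hence the new pylint pragmas wrapping the whole block) and exists so tutorial code can reach read_data_sets through this module. Typical usage, sketched with the data_dir default used elsewhere in this change:

    from tensorflow.examples.tutorials.mnist import input_data

    # Downloads MNIST on first use; one_hot selects the label encoding.
    mnist = input_data.read_data_sets('/tmp/tensorflow/mnist/input_data',
                                      one_hot=True)
    print(mnist.train.num_examples)  # 55000
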
diff --git a/tensorflow/examples/tutorials/mnist/mnist_softmax.py b/tensorflow/examples/tutorials/mnist/mnist_softmax.py
index fb3ac94203..47dd6a1947 100644
--- a/tensorflow/examples/tutorials/mnist/mnist_softmax.py
+++ b/tensorflow/examples/tutorials/mnist/mnist_softmax.py
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
-
"""A very simple MNIST classifier.
See extensive documentation at
@@ -67,12 +66,19 @@ def main(_):
# Test trained model
correct_prediction = tf.equal(tf.argmax(y, 1), y_)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
- print(sess.run(accuracy, feed_dict={x: mnist.test.images,
- y_: mnist.test.labels}))
+ print(sess.run(
+ accuracy, feed_dict={
+ x: mnist.test.images,
+ y_: mnist.test.labels
+ }))
+
if __name__ == '__main__':
parser = argparse.ArgumentParser()
- parser.add_argument('--data_dir', type=str, default='/tmp/tensorflow/mnist/input_data',
- help='Directory for storing input data')
+ parser.add_argument(
+ '--data_dir',
+ type=str,
+ default='/tmp/tensorflow/mnist/input_data',
+ help='Directory for storing input data')
FLAGS, unparsed = parser.parse_known_args()
tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
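
The reformatted argparse tail above relies on parse_known_args(), which splits the flags it recognizes from everything else so the leftovers can be forwarded to tf.app.run(). A self-contained sketch of that split:

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', type=str, default='/tmp/data')
    # parse_known_args() returns (namespace, leftovers) instead of erroring
    # out on flags it does not recognize.
    flags, unparsed = parser.parse_known_args(['--data_dir', '/d', '--other=1'])
    print(flags.data_dir)  # /d
    print(unparsed)        # ['--other=1']
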
diff --git a/tensorflow/examples/tutorials/monitors/BUILD b/tensorflow/examples/tutorials/monitors/BUILD
index 4220e8144d..1c49e3fe53 100644
--- a/tensorflow/examples/tutorials/monitors/BUILD
+++ b/tensorflow/examples/tutorials/monitors/BUILD
@@ -23,15 +23,3 @@ py_binary(
"//third_party/py/numpy",
],
)
-
-filegroup(
- name = "all_files",
- srcs = glob(
- ["**/*"],
- exclude = [
- "**/METADATA",
- "**/OWNERS",
- ],
- ),
- visibility = ["//tensorflow:__subpackages__"],
-)
diff --git a/tensorflow/examples/tutorials/monitors/__init__.py b/tensorflow/examples/tutorials/monitors/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/tensorflow/examples/tutorials/monitors/__init__.py
diff --git a/tensorflow/examples/tutorials/monitors/iris_monitors.py b/tensorflow/examples/tutorials/monitors/iris_monitors.py
index 850d105f7b..a2b7fe6023 100644
--- a/tensorflow/examples/tutorials/monitors/iris_monitors.py
+++ b/tensorflow/examples/tutorials/monitors/iris_monitors.py
@@ -32,9 +32,9 @@ IRIS_TEST = os.path.join(os.path.dirname(__file__), "iris_test.csv")
def main(unused_argv):
# Load datasets.
training_set = tf.contrib.learn.datasets.base.load_csv_with_header(
- filename=IRIS_TRAINING, target_dtype=np.int, features_dtype=np.float)
+ filename=IRIS_TRAINING, target_dtype=np.int, features_dtype=np.float32)
test_set = tf.contrib.learn.datasets.base.load_csv_with_header(
- filename=IRIS_TEST, target_dtype=np.int, features_dtype=np.float)
+ filename=IRIS_TEST, target_dtype=np.int, features_dtype=np.float32)
validation_metrics = {
"accuracy":
@@ -83,7 +83,7 @@ def main(unused_argv):
# Classify two new flower samples.
new_samples = np.array(
- [[6.4, 3.2, 4.5, 1.5], [5.8, 3.1, 5.0, 1.7]], dtype=float)
+ [[6.4, 3.2, 4.5, 1.5], [5.8, 3.1, 5.0, 1.7]], dtype=np.float32)
y = list(classifier.predict(new_samples))
print("Predictions: {}".format(str(y)))
diff --git a/tensorflow/examples/tutorials/word2vec/BUILD b/tensorflow/examples/tutorials/word2vec/BUILD
index 42d6355b4f..2e19c038bd 100644
--- a/tensorflow/examples/tutorials/word2vec/BUILD
+++ b/tensorflow/examples/tutorials/word2vec/BUILD
@@ -13,19 +13,11 @@ py_binary(
"word2vec_basic.py",
],
srcs_version = "PY2AND3",
+ tags = [
+ "no-internal-py3",
+ ],
deps = [
"//tensorflow:tensorflow_py",
"//third_party/py/numpy",
],
)
-
-filegroup(
- name = "all_files",
- srcs = glob(
- ["**/*"],
- exclude = [
- "**/METADATA",
- "**/OWNERS",
- ],
- ),
-)
diff --git a/tensorflow/examples/tutorials/word2vec/word2vec_basic.py b/tensorflow/examples/tutorials/word2vec/word2vec_basic.py
index 142e45a2e8..b09ee99768 100644
--- a/tensorflow/examples/tutorials/word2vec/word2vec_basic.py
+++ b/tensorflow/examples/tutorials/word2vec/word2vec_basic.py
@@ -21,6 +21,8 @@ from __future__ import print_function
import collections
import math
import os
+import sys
+import argparse
import random
from tempfile import gettempdir
import zipfile
@@ -30,6 +32,24 @@ from six.moves import urllib
from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow as tf
+from tensorflow.contrib.tensorboard.plugins import projector
+
+# Give a folder path as an argument with '--log_dir' to save
+# TensorBoard summaries. Default is a log folder in the current directory.
+current_path = os.path.dirname(os.path.realpath(sys.argv[0]))
+
+parser = argparse.ArgumentParser()
+parser.add_argument(
+ '--log_dir',
+ type=str,
+ default=os.path.join(current_path, 'log'),
+ help='The log directory for TensorBoard summaries.')
+FLAGS, unparsed = parser.parse_known_args()
+
+# Create the directory for TensorBoard variables if it does not exist.
+if not os.path.exists(FLAGS.log_dir):
+ os.makedirs(FLAGS.log_dir)
+
# Step 1: Download the data.
url = 'http://mattmahoney.net/dc/'
@@ -61,6 +81,7 @@ def read_data(filename):
data = tf.compat.as_str(f.read(f.namelist()[0])).split()
return data
+
vocabulary = read_data(filename)
print('Data size', len(vocabulary))
@@ -86,20 +107,22 @@ def build_dataset(words, n_words):
reversed_dictionary = dict(zip(dictionary.values(), dictionary.keys()))
return data, count, dictionary, reversed_dictionary
+
# Filling 4 global variables:
# data - list of codes (integers from 0 to vocabulary_size-1).
# This is the original text but words are replaced by their codes
# count - map of words(strings) to count of occurrences
# dictionary - map of words(strings) to their codes(integers)
# reverse_dictionary - maps codes(integers) to words(strings)
-data, count, dictionary, reverse_dictionary = build_dataset(vocabulary,
- vocabulary_size)
+data, count, dictionary, reverse_dictionary = build_dataset(
+ vocabulary, vocabulary_size)
del vocabulary # Hint to reduce memory.
print('Most common words (+UNK)', count[:5])
print('Sample data', data[:10], [reverse_dictionary[i] for i in data[:10]])
data_index = 0
+
# Step 3: Function to generate a training batch for the skip-gram model.
def generate_batch(batch_size, num_skips, skip_window):
global data_index
@@ -108,7 +131,7 @@ def generate_batch(batch_size, num_skips, skip_window):
batch = np.ndarray(shape=(batch_size), dtype=np.int32)
labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)
span = 2 * skip_window + 1 # [ skip_window target skip_window ]
- buffer = collections.deque(maxlen=span)
+ buffer = collections.deque(maxlen=span) # pylint: disable=redefined-builtin
if data_index + span > len(data):
data_index = 0
buffer.extend(data[data_index:data_index + span])
@@ -120,7 +143,7 @@ def generate_batch(batch_size, num_skips, skip_window):
batch[i * num_skips + j] = buffer[skip_window]
labels[i * num_skips + j, 0] = buffer[context_word]
if data_index == len(data):
- buffer[:] = data[:span]
+ buffer.extend(data[0:span])
data_index = span
else:
buffer.append(data[data_index])
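
The buffer[:] = data[:span] to buffer.extend(data[0:span]) change in this hunk is a real bug fix, not a reformat: collections.deque rejects slice assignment, so the old line raised TypeError whenever data_index wrapped past the end of the corpus. A minimal reproduction:

    import collections

    buf = collections.deque(maxlen=3)
    buf.extend([10, 20, 30, 40])  # maxlen evicts the oldest item
    print(list(buf))              # [20, 30, 40]
    try:
        buf[:] = [1, 2, 3]        # the replaced code path
    except TypeError as err:
        print(err)                # deques do not support slice assignment
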
@@ -129,96 +152,130 @@ def generate_batch(batch_size, num_skips, skip_window):
data_index = (data_index + len(data) - span) % len(data)
return batch, labels
+
batch, labels = generate_batch(batch_size=8, num_skips=2, skip_window=1)
for i in range(8):
- print(batch[i], reverse_dictionary[batch[i]],
- '->', labels[i, 0], reverse_dictionary[labels[i, 0]])
+ print(batch[i], reverse_dictionary[batch[i]], '->', labels[i, 0],
+ reverse_dictionary[labels[i, 0]])
# Step 4: Build and train a skip-gram model.
batch_size = 128
embedding_size = 128 # Dimension of the embedding vector.
-skip_window = 1 # How many words to consider left and right.
-num_skips = 2 # How many times to reuse an input to generate a label.
-num_sampled = 64 # Number of negative examples to sample.
+skip_window = 1 # How many words to consider left and right.
+num_skips = 2 # How many times to reuse an input to generate a label.
+num_sampled = 64 # Number of negative examples to sample.
# We pick a random validation set to sample nearest neighbors. Here we limit the
# validation samples to the words that have a low numeric ID, which by
# construction are also the most frequent. These 3 variables are used only for
# displaying model accuracy, they don't affect calculation.
-valid_size = 16 # Random set of words to evaluate similarity on.
+valid_size = 16 # Random set of words to evaluate similarity on.
valid_window = 100 # Only pick dev samples in the head of the distribution.
valid_examples = np.random.choice(valid_window, valid_size, replace=False)
-
graph = tf.Graph()
with graph.as_default():
# Input data.
- train_inputs = tf.placeholder(tf.int32, shape=[batch_size])
- train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1])
- valid_dataset = tf.constant(valid_examples, dtype=tf.int32)
+ with tf.name_scope('inputs'):
+ train_inputs = tf.placeholder(tf.int32, shape=[batch_size])
+ train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1])
+ valid_dataset = tf.constant(valid_examples, dtype=tf.int32)
# Ops and variables pinned to the CPU because of missing GPU implementation
with tf.device('/cpu:0'):
# Look up embeddings for inputs.
- embeddings = tf.Variable(
- tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0))
- embed = tf.nn.embedding_lookup(embeddings, train_inputs)
+ with tf.name_scope('embeddings'):
+ embeddings = tf.Variable(
+ tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0))
+ embed = tf.nn.embedding_lookup(embeddings, train_inputs)
# Construct the variables for the NCE loss
- nce_weights = tf.Variable(
- tf.truncated_normal([vocabulary_size, embedding_size],
- stddev=1.0 / math.sqrt(embedding_size)))
- nce_biases = tf.Variable(tf.zeros([vocabulary_size]))
+ with tf.name_scope('weights'):
+ nce_weights = tf.Variable(
+ tf.truncated_normal(
+ [vocabulary_size, embedding_size],
+ stddev=1.0 / math.sqrt(embedding_size)))
+ with tf.name_scope('biases'):
+ nce_biases = tf.Variable(tf.zeros([vocabulary_size]))
# Compute the average NCE loss for the batch.
# tf.nce_loss automatically draws a new sample of the negative labels each
# time we evaluate the loss.
# Explanation of the meaning of NCE loss:
# http://mccormickml.com/2016/04/19/word2vec-tutorial-the-skip-gram-model/
- loss = tf.reduce_mean(
- tf.nn.nce_loss(weights=nce_weights,
- biases=nce_biases,
- labels=train_labels,
- inputs=embed,
- num_sampled=num_sampled,
- num_classes=vocabulary_size))
+ with tf.name_scope('loss'):
+ loss = tf.reduce_mean(
+ tf.nn.nce_loss(
+ weights=nce_weights,
+ biases=nce_biases,
+ labels=train_labels,
+ inputs=embed,
+ num_sampled=num_sampled,
+ num_classes=vocabulary_size))
+
+ # Add the loss value as a scalar to summary.
+ tf.summary.scalar('loss', loss)
# Construct the SGD optimizer using a learning rate of 1.0.
- optimizer = tf.train.GradientDescentOptimizer(1.0).minimize(loss)
+ with tf.name_scope('optimizer'):
+ optimizer = tf.train.GradientDescentOptimizer(1.0).minimize(loss)
# Compute the cosine similarity between minibatch examples and all embeddings.
- norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keep_dims=True))
+ norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keepdims=True))
normalized_embeddings = embeddings / norm
- valid_embeddings = tf.nn.embedding_lookup(
- normalized_embeddings, valid_dataset)
+ valid_embeddings = tf.nn.embedding_lookup(normalized_embeddings,
+ valid_dataset)
similarity = tf.matmul(
valid_embeddings, normalized_embeddings, transpose_b=True)
+ # Merge all summaries.
+ merged = tf.summary.merge_all()
+
# Add variable initializer.
init = tf.global_variables_initializer()
+ # Create a saver.
+ saver = tf.train.Saver()
+
# Step 5: Begin training.
num_steps = 100001
with tf.Session(graph=graph) as session:
+ # Open a writer to write summaries.
+ writer = tf.summary.FileWriter(FLAGS.log_dir, session.graph)
+
# We must initialize all variables before we use them.
init.run()
print('Initialized')
average_loss = 0
for step in xrange(num_steps):
- batch_inputs, batch_labels = generate_batch(
- batch_size, num_skips, skip_window)
+ batch_inputs, batch_labels = generate_batch(batch_size, num_skips,
+ skip_window)
feed_dict = {train_inputs: batch_inputs, train_labels: batch_labels}
+ # Define metadata variable.
+ run_metadata = tf.RunMetadata()
+
# We perform one update step by evaluating the optimizer op (including it
# in the list of returned values for session.run()).
- _, loss_val = session.run([optimizer, loss], feed_dict=feed_dict)
+ # Also evaluate the merged summaries op, and pass run_metadata to
+ # session.run() so the graph can later be visualized in TensorBoard.
+ _, summary, loss_val = session.run(
+ [optimizer, merged, loss],
+ feed_dict=feed_dict,
+ run_metadata=run_metadata)
average_loss += loss_val
+ # Add returned summaries to writer in each step.
+ writer.add_summary(summary, step)
+ # Add metadata to visualize the graph for the last run.
+ if step == (num_steps - 1):
+ writer.add_run_metadata(run_metadata, 'step%d' % step)
+
if step % 2000 == 0:
if step > 0:
average_loss /= 2000
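
Two details in the hunk above are worth a note: tf.reduce_sum's keep_dims argument was renamed keepdims, and the norm/matmul pair implements cosine similarity, since the dot product of L2-normalized vectors equals the cosine of the angle between them. The same computation in plain numpy:

    import numpy as np

    emb = np.random.rand(5, 4).astype(np.float32)
    norm = np.sqrt(np.sum(np.square(emb), axis=1, keepdims=True))
    normalized = emb / norm
    sim = np.matmul(normalized, normalized.T)  # sim[i, j] = cosine(emb[i], emb[j])
    print(np.allclose(np.diag(sim), 1.0))      # each vector matches itself: True
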
@@ -240,6 +297,23 @@ with tf.Session(graph=graph) as session:
print(log_str)
final_embeddings = normalized_embeddings.eval()
+ # Write corresponding labels for the embeddings.
+ with open(FLAGS.log_dir + '/metadata.tsv', 'w') as f:
+ for i in xrange(vocabulary_size):
+ f.write(reverse_dictionary[i] + '\n')
+
+ # Save the model for checkpoints.
+ saver.save(session, os.path.join(FLAGS.log_dir, 'model.ckpt'))
+
+ # Create a configuration for visualizing embeddings with the labels in TensorBoard.
+ config = projector.ProjectorConfig()
+ embedding_conf = config.embeddings.add()
+ embedding_conf.tensor_name = embeddings.name
+ embedding_conf.metadata_path = os.path.join(FLAGS.log_dir, 'metadata.tsv')
+ projector.visualize_embeddings(writer, config)
+
+writer.close()
+
# Step 6: Visualize the embeddings.
@@ -251,21 +325,24 @@ def plot_with_labels(low_dim_embs, labels, filename):
for i, label in enumerate(labels):
x, y = low_dim_embs[i, :]
plt.scatter(x, y)
- plt.annotate(label,
- xy=(x, y),
- xytext=(5, 2),
- textcoords='offset points',
- ha='right',
- va='bottom')
+ plt.annotate(
+ label,
+ xy=(x, y),
+ xytext=(5, 2),
+ textcoords='offset points',
+ ha='right',
+ va='bottom')
plt.savefig(filename)
+
try:
# pylint: disable=g-import-not-at-top
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
- tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000, method='exact')
+ tsne = TSNE(
+ perplexity=30, n_components=2, init='pca', n_iter=5000, method='exact')
plot_only = 500
low_dim_embs = tsne.fit_transform(final_embeddings[:plot_only, :])
labels = [reverse_dictionary[i] for i in xrange(plot_only)]
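
The t-SNE projection above feeds the plot_with_labels helper defined earlier in this hunk; a minimal sketch of the closing call, assuming (as the gettempdir import earlier in this file suggests) that the figure goes to a temp file:

    # Hypothetical closing call mirroring the helper above; the exact output
    # path is an assumption for illustration.
    plot_with_labels(low_dim_embs, labels, os.path.join(gettempdir(), 'tsne.png'))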