path: root/tensorflow/models
author     Illia Polosukhin <ilblackdragon@gmail.com>    2016-04-18 17:56:51 -0800
committer  TensorFlower Gardener <gardener@tensorflow.org>    2016-04-18 19:03:29 -0700
commit  5c9bc51857bc0c330d3ab976871ee3509647d1e7 (patch)
tree    a58def7cbf316c6e091b3b36657f120f1388ec54 /tensorflow/models
parent  fc432e37a7ddd408ff09a7b90b1c4cd5af1b134e (diff)
Merge changes from github.
Change: 120185825
Diffstat (limited to 'tensorflow/models')
-rw-r--r--  tensorflow/models/embedding/word2vec.py                |  8
-rw-r--r--  tensorflow/models/embedding/word2vec_optimized.py      |  2
-rw-r--r--  tensorflow/models/embedding/word2vec_optimized_test.py |  2
-rw-r--r--  tensorflow/models/embedding/word2vec_test.py           |  2
-rw-r--r--  tensorflow/models/image/cifar10/cifar10.py             |  7
-rw-r--r--  tensorflow/models/rnn/translate/data_utils.py          | 16
6 files changed, 18 insertions, 19 deletions
diff --git a/tensorflow/models/embedding/word2vec.py b/tensorflow/models/embedding/word2vec.py
index cf30548e14..9cb15d3f41 100644
--- a/tensorflow/models/embedding/word2vec.py
+++ b/tensorflow/models/embedding/word2vec.py
@@ -82,15 +82,15 @@ flags.DEFINE_boolean(
"interactive", False,
"If true, enters an IPython interactive session to play with the trained "
"model. E.g., try model.analogy('france', 'paris', 'russia') and "
- "model.nearby(['proton', 'elephant', 'maxwell']")
+ "model.nearby(['proton', 'elephant', 'maxwell'])")
flags.DEFINE_integer("statistics_interval", 5,
"Print statistics every n seconds.")
flags.DEFINE_integer("summary_interval", 5,
"Save training summary to file every n seconds (rounded "
- "up to statistics interval.")
+ "up to statistics interval).")
flags.DEFINE_integer("checkpoint_interval", 600,
"Checkpoint the model (i.e. save the parameters) every n "
- "seconds (rounded up to statistics interval.")
+ "seconds (rounded up to statistics interval).")
FLAGS = flags.FLAGS
@@ -420,7 +420,7 @@ class Word2Vec(object):
last_summary_time = now
if now - last_checkpoint_time > opts.checkpoint_interval:
self.saver.save(self._session,
- opts.save_path + "model",
+ os.path.join(opts.save_path, "model.ckpt"),
global_step=step.astype(int))
last_checkpoint_time = now
if epoch != initial_epoch:
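
The checkpoint change above switches from string concatenation (opts.save_path + "model") to os.path.join, so checkpoints land inside the save directory instead of alongside it. A minimal standalone sketch of the same pattern, assuming a hypothetical /tmp/word2vec_demo directory and the TF 0.x-era Saver API (not code from the commit):

    import os
    import tensorflow as tf

    save_path = "/tmp/word2vec_demo"                    # hypothetical output directory
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    emb = tf.Variable(tf.zeros([100, 32]), name="emb")  # stand-in for the embeddings
    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())         # TF 0.x-era initializer
        # os.path.join yields ".../word2vec_demo/model.ckpt" rather than the
        # concatenated ".../word2vec_demomodel" the old code produced.
        saver.save(sess, os.path.join(save_path, "model.ckpt"), global_step=0)
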
diff --git a/tensorflow/models/embedding/word2vec_optimized.py b/tensorflow/models/embedding/word2vec_optimized.py
index 3b39c7da20..3ce795d5c8 100644
--- a/tensorflow/models/embedding/word2vec_optimized.py
+++ b/tensorflow/models/embedding/word2vec_optimized.py
@@ -79,7 +79,7 @@ flags.DEFINE_boolean(
"interactive", False,
"If true, enters an IPython interactive session to play with the trained "
"model. E.g., try model.analogy('france', 'paris', 'russia') and "
- "model.nearby(['proton', 'elephant', 'maxwell']")
+ "model.nearby(['proton', 'elephant', 'maxwell'])")
FLAGS = flags.FLAGS
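
The flag whose help string is corrected above is an ordinary tf.app.flags definition; only its documentation changes. A small standalone sketch of how such a flag is declared and read (illustrative only, not taken from the commit):

    from __future__ import print_function

    import tensorflow as tf

    flags = tf.app.flags
    flags.DEFINE_boolean(
        "interactive", False,
        "If true, enters an IPython interactive session to play with the trained "
        "model.")
    FLAGS = flags.FLAGS

    def main(_):
        # FLAGS values are populated from argv when tf.app.run() parses the command line.
        print("interactive =", FLAGS.interactive)

    if __name__ == "__main__":
        tf.app.run()
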
diff --git a/tensorflow/models/embedding/word2vec_optimized_test.py b/tensorflow/models/embedding/word2vec_optimized_test.py
index e0aa770a94..f8f9424bfc 100644
--- a/tensorflow/models/embedding/word2vec_optimized_test.py
+++ b/tensorflow/models/embedding/word2vec_optimized_test.py
@@ -23,7 +23,7 @@ import os
import tensorflow as tf
-from tensorflow.models.embedding import word2vec_optimized as word2vec_optimized
+from tensorflow.models.embedding import word2vec_optimized
flags = tf.app.flags
diff --git a/tensorflow/models/embedding/word2vec_test.py b/tensorflow/models/embedding/word2vec_test.py
index 8378e1f5ad..1eb9182a65 100644
--- a/tensorflow/models/embedding/word2vec_test.py
+++ b/tensorflow/models/embedding/word2vec_test.py
@@ -23,7 +23,7 @@ import os
import tensorflow as tf
-from tensorflow.models.embedding import word2vec as word2vec
+from tensorflow.models.embedding import word2vec
flags = tf.app.flags
diff --git a/tensorflow/models/image/cifar10/cifar10.py b/tensorflow/models/image/cifar10/cifar10.py
index f2fd3e19fe..503ea09296 100644
--- a/tensorflow/models/image/cifar10/cifar10.py
+++ b/tensorflow/models/image/cifar10/cifar10.py
@@ -221,11 +221,8 @@ def inference(images):
# local3
with tf.variable_scope('local3') as scope:
# Move everything into depth so we can perform a single matrix multiply.
- dim = 1
- for d in pool2.get_shape()[1:].as_list():
- dim *= d
- reshape = tf.reshape(pool2, [FLAGS.batch_size, dim])
-
+ reshape = tf.reshape(pool2, [FLAGS.batch_size, -1])
+ dim = reshape.get_shape()[1].value
weights = _variable_with_weight_decay('weights', shape=[dim, 384],
stddev=0.04, wd=0.004)
biases = _variable_on_cpu('biases', [384], tf.constant_initializer(0.1))
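
The cifar10.py change above replaces the hand-computed flattened dimension with a -1 in tf.reshape, then reads the inferred size back from the static shape. A minimal sketch of the idiom, assuming TF 0.x static-shape accessors and an illustrative pool-output shape:

    import tensorflow as tf

    batch_size = 4                                   # illustrative value
    pool2 = tf.zeros([batch_size, 6, 6, 64])         # stand-in for the real pool2 tensor
    reshape = tf.reshape(pool2, [batch_size, -1])    # -1 lets TensorFlow infer 6*6*64
    dim = reshape.get_shape()[1].value               # static size of the flattened axis
    print(dim)                                       # 2304
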
diff --git a/tensorflow/models/rnn/translate/data_utils.py b/tensorflow/models/rnn/translate/data_utils.py
index 001182bbd5..10c908cdc8 100644
--- a/tensorflow/models/rnn/translate/data_utils.py
+++ b/tensorflow/models/rnn/translate/data_utils.py
@@ -242,13 +242,15 @@ def data_to_token_ids(data_path, target_path, vocabulary_path,
tokens_file.write(" ".join([str(tok) for tok in token_ids]) + "\n")
-def prepare_wmt_data(data_dir, en_vocabulary_size, fr_vocabulary_size):
+def prepare_wmt_data(data_dir, en_vocabulary_size, fr_vocabulary_size, tokenizer=None):
"""Get WMT data into data_dir, create vocabularies and tokenize data.
Args:
data_dir: directory in which the data sets will be stored.
en_vocabulary_size: size of the English vocabulary to create and use.
fr_vocabulary_size: size of the French vocabulary to create and use.
+ tokenizer: a function to use to tokenize each data sentence;
+ if None, basic_tokenizer will be used.
Returns:
A tuple of 6 elements:
@@ -266,20 +268,20 @@ def prepare_wmt_data(data_dir, en_vocabulary_size, fr_vocabulary_size):
# Create vocabularies of the appropriate sizes.
fr_vocab_path = os.path.join(data_dir, "vocab%d.fr" % fr_vocabulary_size)
en_vocab_path = os.path.join(data_dir, "vocab%d.en" % en_vocabulary_size)
- create_vocabulary(fr_vocab_path, train_path + ".fr", fr_vocabulary_size)
- create_vocabulary(en_vocab_path, train_path + ".en", en_vocabulary_size)
+ create_vocabulary(fr_vocab_path, train_path + ".fr", fr_vocabulary_size, tokenizer)
+ create_vocabulary(en_vocab_path, train_path + ".en", en_vocabulary_size, tokenizer)
# Create token ids for the training data.
fr_train_ids_path = train_path + (".ids%d.fr" % fr_vocabulary_size)
en_train_ids_path = train_path + (".ids%d.en" % en_vocabulary_size)
- data_to_token_ids(train_path + ".fr", fr_train_ids_path, fr_vocab_path)
- data_to_token_ids(train_path + ".en", en_train_ids_path, en_vocab_path)
+ data_to_token_ids(train_path + ".fr", fr_train_ids_path, fr_vocab_path, tokenizer)
+ data_to_token_ids(train_path + ".en", en_train_ids_path, en_vocab_path, tokenizer)
# Create token ids for the development data.
fr_dev_ids_path = dev_path + (".ids%d.fr" % fr_vocabulary_size)
en_dev_ids_path = dev_path + (".ids%d.en" % en_vocabulary_size)
- data_to_token_ids(dev_path + ".fr", fr_dev_ids_path, fr_vocab_path)
- data_to_token_ids(dev_path + ".en", en_dev_ids_path, en_vocab_path)
+ data_to_token_ids(dev_path + ".fr", fr_dev_ids_path, fr_vocab_path, tokenizer)
+ data_to_token_ids(dev_path + ".en", en_dev_ids_path, en_vocab_path, tokenizer)
return (en_train_ids_path, fr_train_ids_path,
en_dev_ids_path, fr_dev_ids_path,
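
The data_utils.py change threads an optional tokenizer callable from prepare_wmt_data down into create_vocabulary and data_to_token_ids; when it is None, the existing basic_tokenizer is still used. A hedged sketch of a caller-supplied tokenizer (the character-level splitter below is hypothetical, not part of the commit):

    from tensorflow.models.rnn.translate import data_utils

    def char_tokenizer(sentence):
        """Split a sentence into individual non-whitespace characters."""
        return [c for c in sentence.strip() if not c.isspace()]

    # prepare_wmt_data downloads the WMT corpora and builds vocabularies and
    # token-id files, so the call is shown commented out; omitting tokenizer
    # (or passing None) keeps the default basic_tokenizer behavior.
    # (en_train, fr_train, en_dev, fr_dev,
    #  en_vocab, fr_vocab) = data_utils.prepare_wmt_data(
    #      "/tmp/wmt_data", 40000, 40000, tokenizer=char_tokenizer)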