author    | 2016-04-18 17:56:51 -0800
committer | 2016-04-18 19:03:29 -0700
commit    | 5c9bc51857bc0c330d3ab976871ee3509647d1e7 (patch)
tree      | a58def7cbf316c6e091b3b36657f120f1388ec54 /tensorflow/models
parent    | fc432e37a7ddd408ff09a7b90b1c4cd5af1b134e (diff)
Merge changes from github.
Change: 120185825
Diffstat (limited to 'tensorflow/models')
-rw-r--r--  tensorflow/models/embedding/word2vec.py                 |  8
-rw-r--r--  tensorflow/models/embedding/word2vec_optimized.py       |  2
-rw-r--r--  tensorflow/models/embedding/word2vec_optimized_test.py  |  2
-rw-r--r--  tensorflow/models/embedding/word2vec_test.py            |  2
-rw-r--r--  tensorflow/models/image/cifar10/cifar10.py              |  7
-rw-r--r--  tensorflow/models/rnn/translate/data_utils.py           | 16
6 files changed, 18 insertions, 19 deletions
diff --git a/tensorflow/models/embedding/word2vec.py b/tensorflow/models/embedding/word2vec.py
index cf30548e14..9cb15d3f41 100644
--- a/tensorflow/models/embedding/word2vec.py
+++ b/tensorflow/models/embedding/word2vec.py
@@ -82,15 +82,15 @@ flags.DEFINE_boolean(
     "interactive", False,
     "If true, enters an IPython interactive session to play with the trained "
     "model. E.g., try model.analogy('france', 'paris', 'russia') and "
-    "model.nearby(['proton', 'elephant', 'maxwell']")
+    "model.nearby(['proton', 'elephant', 'maxwell'])")
 flags.DEFINE_integer("statistics_interval", 5,
                      "Print statistics every n seconds.")
 flags.DEFINE_integer("summary_interval", 5,
                      "Save training summary to file every n seconds (rounded "
-                     "up to statistics interval.")
+                     "up to statistics interval).")
 flags.DEFINE_integer("checkpoint_interval", 600,
                      "Checkpoint the model (i.e. save the parameters) every n "
-                     "seconds (rounded up to statistics interval.")
+                     "seconds (rounded up to statistics interval).")
 
 FLAGS = flags.FLAGS
 
@@ -420,7 +420,7 @@ class Word2Vec(object):
         last_summary_time = now
       if now - last_checkpoint_time > opts.checkpoint_interval:
         self.saver.save(self._session,
-                        opts.save_path + "model",
+                        os.path.join(opts.save_path, "model.ckpt"),
                         global_step=step.astype(int))
         last_checkpoint_time = now
     if epoch != initial_epoch:
diff --git a/tensorflow/models/embedding/word2vec_optimized.py b/tensorflow/models/embedding/word2vec_optimized.py
index 3b39c7da20..3ce795d5c8 100644
--- a/tensorflow/models/embedding/word2vec_optimized.py
+++ b/tensorflow/models/embedding/word2vec_optimized.py
@@ -79,7 +79,7 @@ flags.DEFINE_boolean(
     "interactive", False,
     "If true, enters an IPython interactive session to play with the trained "
     "model. E.g., try model.analogy('france', 'paris', 'russia') and "
-    "model.nearby(['proton', 'elephant', 'maxwell']")
+    "model.nearby(['proton', 'elephant', 'maxwell'])")
 
 FLAGS = flags.FLAGS
 
diff --git a/tensorflow/models/embedding/word2vec_optimized_test.py b/tensorflow/models/embedding/word2vec_optimized_test.py
index e0aa770a94..f8f9424bfc 100644
--- a/tensorflow/models/embedding/word2vec_optimized_test.py
+++ b/tensorflow/models/embedding/word2vec_optimized_test.py
@@ -23,7 +23,7 @@ import os
 
 import tensorflow as tf
 
-from tensorflow.models.embedding import word2vec_optimized as word2vec_optimized
+from tensorflow.models.embedding import word2vec_optimized
 
 flags = tf.app.flags
 
diff --git a/tensorflow/models/embedding/word2vec_test.py b/tensorflow/models/embedding/word2vec_test.py
index 8378e1f5ad..1eb9182a65 100644
--- a/tensorflow/models/embedding/word2vec_test.py
+++ b/tensorflow/models/embedding/word2vec_test.py
@@ -23,7 +23,7 @@ import os
 
 import tensorflow as tf
 
-from tensorflow.models.embedding import word2vec as word2vec
+from tensorflow.models.embedding import word2vec
 
 flags = tf.app.flags
 
diff --git a/tensorflow/models/image/cifar10/cifar10.py b/tensorflow/models/image/cifar10/cifar10.py
index f2fd3e19fe..503ea09296 100644
--- a/tensorflow/models/image/cifar10/cifar10.py
+++ b/tensorflow/models/image/cifar10/cifar10.py
@@ -221,11 +221,8 @@ def inference(images):
   # local3
   with tf.variable_scope('local3') as scope:
     # Move everything into depth so we can perform a single matrix multiply.
-    dim = 1
-    for d in pool2.get_shape()[1:].as_list():
-      dim *= d
-    reshape = tf.reshape(pool2, [FLAGS.batch_size, dim])
-
+    reshape = tf.reshape(pool2, [FLAGS.batch_size, -1])
+    dim = reshape.get_shape()[1].value
     weights = _variable_with_weight_decay('weights', shape=[dim, 384],
                                           stddev=0.04, wd=0.004)
     biases = _variable_on_cpu('biases', [384], tf.constant_initializer(0.1))
diff --git a/tensorflow/models/rnn/translate/data_utils.py b/tensorflow/models/rnn/translate/data_utils.py
index 001182bbd5..10c908cdc8 100644
--- a/tensorflow/models/rnn/translate/data_utils.py
+++ b/tensorflow/models/rnn/translate/data_utils.py
@@ -242,13 +242,15 @@ def data_to_token_ids(data_path, target_path, vocabulary_path,
           tokens_file.write(" ".join([str(tok) for tok in token_ids]) + "\n")
 
 
-def prepare_wmt_data(data_dir, en_vocabulary_size, fr_vocabulary_size):
+def prepare_wmt_data(data_dir, en_vocabulary_size, fr_vocabulary_size, tokenizer=None):
   """Get WMT data into data_dir, create vocabularies and tokenize data.
 
   Args:
     data_dir: directory in which the data sets will be stored.
     en_vocabulary_size: size of the English vocabulary to create and use.
     fr_vocabulary_size: size of the French vocabulary to create and use.
+    tokenizer: a function to use to tokenize each data sentence;
+      if None, basic_tokenizer will be used.
 
   Returns:
     A tuple of 6 elements:
@@ -266,20 +268,20 @@ def prepare_wmt_data(data_dir, en_vocabulary_size, fr_vocabulary_size):
   # Create vocabularies of the appropriate sizes.
   fr_vocab_path = os.path.join(data_dir, "vocab%d.fr" % fr_vocabulary_size)
   en_vocab_path = os.path.join(data_dir, "vocab%d.en" % en_vocabulary_size)
-  create_vocabulary(fr_vocab_path, train_path + ".fr", fr_vocabulary_size)
-  create_vocabulary(en_vocab_path, train_path + ".en", en_vocabulary_size)
+  create_vocabulary(fr_vocab_path, train_path + ".fr", fr_vocabulary_size, tokenizer)
+  create_vocabulary(en_vocab_path, train_path + ".en", en_vocabulary_size, tokenizer)
 
   # Create token ids for the training data.
   fr_train_ids_path = train_path + (".ids%d.fr" % fr_vocabulary_size)
   en_train_ids_path = train_path + (".ids%d.en" % en_vocabulary_size)
-  data_to_token_ids(train_path + ".fr", fr_train_ids_path, fr_vocab_path)
-  data_to_token_ids(train_path + ".en", en_train_ids_path, en_vocab_path)
+  data_to_token_ids(train_path + ".fr", fr_train_ids_path, fr_vocab_path, tokenizer)
+  data_to_token_ids(train_path + ".en", en_train_ids_path, en_vocab_path, tokenizer)
 
   # Create token ids for the development data.
   fr_dev_ids_path = dev_path + (".ids%d.fr" % fr_vocabulary_size)
   en_dev_ids_path = dev_path + (".ids%d.en" % en_vocabulary_size)
-  data_to_token_ids(dev_path + ".fr", fr_dev_ids_path, fr_vocab_path)
-  data_to_token_ids(dev_path + ".en", en_dev_ids_path, en_vocab_path)
+  data_to_token_ids(dev_path + ".fr", fr_dev_ids_path, fr_vocab_path, tokenizer)
+  data_to_token_ids(dev_path + ".en", en_dev_ids_path, en_vocab_path, tokenizer)
 
   return (en_train_ids_path, fr_train_ids_path,
           en_dev_ids_path, fr_dev_ids_path,
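The cifar10.py hunk replaces a hand-rolled product over the non-batch dimensions with `tf.reshape`'s `-1` inference, then reads the flattened width back from the static shape. Below is a minimal sketch of the two idioms side by side, assuming a graph-mode TensorFlow contemporary with this commit (0.x-era API, where `Dimension.value` exists); the stand-in `pool2` shape is illustrative, not taken from the commit.

```python
import tensorflow as tf

batch_size = 128
# Stand-in for the pool2 activation map: [batch, height, width, depth].
pool2 = tf.zeros([batch_size, 6, 6, 64])

# Old idiom from the diff: multiply out the non-batch dimensions by hand.
dim = 1
for d in pool2.get_shape()[1:].as_list():
  dim *= d  # 6 * 6 * 64 = 2304
reshape_old = tf.reshape(pool2, [batch_size, dim])

# New idiom: let reshape infer the flattened size, then recover it from
# the static shape. Both produce a [batch_size, 2304] tensor.
reshape_new = tf.reshape(pool2, [batch_size, -1])
dim = reshape_new.get_shape()[1].value
```

The data_utils.py hunk threads an optional `tokenizer` callable through `prepare_wmt_data` into `create_vocabulary` and `data_to_token_ids`. A usage sketch follows; the lowercasing tokenizer and the data directory are assumptions for illustration, not part of the commit, and a real call downloads the WMT corpora into that directory.

```python
from tensorflow.models.rnn.translate import data_utils

def lowercase_tokenizer(sentence):
  # Any callable mapping a sentence string to a list of token strings works;
  # passing tokenizer=None falls back to data_utils.basic_tokenizer.
  return sentence.lower().split()

paths = data_utils.prepare_wmt_data(
    "/tmp/wmt_data", en_vocabulary_size=40000, fr_vocabulary_size=40000,
    tokenizer=lowercase_tokenizer)
```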