diff options
Diffstat (limited to 'tensorflow/examples/tutorials/word2vec/word2vec_basic.py')
-rw-r--r-- | tensorflow/examples/tutorials/word2vec/word2vec_basic.py | 16 |
1 files changed, 6 insertions, 10 deletions
diff --git a/tensorflow/examples/tutorials/word2vec/word2vec_basic.py b/tensorflow/examples/tutorials/word2vec/word2vec_basic.py
index aee482fda5..13e5717b0d 100644
--- a/tensorflow/examples/tutorials/word2vec/word2vec_basic.py
+++ b/tensorflow/examples/tutorials/word2vec/word2vec_basic.py
@@ -91,6 +91,7 @@ print('Sample data', data[:10], [reverse_dictionary[i] for i in data[:10]])
 
 data_index = 0
 
+
 # Step 3: Function to generate a training batch for the skip-gram model.
 def generate_batch(batch_size, num_skips, skip_window):
   global data_index
@@ -100,10 +101,9 @@ def generate_batch(batch_size, num_skips, skip_window):
   labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)
   span = 2 * skip_window + 1  # [ skip_window target skip_window ]
   buffer = collections.deque(maxlen=span)
-  if data_index + span > len(data):
-    data_index = 0
-  buffer.extend(data[data_index:data_index + span])
-  data_index += span
+  for _ in range(span):
+    buffer.append(data[data_index])
+    data_index = (data_index + 1) % len(data)
   for i in range(batch_size // num_skips):
     target = skip_window  # target label at the center of the buffer
     targets_to_avoid = [skip_window]
@@ -113,12 +113,8 @@ def generate_batch(batch_size, num_skips, skip_window):
       targets_to_avoid.append(target)
       batch[i * num_skips + j] = buffer[skip_window]
       labels[i * num_skips + j, 0] = buffer[target]
-    if data_index == len(data):
-      buffer[:] = data[:span]
-      data_index = span
-    else:
-      buffer.append(data[data_index])
-      data_index += 1
+    buffer.append(data[data_index])
+    data_index = (data_index + 1) % len(data)
   # Backtrack a little bit to avoid skipping words in the end of a batch
   data_index = (data_index + len(data) - span) % len(data)
   return batch, labels