about summary refs log tree commit diff homepage
path: root/tensorflow/examples/tutorials/word2vec/word2vec_basic.py
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/examples/tutorials/word2vec/word2vec_basic.py')
-rw-r--r-- tensorflow/examples/tutorials/word2vec/word2vec_basic.py 16
1 files changed, 10 insertions, 6 deletions
diff --git a/tensorflow/examples/tutorials/word2vec/word2vec_basic.py b/tensorflow/examples/tutorials/word2vec/word2vec_basic.py
index 13e5717b0d..aee482fda5 100644
--- a/tensorflow/examples/tutorials/word2vec/word2vec_basic.py
+++ b/tensorflow/examples/tutorials/word2vec/word2vec_basic.py
@@ -91,7 +91,6 @@ print('Sample data', data[:10], [reverse_dictionary[i] for i in data[:10]])
data_index = 0
-
# Step 3: Function to generate a training batch for the skip-gram model.
def generate_batch(batch_size, num_skips, skip_window):
global data_index
@@ -101,9 +100,10 @@ def generate_batch(batch_size, num_skips, skip_window):
labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)
span = 2 * skip_window + 1 # [ skip_window target skip_window ]
buffer = collections.deque(maxlen=span)
- for _ in range(span):
- buffer.append(data[data_index])
- data_index = (data_index + 1) % len(data)
+ if data_index + span > len(data):
+ data_index = 0
+ buffer.extend(data[data_index:data_index + span])
+ data_index += span
for i in range(batch_size // num_skips):
target = skip_window # target label at the center of the buffer
targets_to_avoid = [skip_window]
@@ -113,8 +113,12 @@ def generate_batch(batch_size, num_skips, skip_window):
targets_to_avoid.append(target)
batch[i * num_skips + j] = buffer[skip_window]
labels[i * num_skips + j, 0] = buffer[target]
- buffer.append(data[data_index])
- data_index = (data_index + 1) % len(data)
+ if data_index == len(data):
+ buffer[:] = data[:span]
+ data_index = span
+ else:
+ buffer.append(data[data_index])
+ data_index += 1
# Backtrack a little bit to avoid skipping words in the end of a batch
data_index = (data_index + len(data) - span) % len(data)
return batch, labels