about | summary | refs | log | tree | commit | diff | homepage
path: root/tensorflow/examples/tutorials/word2vec/word2vec_basic.py
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/examples/tutorials/word2vec/word2vec_basic.py')
-rw-r--r-- tensorflow/examples/tutorials/word2vec/word2vec_basic.py | 16
1 file changed, 6 insertions, 10 deletions
diff --git a/tensorflow/examples/tutorials/word2vec/word2vec_basic.py b/tensorflow/examples/tutorials/word2vec/word2vec_basic.py
index aee482fda5..13e5717b0d 100644
--- a/tensorflow/examples/tutorials/word2vec/word2vec_basic.py
+++ b/tensorflow/examples/tutorials/word2vec/word2vec_basic.py
@@ -91,6 +91,7 @@ print('Sample data', data[:10], [reverse_dictionary[i] for i in data[:10]])
data_index = 0
+
# Step 3: Function to generate a training batch for the skip-gram model.
def generate_batch(batch_size, num_skips, skip_window):
global data_index
@@ -100,10 +101,9 @@ def generate_batch(batch_size, num_skips, skip_window):
labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)
span = 2 * skip_window + 1 # [ skip_window target skip_window ]
buffer = collections.deque(maxlen=span)
- if data_index + span > len(data):
- data_index = 0
- buffer.extend(data[data_index:data_index + span])
- data_index += span
+ for _ in range(span):
+ buffer.append(data[data_index])
+ data_index = (data_index + 1) % len(data)
for i in range(batch_size // num_skips):
target = skip_window # target label at the center of the buffer
targets_to_avoid = [skip_window]
@@ -113,12 +113,8 @@ def generate_batch(batch_size, num_skips, skip_window):
targets_to_avoid.append(target)
batch[i * num_skips + j] = buffer[skip_window]
labels[i * num_skips + j, 0] = buffer[target]
- if data_index == len(data):
- buffer[:] = data[:span]
- data_index = span
- else:
- buffer.append(data[data_index])
- data_index += 1
+ buffer.append(data[data_index])
+ data_index = (data_index + 1) % len(data)
# Backtrack a little bit to avoid skipping words in the end of a batch
data_index = (data_index + len(data) - span) % len(data)
return batch, labels