diff options
Diffstat (limited to 'tensorflow/examples/tutorials/word2vec/word2vec_basic.py')
-rw-r--r-- | tensorflow/examples/tutorials/word2vec/word2vec_basic.py | 16 |
1 files changed, 10 insertions, 6 deletions
diff --git a/tensorflow/examples/tutorials/word2vec/word2vec_basic.py b/tensorflow/examples/tutorials/word2vec/word2vec_basic.py index 13e5717b0d..aee482fda5 100644 --- a/tensorflow/examples/tutorials/word2vec/word2vec_basic.py +++ b/tensorflow/examples/tutorials/word2vec/word2vec_basic.py @@ -91,7 +91,6 @@ print('Sample data', data[:10], [reverse_dictionary[i] for i in data[:10]]) data_index = 0 - # Step 3: Function to generate a training batch for the skip-gram model. def generate_batch(batch_size, num_skips, skip_window): global data_index @@ -101,9 +100,10 @@ def generate_batch(batch_size, num_skips, skip_window): labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32) span = 2 * skip_window + 1 # [ skip_window target skip_window ] buffer = collections.deque(maxlen=span) - for _ in range(span): - buffer.append(data[data_index]) - data_index = (data_index + 1) % len(data) + if data_index + span > len(data): + data_index = 0 + buffer.extend(data[data_index:data_index + span]) + data_index += span for i in range(batch_size // num_skips): target = skip_window # target label at the center of the buffer targets_to_avoid = [skip_window] @@ -113,8 +113,12 @@ def generate_batch(batch_size, num_skips, skip_window): targets_to_avoid.append(target) batch[i * num_skips + j] = buffer[skip_window] labels[i * num_skips + j, 0] = buffer[target] - buffer.append(data[data_index]) - data_index = (data_index + 1) % len(data) + if data_index == len(data): + buffer[:] = data[:span] + data_index = span + else: + buffer.append(data[data_index]) + data_index += 1 # Backtrack a little bit to avoid skipping words in the end of a batch data_index = (data_index + len(data) - span) % len(data) return batch, labels |