使用TensorFlow v2.0实现Word2Vec算法计算单词的向量表示,这个例子是使用一小部分维基百科文章来训练的。

更多信息请查看论文: Mikolov, Tomas et al. “Efficient Estimation of Word Representations in Vector Space.”, 2013 [1]

from __future__ import division, print_function, absolute_import

import collections
import os
import random
import urllib
import urllib.request
import zipfile

import numpy as np
import tensorflow as tf

  1. learning_rate = 0.1
  2. batch_size = 128
  3. num_steps = 3000000
  4. display_step = 10000
  5. eval_step = 200000
  6. # 训练参数
  7. learning_rate = 0.1
  8. batch_size = 128
  9. num_steps = 3000000
  10. display_step = 10000
  11. eval_step = 200000
  12. # 评估参数
  13. eval_words = ['five', 'of', 'going', 'hardware', 'american', 'britain']
  14. # Word2Vec 参数
  15. embedding_size = 200 # 嵌入向量的维度 vector.
  16. max_vocabulary_size = 50000 # 词汇表中不同单词的总数words in the vocabulary.
  17. min_occurrence = 10 # 删除出现小于n次的所有单词
  18. skip_window = 3 # 左右各要考虑多少个单词
  19. num_skips = 2 # 重复使用输入生成标签的次数
  20. num_sampled = 64 # 负采样数量
  1. # 下载一小部分维基百科文章集
  2. url = 'http://mattmahoney.net/dc/text8.zip'
  3. data_path = 'text8.zip'
  4. if not os.path.exists(data_path):
  5. print("Downloading the dataset... (It may take some time)")
  6. filename, _ = urllib.urlretrieve(url, data_path)
  7. print("Done!")
  8. # 解压数据集文件,文本已处理完毕
  9. with zipfile.ZipFile(data_path) as f:
  10. text_words = f.read(f.namelist()[0]).lower().split()
  1. # 构建词典并用 UNK 标记替换频数较低的词
  2. count = [('UNK', -1)]
  3. # 检索最常见的单词
  4. count.extend(collections.Counter(text_words).most_common(max_vocabulary_size - 1))
  5. # 删除少于'min_occurrence'次数的样本
  6. for i in range(len(count) - 1, -1, -1):
  7. if count[i][1] < min_occurrence:
  8. count.pop(i)
  9. else:
  10. #该集合是有序的,因此在当出现小于'min_occurrence'时停止
  11. break
  12. # 计算单词表单词个数
  13. vocabulary_size = len(count)
  14. # 为每一个词分配id
  15. word2id = dict()
  16. for i, (word, _)in enumerate(count):
  17. word2id[word] = i
  18. data = list()
  19. unk_count = 0
  20. for word in text_words:
  21. # 检索单词id,或者如果不在字典中则为其指定索引0('UNK')
  22. index = word2id.get(word, 0)
  23. if index == 0:
  24. unk_count = 1
  25. data.append(index)
  26. count[0] = ('UNK', unk_count)
  27. id2word = dict(zip(word2id.values(), word2id.keys()))
  28. print("Words count:", len(text_words))
  29. print("Unique words:", len(set(text_words)))
  30. print("Vocabulary size:", vocabulary_size)
  31. print("Most common words:", count[:10])

output:

  1. Words count: 17005207
  2. Unique words: 253854
  3. Vocabulary size: 47135
  4. Most common words: [('UNK', 444176), ('the', 1061396), ('of', 593677), ('and', 416629), ('one', 411764), ('in', 372201), ('a', 325873), ('to', 316376), ('zero', 264975), ('nine', 250430)]
  1. data_index = 0
  2. # 为skip-gram模型生成训练批次
  3. def next_batch(batch_size, num_skips, skip_window):
  4. global data_index
  5. assert batch_size % num_skips == 0
  6. assert num_skips <= 2 * skip_window
  7. batch = np.ndarray(shape=(batch_size), dtype=np.int32)
  8. labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)
  9. # 得到窗口长度( 当前单词左边和右边 当前单词)
  10. span = 2 * skip_window 1
  11. buffer = collections.deque(maxlen=span)
  12. if data_index span > len(data):
  13. data_index = 0
  14. buffer.extend(data[data_index:data_index span])
  15. data_index = span
  16. for i in range(batch_size // num_skips):
  17. context_words = [w for w in range(span) if w != skip_window]
  18. words_to_use = random.sample(context_words, num_skips)
  19. for j, context_word in enumerate(words_to_use):
  20. batch[i * num_skips j] = buffer[skip_window]
  21. labels[i * num_skips j, 0] = buffer[context_word]
  22. if data_index == len(data):
  23. buffer.extend(data[0:span])
  24. data_index = span
  25. else:
  26. buffer.append(data[data_index])
  27. data_index = 1
  28. #回溯一点,以避免在批处理结束时跳过单词
  29. data_index = (data_index len(data) - span) % len(data)
  30. return batch, labels
  1. # 确保在CPU上分配以下操作和变量
  2. # (某些操作在GPU上不兼容)
  3. with tf.device('/cpu:0'):
  4. # 创建嵌入变量(每一行代表一个词嵌入向量) embedding vector).
  5. embedding = tf.Variable(tf.random.normal([vocabulary_size, embedding_size]))
  6. # 构造NCE损失的变量
  7. nce_weights = tf.Variable(tf.random.normal([vocabulary_size, embedding_size]))
  8. nce_biases = tf.Variable(tf.zeros([vocabulary_size]))
  9. def get_embedding(x):
  10. with tf.device('/cpu:0'):
  11. # 对于X中的每一个样本查找对应的嵌入向量
  12. x_embed = tf.nn.embedding_lookup(embedding, x)
  13. return x_embed
  14. def nce_loss(x_embed, y):
  15. with tf.device('/cpu:0'):
  16. # 计算批处理的平均NCE损失
  17. y = tf.cast(y, tf.int64)
  18. loss = tf.reduce_mean(
  19. tf.nn.nce_loss(weights=nce_weights,
  20. biases=nce_biases,
  21. labels=y,
  22. inputs=x_embed,
  23. num_sampled=num_sampled,
  24. num_classes=vocabulary_size))
  25. return loss
  26. # 评估
  27. def evaluate(x_embed):
  28. with tf.device('/cpu:0'):
  29. # 计算输入数据嵌入与每个嵌入向量之间的余弦相似度
  30. x_embed = tf.cast(x_embed, tf.float32)
  31. x_embed_norm = x_embed / tf.sqrt(tf.reduce_sum(tf.square(x_embed)))
  32. embedding_norm = embedding / tf.sqrt(tf.reduce_sum(tf.square(embedding), 1, keepdims=True), tf.float32)
  33. cosine_sim_op = tf.matmul(x_embed_norm, embedding_norm, transpose_b=True)
  34. return cosine_sim_op
  35. # 定义优化器
  36. optimizer = tf.optimizers.SGD(learning_rate)
  1. # 优化过程
  2. def run_optimization(x, y):
  3. with tf.device('/cpu:0'):
  4. # 将计算封装在GradientTape中以实现自动微分
  5. with tf.GradientTape() as g:
  6. emb = get_embedding(x)
  7. loss = nce_loss(emb, y)
  8. # 计算梯度
  9. gradients = g.gradient(loss, [embedding, nce_weights, nce_biases])
  10. # 按gradients更新 W 和 b
  11. optimizer.apply_gradients(zip(gradients, [embedding, nce_weights, nce_biases]))
  1. # 用于测试的单词
  2. x_test = np.array([word2id[w] for w in eval_words])
  3. # 针对给定步骤数进行训练
  4. for step in xrange(1, num_steps 1):
  5. batch_x, batch_y = next_batch(batch_size, num_skips, skip_window)
  6. run_optimization(batch_x, batch_y)
  7. if step % display_step == 0 or step == 1:
  8. loss = nce_loss(get_embedding(batch_x), batch_y)
  9. print("step: %i, loss: %f" % (step, loss))
  10. # 评估
  11. if step % eval_step == 0 or step == 1:
  12. print("Evaluation...")
  13. sim = evaluate(get_embedding(x_test)).numpy()
  14. for i in xrange(len(eval_words)):
  15. top_k = 8 # 最相似的单词数量
  16. nearest = (-sim[i, :]).argsort()[1:top_k 1]
  17. log_str = '"%s" nearest neighbors:' % eval_words[i]
  18. for k in xrange(top_k):
  19. log_str = '%s %s,' % (log_str, id2word[nearest[k]])
  20. print(log_str)
  1. step: 1, loss: 504.444214
  2. Evaluation...
  3. "five" nearest neighbors: censure, stricken, anglicanism, stick, streetcars, shrines, horrified, sparkle,
  4. "of" nearest neighbors: jolly, weary, clinicians, kerouac, economist, owls, safe, playoff,
  5. "going" nearest neighbors: filament, platforms, moderately, micheal, despotic, krag, disclosed, your,
  6. "hardware" nearest neighbors: occupants, paraffin, vera, reorganized, rename, declares, prima, condoned,
  7. "american" nearest neighbors: portfolio, rhein, aalto, angle, lifeson, tucker, sexton, dench,
  8. "britain" nearest neighbors: indivisible, disbelief, scripture, pepsi, scriptores, sighting, napalm, strike,
  9. step: 10000, loss: 117.166962
  10. step: 20000, loss: 65.478333
  11. step: 30000, loss: 46.580460
  12. step: 40000, loss: 25.563128
  13. step: 50000, loss: 50.924446
  14. step: 60000, loss: 51.696526
  15. step: 70000, loss: 17.272142
  16. step: 80000, loss: 32.579414
  17. step: 90000, loss: 68.372032
  18. step: 100000, loss: 36.026573
  19. step: 110000, loss: 22.502020
  20. step: 120000, loss: 15.788742
  21. step: 130000, loss: 31.832420
  22. step: 140000, loss: 25.096617
  23. step: 150000, loss: 12.013027
  24. step: 160000, loss: 20.574780
  25. step: 170000, loss: 12.201975
  26. step: 180000, loss: 20.983793
  27. step: 190000, loss: 11.366720
  28. step: 200000, loss: 19.431549
  29. Evaluation...
  30. "five" nearest neighbors: three, four, eight, six, two, seven, nine, zero,
  31. "of" nearest neighbors: the, a, and, first, with, on, but, from,
  32. "going" nearest neighbors: have, more, used, out, be, with, on, however,
  33. "hardware" nearest neighbors: be, known, system, apollo, and, a, such, used,
  34. "american" nearest neighbors: UNK, and, from, s, at, in, after, about,
  35. "britain" nearest neighbors: of, and, many, the, as, used, but, such,
  36. step: 210000, loss: 16.361233
  37. step: 220000, loss: 17.529526
  38. step: 230000, loss: 16.805817
  39. step: 240000, loss: 6.365625
  40. step: 250000, loss: 8.083097
  41. step: 260000, loss: 11.262514
  42. step: 270000, loss: 9.842708
  43. step: 280000, loss: 6.363440
  44. step: 290000, loss: 8.732617
  45. step: 300000, loss: 10.484728
  46. step: 310000, loss: 12.099487
  47. step: 320000, loss: 11.496288
  48. step: 330000, loss: 9.283813
  49. step: 340000, loss: 10.777218
  50. step: 350000, loss: 16.310440
  51. step: 360000, loss: 7.495782
  52. step: 370000, loss: 9.287696
  53. step: 380000, loss: 6.982735
  54. step: 390000, loss: 8.549622
  55. step: 400000, loss: 8.388112
  56. Evaluation...
  57. "five" nearest neighbors: four, three, six, two, seven, eight, one, zero,
  58. "of" nearest neighbors: the, a, with, also, for, and, which, by,
  59. "going" nearest neighbors: have, are, both, called, being, a, of, had,
  60. "hardware" nearest neighbors: may, de, some, have, so, which, other, also,
  61. "american" nearest neighbors: s, british, UNK, from, in, including, first, see,
  62. "britain" nearest neighbors: against, include, including, both, british, other, an, most,
  63. step: 410000, loss: 8.757725
  64. step: 420000, loss: 12.303110
  65. step: 430000, loss: 12.325478
  66. step: 440000, loss: 7.659882
  67. step: 450000, loss: 6.028089
  68. step: 460000, loss: 12.700299
  69. step: 470000, loss: 7.063077
  70. step: 480000, loss: 18.004183
  71. step: 490000, loss: 7.510474
  72. step: 500000, loss: 10.089376
  73. step: 510000, loss: 11.404436
  74. step: 520000, loss: 9.494527
  75. step: 530000, loss: 7.797963
  76. step: 540000, loss: 7.390718
  77. step: 550000, loss: 13.911215
  78. step: 560000, loss: 6.975731
  79. step: 570000, loss: 6.179163
  80. step: 580000, loss: 7.066525
  81. step: 590000, loss: 6.487288
  82. step: 600000, loss: 5.361528
  83. Evaluation...
  84. "five" nearest neighbors: four, six, three, seven, two, one, eight, zero,
  85. "of" nearest neighbors: the, and, from, with, a, including, in, include,
  86. "going" nearest neighbors: have, even, they, term, who, many, which, were,
  87. "hardware" nearest neighbors: include, computer, an, which, other, each, than, may,
  88. "american" nearest neighbors: english, french, s, german, from, in, film, see,
  89. "britain" nearest neighbors: several, first, modern, part, government, german, was, were,
  90. step: 610000, loss: 4.144980
  91. step: 620000, loss: 5.865635
  92. step: 630000, loss: 6.826498
  93. step: 640000, loss: 8.376097
  94. step: 650000, loss: 7.117930
  95. step: 660000, loss: 7.639544
  96. step: 670000, loss: 5.973255
  97. step: 680000, loss: 4.908459
  98. step: 690000, loss: 6.164993
  99. step: 700000, loss: 7.360281
  100. step: 710000, loss: 12.693079
  101. step: 720000, loss: 6.410182
  102. step: 730000, loss: 7.499201
  103. step: 740000, loss: 6.509094
  104. step: 750000, loss: 10.625893
  105. step: 760000, loss: 7.177696
  106. step: 770000, loss: 12.639092
  107. step: 780000, loss: 8.441635
  108. step: 790000, loss: 7.529139
  109. step: 800000, loss: 6.579177
  110. Evaluation...
  111. "five" nearest neighbors: four, three, six, seven, eight, two, one, zero,
  112. "of" nearest neighbors: and, with, in, the, its, from, by, including,
  113. "going" nearest neighbors: have, they, how, include, people, however, also, their,
  114. "hardware" nearest neighbors: computer, large, include, may, or, which, other, there,
  115. "american" nearest neighbors: born, french, british, english, german, b, john, d,
  116. "britain" nearest neighbors: country, including, include, general, part, various, several, by,
  117. step: 810000, loss: 6.934138
  118. step: 820000, loss: 5.686094
  119. step: 830000, loss: 7.310243
  120. step: 840000, loss: 5.028157
  121. step: 850000, loss: 7.079705
  122. step: 860000, loss: 6.768996
  123. step: 870000, loss: 5.604030
  124. step: 880000, loss: 8.208309
  125. step: 890000, loss: 6.301597
  126. step: 900000, loss: 5.733234
  127. step: 910000, loss: 6.577081
  128. step: 920000, loss: 6.774826
  129. step: 930000, loss: 7.068932
  130. step: 940000, loss: 6.694956
  131. step: 950000, loss: 7.944673
  132. step: 960000, loss: 5.988618
  133. step: 970000, loss: 6.651366
  134. step: 980000, loss: 4.595577
  135. step: 990000, loss: 6.564834
  136. step: 1000000, loss: 4.327858
  137. Evaluation...
  138. "five" nearest neighbors: four, three, seven, six, eight, two, nine, zero,
  139. "of" nearest neighbors: the, first, and, became, from, under, at, with,
  140. "going" nearest neighbors: others, has, then, have, how, become, had, also,
  141. "hardware" nearest neighbors: computer, large, systems, these, different, either, include, using,
  142. "american" nearest neighbors: b, born, d, UNK, nine, english, german, french,
  143. "britain" nearest neighbors: government, island, local, country, by, including, control, within,
  144. step: 1010000, loss: 5.841236
  145. step: 1020000, loss: 5.805200
  146. step: 1030000, loss: 9.962063
  147. step: 1040000, loss: 6.281199
  148. step: 1050000, loss: 7.147995
  149. step: 1060000, loss: 5.721184
  150. step: 1070000, loss: 7.080662
  151. step: 1080000, loss: 6.638658
  152. step: 1090000, loss: 5.814178
  153. step: 1100000, loss: 5.195928
  154. step: 1110000, loss: 6.724787
  155. step: 1120000, loss: 6.503905
  156. step: 1130000, loss: 5.762966
  157. step: 1140000, loss: 5.790243
  158. step: 1150000, loss: 5.958191
  159. step: 1160000, loss: 5.997983
  160. step: 1170000, loss: 7.065348
  161. step: 1180000, loss: 6.073387
  162. step: 1190000, loss: 6.644097
  163. step: 1200000, loss: 5.934450
  164. Evaluation...
  165. "five" nearest neighbors: three, four, six, eight, seven, two, nine, zero,
  166. "of" nearest neighbors: the, and, including, in, its, with, from, on,
  167. "going" nearest neighbors: others, then, through, has, had, another, people, when,
  168. "hardware" nearest neighbors: computer, control, systems, either, these, large, small, other,
  169. "american" nearest neighbors: born, german, john, d, british, b, UNK, french,
  170. "britain" nearest neighbors: local, against, british, island, country, general, including, within,
  171. step: 1210000, loss: 5.832344
  172. step: 1220000, loss: 6.453851
  173. step: 1230000, loss: 6.583966
  174. step: 1240000, loss: 5.571673
  175. step: 1250000, loss: 5.720917
  176. step: 1260000, loss: 7.663424
  177. step: 1270000, loss: 6.583741
  178. step: 1280000, loss: 8.503859
  179. step: 1290000, loss: 5.540640
  180. step: 1300000, loss: 6.703249
  181. step: 1310000, loss: 5.274101
  182. step: 1320000, loss: 5.846446
  183. step: 1330000, loss: 5.438172
  184. step: 1340000, loss: 6.367691
  185. step: 1350000, loss: 6.558622
  186. step: 1360000, loss: 9.822924
  187. step: 1370000, loss: 4.982378
  188. step: 1380000, loss: 6.159739
  189. step: 1390000, loss: 5.819083
  190. step: 1400000, loss: 7.775135
  191. Evaluation...
  192. "five" nearest neighbors: four, three, six, seven, two, eight, one, zero,
  193. "of" nearest neighbors: and, the, in, with, its, within, for, including,
  194. "going" nearest neighbors: others, through, while, has, to, how, particularly, their,
  195. "hardware" nearest neighbors: computer, systems, large, control, research, using, information, either,
  196. "american" nearest neighbors: english, french, german, born, film, british, s, former,
  197. "britain" nearest neighbors: british, country, europe, local, military, island, against, western,
  198. step: 1410000, loss: 8.214248
  199. step: 1420000, loss: 4.696859
  200. step: 1430000, loss: 5.873761
  201. step: 1440000, loss: 5.971557
  202. step: 1450000, loss: 4.992722
  203. step: 1460000, loss: 5.197714
  204. step: 1470000, loss: 6.916918
  205. step: 1480000, loss: 6.441984
  206. step: 1490000, loss: 5.443647
  207. step: 1500000, loss: 5.178482
  208. step: 1510000, loss: 6.060414
  209. step: 1520000, loss: 6.373306
  210. step: 1530000, loss: 5.098322
  211. step: 1540000, loss: 6.674916
  212. step: 1550000, loss: 6.712685
  213. step: 1560000, loss: 5.280202
  214. step: 1570000, loss: 6.454964
  215. step: 1580000, loss: 4.896697
  216. step: 1590000, loss: 6.239226
  217. step: 1600000, loss: 5.709726
  218. Evaluation...
  219. "five" nearest neighbors: three, four, two, six, seven, eight, one, zero,
  220. "of" nearest neighbors: the, and, including, in, with, within, its, following,
  221. "going" nearest neighbors: others, people, who, they, that, far, were, have,
  222. "hardware" nearest neighbors: computer, systems, include, high, research, some, information, large,
  223. "american" nearest neighbors: born, english, french, british, german, d, john, b,
  224. "britain" nearest neighbors: country, military, china, europe, against, local, central, british,
  225. step: 1610000, loss: 6.334940
  226. step: 1620000, loss: 5.093616
  227. step: 1630000, loss: 6.119366
  228. step: 1640000, loss: 4.975187
  229. step: 1650000, loss: 6.490408
  230. step: 1660000, loss: 7.464082
  231. step: 1670000, loss: 4.977184
  232. step: 1680000, loss: 5.658133
  233. step: 1690000, loss: 5.352454
  234. step: 1700000, loss: 6.810776
  235. step: 1710000, loss: 5.687447
  236. step: 1720000, loss: 5.992206
  237. step: 1730000, loss: 5.513011
  238. step: 1740000, loss: 5.548522
  239. step: 1750000, loss: 6.200248
  240. step: 1760000, loss: 13.070073
  241. step: 1770000, loss: 4.621058
  242. step: 1780000, loss: 5.301342
  243. step: 1790000, loss: 4.777030
  244. step: 1800000, loss: 6.912136
  245. Evaluation...
  246. "five" nearest neighbors: three, four, six, seven, eight, two, nine, zero,
  247. "of" nearest neighbors: the, in, first, from, became, and, following, under,
  248. "going" nearest neighbors: others, their, through, which, therefore, open, how, that,
  249. "hardware" nearest neighbors: computer, systems, include, research, standard, different, system, small,
  250. "american" nearest neighbors: b, d, born, actor, UNK, english, nine, german,
  251. "britain" nearest neighbors: china, country, europe, against, canada, military, island, including,
  252. step: 1810000, loss: 5.584600
  253. step: 1820000, loss: 5.619820
  254. step: 1830000, loss: 6.078709
  255. step: 1840000, loss: 5.052518
  256. step: 1850000, loss: 5.430106
  257. step: 1860000, loss: 7.396770
  258. step: 1870000, loss: 5.344787
  259. step: 1880000, loss: 5.937998
  260. step: 1890000, loss: 5.706491
  261. step: 1900000, loss: 5.140662
  262. step: 1910000, loss: 5.607048
  263. step: 1920000, loss: 5.407231
  264. step: 1930000, loss: 6.238531
  265. step: 1940000, loss: 5.567973
  266. step: 1950000, loss: 4.894245
  267. step: 1960000, loss: 6.104193
  268. step: 1970000, loss: 5.282631
  269. step: 1980000, loss: 6.189069
  270. step: 1990000, loss: 6.169409
  271. step: 2000000, loss: 6.470152
  272. Evaluation...
  273. "five" nearest neighbors: four, three, six, seven, eight, two, nine, zero,
  274. "of" nearest neighbors: the, its, in, with, and, including, within, against,
  275. "going" nearest neighbors: others, only, therefore, will, how, a, far, though,
  276. "hardware" nearest neighbors: computer, systems, for, network, software, program, research, system,
  277. "american" nearest neighbors: born, actor, d, italian, german, john, robert, b,
  278. "britain" nearest neighbors: china, country, europe, canada, british, former, island, france,
  279. step: 2010000, loss: 5.298714
  280. step: 2020000, loss: 5.494207
  281. step: 2030000, loss: 5.410875
  282. step: 2040000, loss: 6.228232
  283. step: 2050000, loss: 5.044596
  284. step: 2060000, loss: 4.624638
  285. step: 2070000, loss: 4.919327
  286. step: 2080000, loss: 4.639625
  287. step: 2090000, loss: 4.865627
  288. step: 2100000, loss: 4.951073
  289. step: 2110000, loss: 5.973768
  290. step: 2120000, loss: 7.366824
  291. step: 2130000, loss: 5.149571
  292. step: 2140000, loss: 7.846234
  293. step: 2150000, loss: 5.449315
  294. step: 2160000, loss: 5.359211
  295. step: 2170000, loss: 5.171029
  296. step: 2180000, loss: 6.106437
  297. step: 2190000, loss: 6.043995
  298. step: 2200000, loss: 5.642351
  299. Evaluation...
  300. "five" nearest neighbors: four, three, six, two, eight, seven, zero, one,
  301. "of" nearest neighbors: the, and, its, see, for, in, with, including,
  302. "going" nearest neighbors: others, therefore, how, even, them, your, have, although,
  303. "hardware" nearest neighbors: computer, systems, system, network, program, research, software, include,
  304. "american" nearest neighbors: english, french, german, canadian, british, film, author, italian,
  305. "britain" nearest neighbors: europe, china, country, germany, british, england, france, throughout,
  306. step: 2210000, loss: 4.427110
  307. step: 2220000, loss: 6.240989
  308. step: 2230000, loss: 5.184978
  309. step: 2240000, loss: 8.035570
  310. step: 2250000, loss: 5.793781
  311. step: 2260000, loss: 4.908427
  312. step: 2270000, loss: 8.807668
  313. step: 2280000, loss: 6.083229
  314. step: 2290000, loss: 5.773360
  315. step: 2300000, loss: 5.613671
  316. step: 2310000, loss: 6.080076
  317. step: 2320000, loss: 5.288568
  318. step: 2330000, loss: 5.949232
  319. step: 2340000, loss: 5.479994
  320. step: 2350000, loss: 7.717686
  321. step: 2360000, loss: 5.163609
  322. step: 2370000, loss: 5.989407
  323. step: 2380000, loss: 5.785729
  324. step: 2390000, loss: 5.345478
  325. step: 2400000, loss: 6.627133
  326. Evaluation...
  327. "five" nearest neighbors: three, four, six, two, seven, eight, zero, nine,
  328. "of" nearest neighbors: the, in, and, including, from, within, its, with,
  329. "going" nearest neighbors: therefore, people, they, out, only, according, your, now,
  330. "hardware" nearest neighbors: computer, systems, network, program, system, software, run, design,
  331. "american" nearest neighbors: author, born, actor, english, canadian, british, italian, d,
  332. "britain" nearest neighbors: china, europe, country, throughout, france, canada, england, western,
  333. step: 2410000, loss: 5.666146
  334. step: 2420000, loss: 5.316198
  335. step: 2430000, loss: 5.129625
  336. step: 2440000, loss: 5.247949
  337. step: 2450000, loss: 5.741394
  338. step: 2460000, loss: 5.833083
  339. step: 2470000, loss: 7.704844
  340. step: 2480000, loss: 5.398345
  341. step: 2490000, loss: 5.089633
  342. step: 2500000, loss: 5.620508
  343. step: 2510000, loss: 4.976034
  344. step: 2520000, loss: 5.884676
  345. step: 2530000, loss: 6.649922
  346. step: 2540000, loss: 5.002588
  347. step: 2550000, loss: 5.072144
  348. step: 2560000, loss: 5.165375
  349. step: 2570000, loss: 5.310089
  350. step: 2580000, loss: 5.481957
  351. step: 2590000, loss: 6.104440
  352. step: 2600000, loss: 5.339644
  353. Evaluation...
  354. "five" nearest neighbors: three, four, six, seven, eight, nine, two, zero,
  355. "of" nearest neighbors: the, first, from, with, became, in, following, and,
  356. "going" nearest neighbors: how, therefore, back, will, through, always, your, make,
  357. "hardware" nearest neighbors: computer, systems, system, network, program, technology, design, software,
  358. "american" nearest neighbors: actor, singer, born, b, author, d, english, writer,
  359. "britain" nearest neighbors: europe, china, throughout, great, england, france, country, india,
  360. step: 2610000, loss: 7.754117
  361. step: 2620000, loss: 5.979313
  362. step: 2630000, loss: 5.394362
  363. step: 2640000, loss: 4.866740
  364. step: 2650000, loss: 5.219806
  365. step: 2660000, loss: 6.074809
  366. step: 2670000, loss: 6.216953
  367. step: 2680000, loss: 5.944881
  368. step: 2690000, loss: 5.863350
  369. step: 2700000, loss: 6.128705
  370. step: 2710000, loss: 5.502523
  371. step: 2720000, loss: 5.300839
  372. step: 2730000, loss: 6.358493
  373. step: 2740000, loss: 6.058306
  374. step: 2750000, loss: 4.689510
  375. step: 2760000, loss: 6.032880
  376. step: 2770000, loss: 5.844904
  377. step: 2780000, loss: 5.385874
  378. step: 2790000, loss: 5.370956
  379. step: 2800000, loss: 4.912577
  380. Evaluation...
  381. "five" nearest neighbors: four, six, three, eight, seven, two, nine, one,
  382. "of" nearest neighbors: in, the, and, from, including, following, with, under,
  383. "going" nearest neighbors: your, then, through, will, how, so, back, even,
  384. "hardware" nearest neighbors: computer, systems, program, network, design, standard, physical, software,
  385. "american" nearest neighbors: actor, singer, born, author, writer, canadian, italian, d,
  386. "britain" nearest neighbors: europe, china, england, throughout, france, india, great, germany,
  387. step: 2810000, loss: 5.897756
  388. step: 2820000, loss: 7.194932
  389. step: 2830000, loss: 7.430175
  390. step: 2840000, loss: 7.258231
  391. step: 2850000, loss: 5.837617
  392. step: 2860000, loss: 5.496673
  393. step: 2870000, loss: 6.173716
  394. step: 2880000, loss: 6.095749
  395. step: 2890000, loss: 6.064944
  396. step: 2900000, loss: 5.560488
  397. step: 2910000, loss: 4.966107
  398. step: 2920000, loss: 5.789579
  399. step: 2930000, loss: 4.525987
  400. step: 2940000, loss: 6.704808
  401. step: 2950000, loss: 4.506433
  402. step: 2960000, loss: 6.251270
  403. step: 2970000, loss: 5.588204
  404. step: 2980000, loss: 5.423235
  405. step: 2990000, loss: 5.613834
  406. step: 3000000, loss: 5.137326
  407. Evaluation...
  408. "five" nearest neighbors: four, three, six, seven, eight, two, zero, one,
  409. "of" nearest neighbors: the, including, and, with, in, its, includes, within,
  410. "going" nearest neighbors: how, they, when, them, make, always, your, though,
  411. "hardware" nearest neighbors: computer, systems, network, program, physical, design, technology, software,
  412. "american" nearest neighbors: canadian, english, australian, british, german, film, italian, author,
  413. "britain" nearest neighbors: europe, england, china, throughout, india, france, great, british,

欢迎关注磐创博客资源汇总站:

http://docs.panchuang.net/

欢迎关注PyTorch官方中文教程站:

http://pytorch.panchuang.net/


  1. https://arxiv.org/pdf/1301.3781.pdf ↩︎

TensorFlow v2.0实现Word2Vec算法的更多相关文章

  1. 使用TensorFlow v2.0构建多层感知器

    使用TensorFlow v2.0构建一个两层隐藏层完全连接的神经网络(多层感知器). 这个例子使用低级方法来更好地理解构建神经网络和训练过程背后的所有机制. 神经网络概述 MNIST 数据集概述 此 ...

  2. 使用TensorFlow v2.0构建卷积神经网络

    使用TensorFlow v2.0构建卷积神经网络. 这个例子使用低级方法来更好地理解构建卷积神经网络和训练过程背后的所有机制. CNN 概述 MNIST 数据集概述 此示例使用手写数字的MNIST数 ...

  3. TensorFlow v2.0实现逻辑斯谛回归

    使用TensorFlow v2.0实现逻辑斯谛回归 此示例使用简单方法来更好地理解训练过程背后的所有机制 MNIST数据集概览 此示例使用MNIST手写数字.该数据集包含60,000个用于训练的样本和 ...

  4. TensorFlow v2.0的基本张量操作

    使用TensorFlow v2.0的基本张量操作 from __future__ import print_function import tensorflow as tf # 定义张量常量 a = ...

  5. 在Anaconda3环境下安装并切换 Tensorflow 2.0 环境

    背景 Anaconda切换各种环境非常方便,现在我们就来介绍一下如何使用anaconda安装tensorflow环境. anaconda v3.5 from 清华镜像站 tensorflow v2.0 ...

  6. TensorFlow 2.0 新特性

    安装 TensorFlow 2.0 Alpha 本文仅仅介绍 Windows 的安装方式: pip install tensorflow==2.0.0-alpha0 # cpu 版本 pip inst ...

  7. 电梯V2.0

    电梯V2.0 GitHub仓库地址 Problem 一栋10层的大楼(楼层编号1-10),设3台无限载重的电梯,初始时电梯停在1层.其中:1号电梯只能停留在奇数层,2号电梯可以各层都停留,3号电梯只停 ...

  8. Kinect for Windows SDK v2.0 开发笔记 (十五) 手势帧

     (转载请注明出处) 使用SDK: Kinect for Windows SDK v2.0 public preview1409 同前面,由于SDK未完毕,不附上函数/方法/接口的超链接. 这次最 ...

  9. 痞子衡嵌入式:MCUBootUtility v2.0来袭,i.MXRT1010哪里逃

    -- 恩智浦半导体从2017年10月开始正式推出业内首款跨界处理器-i.MX RT系列,如今距离该系列第一款i.MXRT1050发布已过去近2年,i.MX RT系列在行业里应用越来越广泛,i.MX R ...

随机推荐

  1. 使用pandas筛选出指定列值所对应的行

    在pandas中怎么样实现类似mysql查找语句的功能: select * from table where column_name = some_value; pandas中获取数据的有以下几种方法 ...

  2. python递归用法

    需求:4的阶乘 4*3*2*1计算.通过递归算法,c=4*getnums(4-1),然后调用自己本身的函数,形成递归,就等于3*getnums(3-1),2*getnums(2-1),依次递归调用,最 ...

  3. k3s原理分析丨如何搞定k3s node注册失败问题

    前 言 面向边缘的轻量级K8S发行版k3s于去年2月底发布后,备受关注,在发布后的10个月时间里,Github Star达11,000颗.于去年11月中旬已经GA.但正如你所知,没有一个产品是十全十美 ...

  4. Error : Failed to get convolution algorithm. This is probably because cuDNN failed to initialize

    记录一下: 报错:# Error : Failed to get convolution algorithm. This is probably because cuDNN failed to ini ...

  5. HTTP GET请求302重定向问题

    1.问题描述 ① 在华为云服务器中搭建了java环境,并在tomcat中部署了一个空的web项目 ② 在此web项目中上传了一个名为:plugin_DTDREAM_LIVING_DEHUMIDIFIE ...

  6. 量子计算机编程(二)——QPU基础函数

    第二部分主要是QPU的基础功能,第一部分就像是我们有了哪些基本的语句,第二部分就是我们能写一些简单基础的函数,一些小模块,第三部分就是他的应用了. 先来看一下一个简单量子应用的结构: 第一步,将量子态 ...

  7. vue-element-admin 模板 登录页面 post请求通过django的csrf认证,处理304错误

    经过一天的研究,终于把 vue-admin-template 模板的 post 请求 和django的api 弄通了 没有了那该死的304报错了 直接贴代码: 在main.js中 我直接给设置了一个 ...

  8. 移值 RTL8188EUS linux4.1.36 驱动

    网卡为COMFAST CF-WU720N RTL8188EUSVID 0x0bdaPIV 0x8179 驱动下载网址是http://www.comfast.cn/download/qdcx/item- ...

  9. Redis04——五分钟明白Redis的哨兵模式

    和所有的数据库一样,Redis也支持集群化,Redis的集群分为分布式集群和主从集群.大部分公司采取的都是主从集群.所以在本篇文章内,我们将着重介绍Redis的主从集群及哨兵机制. 由于Redis的主 ...

  10. (转)协议森林13 9527 (DNS协议)

    协议森林13 9527 (DNS协议) 作者:Vamei 出处:http://www.cnblogs.com/vamei 欢迎转载,也请保留这段声明.谢谢! 在周星驰的电影<唐伯虎点秋香> ...