1. `*args` 与 `**kwargs` 的区别

     def build_vocab(self, *args, **kwargs):
         """Count tokens from the given data sources and build ``self.vocab``.

         This method illustrates the two variadic forms: ``*args`` gathers
         any number of positional arguments into a tuple, while ``**kwargs``
         gathers keyword arguments into a dict that is forwarded unchanged
         to ``Vocab``.

         Args:
             *args: Data sources. For a ``Dataset``, every attribute whose
                 field is this very object is used; any other argument is
                 iterated directly as a collection of examples.
             **kwargs: Extra keyword arguments passed through to ``Vocab``.
         """
         counter = Counter()
         sources = []
         for arg in args:
             if isinstance(arg, Dataset):
                 # Only the columns bound to this exact field (identity, not
                 # equality) contribute to its vocabulary.
                 sources += [getattr(arg, name) for name, field in
                             arg.fields.items() if field is self]
             else:
                 # Non-Dataset arguments are treated as ready-made iterables
                 # of examples instead of being silently ignored.
                 sources.append(arg)
         for data in sources:
             for x in data:
                 if not self.sequential:
                     # A non-sequential example is a single token; wrap it so
                     # Counter.update counts it whole rather than per character.
                     x = [x]
                 counter.update(x)
         # OrderedDict.fromkeys removes duplicate special tokens while
         # keeping their first-seen order.
         specials = list(OrderedDict.fromkeys(
             tok for tok in [self.pad_token, self.init_token, self.eos_token]
             if tok is not None))
         self.vocab = Vocab(counter, specials=specials, **kwargs)

2. np.sum

 import numpy as np
# Fix the seed so the printed values are reproducible across runs.
np.random.seed(0)
N, D = 3, 4
x = np.random.randn(N, D)
y = np.random.randn(N, D)
z = np.random.randn(N, D)
a = x * y
b = a + z
# With no axis argument, np.sum adds up every element and returns a scalar.
c = np.sum(b)
print(c)  # 6.7170085378
# The same reduction written out by hand: walk every row, then every entry,
# so the loop follows the array's shape instead of a hard-coded width.
total = 0
for row in b:
    for value in row:
        total += value
print(total)  # 6.7170085378

3. Use numpy to compute gradients by hand

 import numpy as np
N, D = 3, 4
x = np.random.randn(N, D)
y = np.random.randn(N, D)
z = np.random.randn(N, D)
# Forward pass: c = sum(x * y + z).
# No seed is set here, so the values differ from run to run.
a = x * y
b = a + z
c = np.sum(b)
# Backward pass: apply the chain rule by hand, from the output to the inputs.
grad_c = 1.0
# c sums every entry of b, so each entry receives the upstream gradient as-is.
grad_b = grad_c * np.ones((N, D))
# b = a + z: addition passes the gradient through unchanged to both operands.
grad_a = grad_b.copy()
grad_z = grad_b.copy()
# a = x * y: each factor's gradient is the upstream gradient times the other.
grad_x = grad_a * y
grad_y = grad_a * x
print(grad_x)
print(grad_y)
print(grad_z)
[[ 0.04998285 0.32809396 -0.49822878 1.36419309]
[-0.52303972 -0.5881509 -0.37058995 -1.42112189]
[-0.58705758 -0.26012336 1.31326911 -0.20088737]]
[[ 0.14893265 -0.45509058 0.21410015 0.27659 ]
[ 0.29617438 0.98971103 2.07310583 -0.0195055 ]
[-1.49222601 -0.64073344 -0.18269488 0.26193553]]
[[ 1. 1. 1. 1.]
[ 1. 1. 1. 1.]
[ 1. 1. 1. 1.]]


 import torch
from torch.autograd import Variable

N, D = 3, 4
# define variables to start building a computational graph
# NOTE: since PyTorch 0.4, Variable(...) simply returns a Tensor; calling
# torch.randn(N, D, requires_grad=True) directly is the modern spelling.
x = Variable(torch.randn(N, D), requires_grad=True)
y = Variable(torch.randn(N, D), requires_grad=True)
z = Variable(torch.randn(N, D), requires_grad=True)
# forward pass looks just like numpy
a = x * y
b = a + z
c = torch.sum(b)
# calling c.backward() computes all gradients
c.backward()
# Expected: x.grad equals y, y.grad equals x, z.grad is all ones.
print(x.grad.data)
print(y.grad.data)
print(z.grad.data)
-0.9775 -0.0913 0.3710 1.5789
0.0896 -0.6563 0.8976 -0.3508
-0.9378 0.7028 1.4533 0.9255
[torch.FloatTensor of size 3x4]

0.6365 0.2388 -0.4755 -0.9860
-0.2403 -0.0468 -0.0470 -1.0132
-0.5019 0.5005 -1.9270 1.0030
[torch.FloatTensor of size 3x4]

1 1 1 1
1 1 1 1
1 1 1 1
[torch.FloatTensor of size 3x4]

x is a variable, requires_grad=True.

x.data is a tensor.

x.grad is a variable of gradients (same shape as x.data).

x.grad.data is a tensor of gradients.

4. 随机数


  • 给随机数对象一个种子值,用于产生随机序列。
  • 对于同一个种子值的输入,之后产生的随机数序列也一样。
  • 通常是把时间秒数等变化值作为种子值,达到每次运行产生的随机序列都不一样
  • seed() 省略参数时,NumPy 会从操作系统的随机源(不可用时退回到系统时钟)获取种子,因此每次运行产生的随机序列都不同



 import numpy as np
# Seeding with a fixed value makes the generated sequence reproducible.
np.random.seed(0)
first = np.random.random()
print(first)  # 0.5488135039273248 -- identical on every run
second = np.random.random()
print(second)  # 0.7151893663724195 -- identical on every run
# Re-seeding with the same value restarts the very same sequence.
np.random.seed(0)
repeated = np.random.random()
print(repeated)  # 0.5488135039273248 -- identical on every run
# Seeding without an argument picks a fresh seed, so what follows is not
# reproducible; the annotated values are just one sample run.
np.random.seed()
print(np.random.random())  # e.g. 0.9623797942471012 -- changes between runs
print(np.random.random())  # e.g. 0.12734792669918393 -- changes between runs


  • 对list列表随机打乱顺序,也就是洗牌
  • shuffle 是原地打乱,只能作用于可变序列(如 list);str 不可变,对 'abcdfed' 这样的字符串会报错,而 ['1','2','3','5','6','7'] 这样的列表可以
 import numpy as np

 # np.random.shuffle reorders a list in place and returns None.
item = [1, 2, 3, 4, 5, 6, 7]
print(item)  # [1, 2, 3, 4, 5, 6, 7]
np.random.shuffle(item)
# No seed is set, so the order shown is just one possible outcome.
print(item)  # e.g. [7, 1, 2, 5, 4, 6, 3]
item2 = ['1', '2', '3']
np.random.shuffle(item2)
print(item2)  # e.g. ['1', '3', '2']



