1. torch.nn与torch.nn.functional之间的区别和联系




import torch.nn.functional as F
class Conv2d(_ConvNd):
    """2D convolution layer: a class wrapper around the functional ``F.conv2d``.

    ``__init__`` normalises the size-like arguments with ``_pair`` and forwards
    them to the ``_ConvNd`` base initialiser; ``forward`` passes the layer's
    own attributes to ``F.conv2d``.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1, bias=True):
        kernel_size = _pair(kernel_size)
        stride = _pair(stride)
        padding = _pair(padding)
        dilation = _pair(dilation)
        # NOTE(review): the fixed `False, _pair(0)` arguments are presumably the
        # `transposed` flag and `output_padding` -- confirm against `_ConvNd`.
        super(Conv2d, self).__init__(
            in_channels, out_channels, kernel_size, stride, padding, dilation,
            False, _pair(0), groups, bias)

    def forward(self, input):
        """Apply ``F.conv2d`` to ``input`` using this layer's stored settings."""
        # weight/bias/stride/... are read from the instance; presumably they are
        # set by the base-class initialiser (not visible in this excerpt).
        return F.conv2d(input, self.weight, self.bias, self.stride,
                        self.padding, self.dilation, self.groups)


def conv2d(input, weight, bias=None, stride=1, padding=0, dilation=1,
           groups=1):
    """Functional 2D convolution: the caller supplies ``weight`` and ``bias``.

    Args:
        input: tensor to convolve; when not ``None`` it must be 4-dimensional.
        weight: convolution kernel, passed through to the underlying op.
        bias: optional bias, passed through to the underlying op.
        stride, padding, dilation: ints or pairs, normalised with ``_pair``.
        groups: passed through to the underlying op.

    Returns:
        Whatever the configured ``_ConvNd`` callable returns for
        ``(input, weight, bias)``.

    Raises:
        ValueError: if ``input`` is not ``None`` and ``input.dim() != 4``.
    """
    if input is not None and input.dim() != 4:
        raise ValueError("Expected 4D tensor as input, got {}D tensor instead.".format(input.dim()))

    # Build the convolution callable from the normalised sizes plus the
    # current cudnn backend flags, then apply it.
    f = _ConvNd(_pair(stride), _pair(padding), _pair(dilation), False,
                _pair(0), groups, torch.backends.cudnn.benchmark,
                torch.backends.cudnn.deterministic, torch.backends.cudnn.enabled)
    return f(input, weight, bias)


1. nn.Conv2d是一个类;F.conv2d是一个函数






在建图过程中,往往有两种层,一种如全连接层,卷积层等,当中有 Variable, 另一种如 Pooling层,ReLU层,当中没有 Variable.

如果所有的层都用 nn.functional 来定义,那么所有的Variable, 如 weights, bias 等,都需要用户手动定义,非常不便;

如果所有的层都用 nn 来定义,那么即便是简单的计算都需要建类来做,而这些可以用更为简单的函数来代替。

综上,在定义网络的时候,如果层内有 Variable, 那么用 nn 定义, 反之,则用 nn.functional定义。

2. ‘model.eval()’ vs ‘with torch.no_grad()’


1. model.eval() will notify all your layers that you are in eval mode; that way, batchnorm or dropout layers will work in eval mode instead of training mode.

model.eval()会告知模型中的所有layers, 目前处在eval模式,batchnorm和dropout层等都会在eval模式中工作。
2. torch.no_grad() impacts the autograd engine and deactivates it. It will reduce memory usage and speed up computations but you won’t be able to backprop (which you don’t want in an eval script).

torch.no_grad() 会影响 autograd 引擎,并关闭它。这样会降低内存的使用并且加速计算。但是将不可以使用backprop.

3. nn.Sequential() vs nn.ModuleList





3.1 nn.Sequential()

1. 模型的建立方式:

from collections import OrderedDict

import torch
import torch.nn as nn
from torch.autograd import Variable

# nn.Sequential: three ways to build the same conv -> batchnorm -> ReLU stack.

# (a) Start from an empty container and register named sub-modules one by one.
net1 = nn.Sequential()
net1.add_module('conv', nn.Conv2d(3, 3, 3))
# net1.add_module('conv2', nn.Conv2d(3, 3, 2))
net1.add_module('batchnorm', nn.BatchNorm2d(3))
net1.add_module('activation_layer', nn.ReLU())

print("net1:")
print(net1)
# net1:
# Sequential(
#   (conv): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1))
#   (batchnorm): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
#   (activation_layer): ReLU()
# )

# (b) Pass the layers positionally; sub-modules are then named "0", "1", "2".
net2 = nn.Sequential(
    nn.Conv2d(3, 3, 3),
    nn.BatchNorm2d(3),
    nn.ReLU(),
)

print("net2:")
print(net2)
# net2:
# Sequential(
#   (0): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1))
#   (1): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
#   (2): ReLU()
# )

# (c) Pass an OrderedDict so every layer gets an explicit name.
net3 = nn.Sequential(OrderedDict([
    ('conv', nn.Conv2d(3, 3, 3)),
    ('batchnorm', nn.BatchNorm2d(3)),
    ('activation_layer', nn.ReLU())
]))

print("net3:")
print(net3)
# net3:
# Sequential(
#   (conv): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1))
#   (batchnorm): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
#   (activation_layer): ReLU()
# )

2. 获取子Module对象

# get the sub module by the name or index
print("Get the sub module by the name or index:")
print(net1.conv)  # by the name given to add_module
print(net2[0])    # by integer index
print(net3.conv)  # by the name given as the OrderedDict key
# Get the sub module by the name or index:
# Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1))
# Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1))
# Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1))

3. 调用模型

# use the model
input = Variable(torch.rand(1, 3, 4, 4))
output1, output2, output3 = net1(input), net2(input), net3(input)
# A Sequential can also be run by hand, one sub-module at a time.
output4 = net3.activation_layer(net1.batchnorm(net1.conv(input)))
for label, result in (
    ("output1:", output1),
    ("output2:", output2),
    ("output3:", output3),
    ("output4:", output4),
):
    print(label, result)
# Sample output (the input comes from torch.rand, so values differ per run).
# In this run output4 matches output1: it chains net1's own conv and
# batchnorm and then applies a ReLU.
# output1: tensor([[[[0.0000, 0.1066],
#           [0.0075, 0.1379]],
#
#          [[0.0558, 0.9517],
#           [0.0000, 0.0000]],
#
#          [[0.5355, 0.0000],
#           [0.4478, 0.0000]]]], grad_fn=<ThresholdBackward0>)
# output2: tensor([[[[0.4227, 0.3509],
#           [0.0868, 0.0000]],
#
#          [[0.0000, 0.0034],
#           [0.0038, 0.0000]],
#
#          [[0.0000, 0.0000],
#           [0.4002, 0.1882]]]], grad_fn=<ThresholdBackward0>)
# output3: tensor([[[[0.0000, 0.0000],
#           [0.4779, 0.0000]],
#
#          [[0.0000, 1.5064],
#           [0.0000, 0.1515]],
#
#          [[0.7417, 0.0000],
#           [0.3366, 0.0000]]]], grad_fn=<ThresholdBackward0>)
# output4: tensor([[[[0.0000, 0.1066],
#           [0.0075, 0.1379]],
#
#          [[0.0558, 0.9517],
#           [0.0000, 0.0000]],
#
#          [[0.5355, 0.0000],
#           [0.4478, 0.0000]]]], grad_fn=<ThresholdBackward0>)

3.2 nn.ModuleList

它被设计用来存储任意数量的 nn.Module。


1. 可以采用迭代或下标索引方式获取Module

# 1. support index and enumerate
class MyModule(nn.Module):
    """Ten ``nn.Linear(10, 10)`` layers stored in an ``nn.ModuleList``.

    ``forward`` shows that a ModuleList can be both iterated over and
    indexed with an integer.
    """

    def __init__(self):
        super(MyModule, self).__init__()
        self.linears = nn.ModuleList([nn.Linear(10, 10) for _ in range(10)])

    def forward(self, x):
        """Fold ``x`` through the layers, mixing indexed and iterated access."""
        for i, layer in enumerate(self.linears):
            # self.linears[i // 2] -> access by index; layer -> access by iteration
            x = self.linears[i // 2](x) + layer(x)
        return x

2. extend 和 append方法


extend 是添加另一个 ModuleList(或一组 Module);append 是添加单个 Module。


# 2. extend with a list of modules; append a single module
class LinearNet(nn.Module):
    """Chain of ``nn.Linear`` layers assembled with ``ModuleList.extend``/``append``.

    Args:
        input_size: ``in_features`` of the first layer.
        num_layers: total number of linear layers (first + hidden + last).
        layers_size: width shared by the first layer's output and hidden layers.
        output_size: ``out_features`` of the last layer.
    """

    def __init__(self, input_size, num_layers, layers_size, output_size):
        super(LinearNet, self).__init__()
        self.linears = nn.ModuleList([nn.Linear(input_size, layers_size)])
        # extend: add several modules at once (num_layers - 2 hidden layers)
        self.linears.extend([nn.Linear(layers_size, layers_size) for _ in range(1, num_layers - 1)])
        # append: add exactly one module
        self.linears.append(nn.Linear(layers_size, output_size))


model1 = LinearNet(5, 3, 4, 2)
print("---model LinearNet---")
print(model1)
# ---model LinearNet---
# LinearNet(
#   (linears): ModuleList(
#     (0): Linear(in_features=5, out_features=4, bias=True)
#     (1): Linear(in_features=4, out_features=4, bias=True)
#     (2): Linear(in_features=4, out_features=2, bias=True)
#   )
# )

3. 建立以及使用方法

# 3. create and use -- ModuleList does not implement forward
modellist = nn.ModuleList([nn.Linear(3, 4), nn.ReLU(), nn.Linear(4, 2)])
input = Variable(torch.randn(1, 3))
# Each contained module has to be called explicitly, in order.
for model in modellist:
    input = model(input)
# output = modellist(input)  --> wrong, because ModuleList does not implement forward

4. ModuleList与list的区别



class MyModule_list(nn.Module):
    """Contrast a plain Python ``list`` of layers with an ``nn.ModuleList``.

    Both attributes hold layers, but as the sample output below shows, only
    the ModuleList appears in ``print(model)`` and ``named_parameters()``.
    """

    def __init__(self):
        super(MyModule_list, self).__init__()
        # Plain Python list of layers.
        self.list = [nn.Linear(3, 4), nn.ReLU()]
        # ModuleList of layers.
        self.module_list = nn.ModuleList([nn.Conv2d(3, 3, 3), nn.ReLU()])

    def forward(self):
        # No computation is needed for this demonstration.
        pass


model = MyModule_list()
print(model)
# MyModule_list(
#   (module_list): ModuleList(
#     (0): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1))
#     (1): ReLU()
#   )
# )
# Only the ModuleList is reported; the plain list is not.

for name, param in model.named_parameters():
    print(name, param.size())
# module_list.0.weight torch.Size([3, 3, 3, 3])
# module_list.0.bias torch.Size([3])
# Only the ModuleList is reported; the plain list is not.

