When training with Caffe, using multiple GPUs only requires specifying the GPU indices when launching the program. In PyTorch, by contrast, you have to wrap the model after declaring it, for example:

cnn = DataParallel(AlexNet())
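
By default DataParallel replicates the model across every visible GPU. If you only want a subset, you can pass an explicit device list (device_ids is part of the standard torch.nn.DataParallel signature) or hide devices from the process via CUDA_VISIBLE_DEVICES. A minimal sketch, where AlexNet stands in for whatever model class you are wrapping:

import torch.nn as nn

# replicate the model on GPUs 0 and 1 only; parameters must live on device_ids[0]
cnn = nn.DataParallel(AlexNet(), device_ids=[0, 1])
cnn.cuda()

# equivalently, restrict device visibility from the shell:
#   CUDA_VISIBLE_DEVICES=0,1 python resnet.py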

After that, running the PyTorch program will use all available GPUs by default. To demonstrate the effect of this wrapping, I took a dataset of distorted images, wrote a ResNet module, trained it for 50 epochs, and compared the elapsed time. The code is as follows:

# -*- coding: utf-8 -*-
# Implementation of https://arxiv.org/pdf/1512.03385.pdf/
# See section 4.2 for model architecture on CIFAR-10.
# Some part of the code was referenced below.
# https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py
import os
from PIL import Image
import time
import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.autograd import Variable
import torch.utils.data as data
from torch.nn import DataParallel

kwargs = {'num_workers': 1, 'pin_memory': True}


# custom data loader: returns the image and its corresponding label
def default_loader(path):
    return Image.open(path).convert('RGB')


class myImageFloder(data.Dataset):  # inherits from data.Dataset
    def __init__(self, root, label, transform=None, target_transform=None, loader=default_loader):
        fh = open(label)
        c = 0
        imgs = []
        class_names = []
        for line in fh.readlines():
            if c == 0:
                # the first line of the label file holds the class names
                class_names = [n.strip() for n in line.rstrip().split(' ')]
            else:
                cls = line.split()  # cls is a list
                fn = cls.pop(0)     # the first field is the file name
                if os.path.isfile(os.path.join(root, fn)):
                    # imgs is a list of (filename, label-tuple) pairs
                    imgs.append((fn, tuple([float(v) for v in cls])))
            c = c + 1
        self.root = root
        self.imgs = imgs
        self.classes = class_names
        self.transform = transform
        self.target_transform = target_transform
        self.loader = loader

    def __getitem__(self, index):
        # self.imgs is a plain list, so indexing it returns the (filename, label) pair directly
        fn, label = self.imgs[index]
        img = self.loader(os.path.join(self.root, fn))
        if self.transform is not None:
            img = self.transform(img)
        return img, torch.Tensor(label)

    def __len__(self):
        return len(self.imgs)

    def getName(self):
        return self.classes


mytransform = transforms.Compose([transforms.ToTensor()])  # no preprocessing beyond converting to a tensor

train_data_root = "/home/ying/shiyongjie/rjp/generate_distortion_image_2016_03_15/0_Distorted_Image/Training"
test_data_root = "/home/ying/shiyongjie/rjp/generate_distortion_image_2016_03_15/0_Distorted_Image/Testing"
train_label = "/home/ying/shiyongjie/rjp/generate_distortion_image_2016_03_15/0_Distorted_Image/NameList_train.txt"
test_label = "/home/ying/shiyongjie/rjp/generate_distortion_image_2016_03_15/0_Distorted_Image/NameList_test.txt"

train_loader = torch.utils.data.DataLoader(
    myImageFloder(root=train_data_root, label=train_label, transform=mytransform),
    batch_size=64, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
    myImageFloder(root=test_data_root, label=test_label, transform=mytransform),
    batch_size=64, shuffle=True, **kwargs)


# 3x3 Convolution
def conv3x3(in_channels, out_channels, stride=1):
    return nn.Conv2d(in_channels, out_channels, kernel_size=3,
                     stride=stride, padding=1, bias=False)


# Residual Block
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResidualBlock, self).__init__()
        self.conv1 = conv3x3(in_channels, out_channels, stride)  # kernel size defaults to 3
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(out_channels, out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample

    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        if self.downsample:
            residual = self.downsample(x)
        out += residual
        out = self.relu(out)
        return out


# ResNet Module
class ResNet(nn.Module):
    def __init__(self, block, layers, num_classes=1):
        super(ResNet, self).__init__()
        self.in_channels = 16
        self.conv = conv3x3(3, 16)
        self.bn = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        self.layer1 = self.make_layer(block, 16, layers[0])
        self.layer2 = self.make_layer(block, 32, layers[0], 2)
        self.layer3 = self.make_layer(block, 64, layers[1], 2)  # args: block count and stride
        self.layer4 = self.make_layer(block, 128, layers[1], 2)
        self.layer5 = self.make_layer(block, 256, layers[1], 2)
        self.avg_pool = nn.AvgPool2d(kernel_size=8, stride=8)  # output feature map is 2x2
        self.fc = nn.Linear(256 * 2 * 2, num_classes)

    def make_layer(self, block, out_channels, blocks, stride=1):
        downsample = None
        if (stride != 1) or (self.in_channels != out_channels):
            # the input channel count does not match the output's, so downsample the
            # shortcut with a strided conv, e.g. 256x256x16 -> 128x128x32
            downsample = nn.Sequential(
                conv3x3(self.in_channels, out_channels, stride=stride),
                nn.BatchNorm2d(out_channels))
        layers = []
        layers.append(block(self.in_channels, out_channels, stride, downsample))
        self.in_channels = out_channels  # update the channel count for subsequent blocks
        for i in range(1, blocks):  # start from 1 because the first block is already appended
            layers.append(block(out_channels, out_channels))  # e.g. 32x32 -> 8x8
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.conv(x)
        out = self.bn(out)
        out = self.relu(out)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        out = self.avg_pool(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out


resnet = DataParallel(ResNet(ResidualBlock, [3, 3, 3]))
resnet.cuda()

# Loss and Optimizer
criterion = nn.MSELoss()
lr = 0.001
optimizer = torch.optim.Adam(resnet.parameters(), lr=lr)

# Training
start = time.clock()
for epoch in range(50):
    for i, (images, labels) in enumerate(train_loader):
        images = Variable(images.cuda())
        labels = Variable(labels.cuda())

        # Forward + Backward + Optimize
        optimizer.zero_grad()
        outputs = resnet(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        if (i + 1) % 100 == 0:
            # note: the "80" and "500" below are hardcoded and do not match the actual
            # 50 epochs and ~900 iterations per epoch, hence lines like "Iter [900/500]"
            # in the logs; loss.data[0] is PyTorch 0.3-era syntax (newer versions use loss.item())
            print("Epoch [%d/%d], Iter [%d/%d] Loss: %.4f"
                  % (epoch + 1, 80, i + 1, 500, loss.data[0]))

    # Decaying Learning Rate
    if (epoch + 1) % 20 == 0:
        lr /= 3
        optimizer = torch.optim.Adam(resnet.parameters(), lr=lr)

elapsed = time.clock() - start
print("time used:", elapsed)

# # Test
# correct = 0
# total = 0
# for images, labels in test_loader:
#     images = Variable(images.cuda())
#     outputs = resnet(images)
#     _, predicted = torch.max(outputs.data, 1)
#     total += labels.size(0)
#     correct += (predicted.cpu() == labels).sum()
#
# print('Accuracy of the model on the test images: %d %%' % (100 * correct / total))

# Save the Model
torch.save(resnet.state_dict(), 'resnet.pkl')
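
One caveat about that last line, which is general DataParallel behavior rather than something this experiment measures: calling state_dict() on the wrapped model prefixes every parameter key with "module.", so the checkpoint above will not load directly into a bare ResNet. Saving the inner module instead avoids the mismatch — a small sketch of the alternative, not what the timing runs above used:

# save the underlying network so the checkpoint loads with or without DataParallel
torch.save(resnet.module.state_dict(), 'resnet.pkl')

# later, loading into a plain (unwrapped) model:
model = ResNet(ResidualBlock, [3, 3, 3])
model.load_state_dict(torch.load('resnet.pkl'))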

As a control experiment, we change the ResNet declaration to

resnet = ResNet(ResidualBlock, [3, 3, 3])

with everything else unchanged. In both runs the program is launched without specifying a GPU, simply python resnet.py. With the DataParallel wrapper, the elapsed time is as follows:

('time used:', 17124.861335999998) — about 4.8 hours, and watch -n 1 nvidia-smi confirms that two GPUs are in use.

Without the DataParallel wrapper, the elapsed time is as follows:

('time used:', 30318.149681000003) — about 8.4 hours, and watch -n 1 nvidia-smi confirms that only one GPU is in use.

As you can see, wrapping the model in DataParallel cut the running time by nearly half, so wrapping a model in DataParallel is one way to run PyTorch on multiple GPUs.
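
The speedup comes from data parallelism: on each forward pass, DataParallel splits the input batch across the GPUs, replicates the module onto each of them, runs the replicas in parallel, and gathers the outputs back onto one device. Conceptually it boils down to the following (a simplified sketch built on the torch.nn.parallel primitives, not the actual implementation):

replicas = nn.parallel.replicate(module, device_ids)       # copy the model to each GPU
scattered = nn.parallel.scatter(input, device_ids)         # split the batch along dim 0
outputs = nn.parallel.parallel_apply(replicas, scattered)  # run the replicas in parallel
result = nn.parallel.gather(outputs, output_device)        # collect the results on one device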

The official docs also give examples of multi-GPU usage, as well as of running part of a model on the GPU and part on the CPU.
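
For the CPU/GPU mix, the pattern is simply to call .cuda() on only part of the model and move the activations between devices inside forward. A minimal sketch (HybridNet and its layer sizes are made up for illustration, not taken from the docs):

class HybridNet(nn.Module):
    def __init__(self):
        super(HybridNet, self).__init__()
        self.front = nn.Linear(100, 50)       # stays on the CPU
        self.back = nn.Linear(50, 10).cuda()  # lives on the GPU

    def forward(self, x):
        x = self.front(x)        # computed on the CPU
        x = self.back(x.cuda())  # move the activations over, then compute on the GPU
        return x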

Below is the output of the two runs:

With DataParallel initialization:

 Epoch [1/80], Iter [100/500] Loss: 916.5578

 Epoch [1/80], Iter [200/500] Loss: 172.2591

 Epoch [1/80], Iter [300/500] Loss: 179.8360

 Epoch [1/80], Iter [400/500] Loss: 259.6867

 Epoch [1/80], Iter [500/500] Loss: 244.0616

 Epoch [1/80], Iter [600/500] Loss: 74.7015

 Epoch [1/80], Iter [700/500] Loss: 63.1657

 Epoch [1/80], Iter [800/500] Loss: 90.3517

 Epoch [1/80], Iter [900/500] Loss: 70.4562

 Epoch [2/80], Iter [100/500] Loss: 52.3249

 Epoch [2/80], Iter [200/500] Loss: 129.1855

 Epoch [2/80], Iter [300/500] Loss: 110.0157

 Epoch [2/80], Iter [400/500] Loss: 64.9313

 Epoch [2/80], Iter [500/500] Loss: 87.8385

 Epoch [2/80], Iter [600/500] Loss: 118.5828

 Epoch [2/80], Iter [700/500] Loss: 123.9575

 Epoch [2/80], Iter [800/500] Loss: 79.1908

 Epoch [2/80], Iter [900/500] Loss: 61.8099

 Epoch [3/80], Iter [100/500] Loss: 50.4294

 Epoch [3/80], Iter [200/500] Loss: 106.8135

 Epoch [3/80], Iter [300/500] Loss: 83.2198

 Epoch [3/80], Iter [400/500] Loss: 60.7116

 Epoch [3/80], Iter [500/500] Loss: 101.9553

 Epoch [3/80], Iter [600/500] Loss: 64.6967

 Epoch [3/80], Iter [700/500] Loss: 66.2446

 Epoch [3/80], Iter [800/500] Loss: 81.1825

 Epoch [3/80], Iter [900/500] Loss: 53.9905

 Epoch [4/80], Iter [100/500] Loss: 76.2977

 Epoch [4/80], Iter [200/500] Loss: 18.4255

 Epoch [4/80], Iter [300/500] Loss: 57.6188

 Epoch [4/80], Iter [400/500] Loss: 45.6235

 Epoch [4/80], Iter [500/500] Loss: 82.9265

 Epoch [4/80], Iter [600/500] Loss: 119.6085

 Epoch [4/80], Iter [700/500] Loss: 53.1355

 Epoch [4/80], Iter [800/500] Loss: 29.5248

 Epoch [4/80], Iter [900/500] Loss: 57.0401

 Epoch [5/80], Iter [100/500] Loss: 47.2671

 Epoch [5/80], Iter [200/500] Loss: 31.6928

 Epoch [5/80], Iter [300/500] Loss: 38.0040

 Epoch [5/80], Iter [400/500] Loss: 24.5184

 Epoch [5/80], Iter [500/500] Loss: 33.8515

 Epoch [5/80], Iter [600/500] Loss: 43.6560

 Epoch [5/80], Iter [700/500] Loss: 68.2500

 Epoch [5/80], Iter [800/500] Loss: 30.8259

 Epoch [5/80], Iter [900/500] Loss: 43.9696

 Epoch [6/80], Iter [100/500] Loss: 22.4120

 Epoch [6/80], Iter [200/500] Loss: 45.5722

 Epoch [6/80], Iter [300/500] Loss: 26.8331

 Epoch [6/80], Iter [400/500] Loss: 58.1139

 Epoch [6/80], Iter [500/500] Loss: 12.8767

 Epoch [6/80], Iter [600/500] Loss: 26.6725

 Epoch [6/80], Iter [700/500] Loss: 31.9800

 Epoch [6/80], Iter [800/500] Loss: 91.2332

 Epoch [6/80], Iter [900/500] Loss: 44.1361

 Epoch [7/80], Iter [100/500] Loss: 13.1401

 Epoch [7/80], Iter [200/500] Loss: 20.9435

 Epoch [7/80], Iter [300/500] Loss: 28.0944

 Epoch [7/80], Iter [400/500] Loss: 24.0240

 Epoch [7/80], Iter [500/500] Loss: 43.3279

 Epoch [7/80], Iter [600/500] Loss: 23.3077

 Epoch [7/80], Iter [700/500] Loss: 32.9658

 Epoch [7/80], Iter [800/500] Loss: 27.2044

 Epoch [7/80], Iter [900/500] Loss: 25.5850

 Epoch [8/80], Iter [100/500] Loss: 39.7642

 Epoch [8/80], Iter [200/500] Loss: 17.7421

 Epoch [8/80], Iter [300/500] Loss: 29.8965

 Epoch [8/80], Iter [400/500] Loss: 20.6153

 Epoch [8/80], Iter [500/500] Loss: 43.0224

 Epoch [8/80], Iter [600/500] Loss: 58.1552

 Epoch [8/80], Iter [700/500] Loss: 19.1967

 Epoch [8/80], Iter [800/500] Loss: 34.9122

 Epoch [8/80], Iter [900/500] Loss: 15.0651

 Epoch [9/80], Iter [100/500] Loss: 18.5950

 Epoch [9/80], Iter [200/500] Loss: 36.1891

 Epoch [9/80], Iter [300/500] Loss: 22.4936

 Epoch [9/80], Iter [400/500] Loss: 14.8044

 Epoch [9/80], Iter [500/500] Loss: 16.6958

 Epoch [9/80], Iter [600/500] Loss: 24.8461

 Epoch [9/80], Iter [700/500] Loss: 13.7112

 Epoch [9/80], Iter [800/500] Loss: 21.2906

 Epoch [9/80], Iter [900/500] Loss: 31.6950

 Epoch [10/80], Iter [100/500] Loss: 20.7707

 Epoch [10/80], Iter [200/500] Loss: 15.6260

 Epoch [10/80], Iter [300/500] Loss: 28.5737

 Epoch [10/80], Iter [400/500] Loss: 36.6791

 Epoch [10/80], Iter [500/500] Loss: 38.9839

 Epoch [10/80], Iter [600/500] Loss: 14.4459

 Epoch [10/80], Iter [700/500] Loss: 10.0907

 Epoch [10/80], Iter [800/500] Loss: 17.9035

 Epoch [10/80], Iter [900/500] Loss: 24.5759

 Epoch [11/80], Iter [100/500] Loss: 19.8531

 Epoch [11/80], Iter [200/500] Loss: 15.7126

 Epoch [11/80], Iter [300/500] Loss: 18.0198

 Epoch [11/80], Iter [400/500] Loss: 19.3038

 Epoch [11/80], Iter [500/500] Loss: 27.4435

 Epoch [11/80], Iter [600/500] Loss: 18.1086

 Epoch [11/80], Iter [700/500] Loss: 10.8124

 Epoch [11/80], Iter [800/500] Loss: 31.2389

 Epoch [11/80], Iter [900/500] Loss: 14.4881

 Epoch [12/80], Iter [100/500] Loss: 10.6320

 Epoch [12/80], Iter [200/500] Loss: 26.8394

 Epoch [12/80], Iter [300/500] Loss: 16.0246

 Epoch [12/80], Iter [400/500] Loss: 16.3263

 Epoch [12/80], Iter [500/500] Loss: 24.5880

 Epoch [12/80], Iter [600/500] Loss: 15.7498

 Epoch [12/80], Iter [700/500] Loss: 11.4933

 Epoch [12/80], Iter [800/500] Loss: 9.7252

 Epoch [12/80], Iter [900/500] Loss: 31.6774

 Epoch [13/80], Iter [100/500] Loss: 21.1929

 Epoch [13/80], Iter [200/500] Loss: 17.0953

 Epoch [13/80], Iter [300/500] Loss: 21.1883

 Epoch [13/80], Iter [400/500] Loss: 15.9005

 Epoch [13/80], Iter [500/500] Loss: 14.7924

 Epoch [13/80], Iter [600/500] Loss: 12.4324

 Epoch [13/80], Iter [700/500] Loss: 12.0840

 Epoch [13/80], Iter [800/500] Loss: 30.9664

 Epoch [13/80], Iter [900/500] Loss: 14.9601

 Epoch [14/80], Iter [100/500] Loss: 6.5126

 Epoch [14/80], Iter [200/500] Loss: 11.3227

 Epoch [14/80], Iter [300/500] Loss: 12.9980

 Epoch [14/80], Iter [400/500] Loss: 13.8523

 Epoch [14/80], Iter [500/500] Loss: 10.6771

 Epoch [14/80], Iter [600/500] Loss: 7.3953

 Epoch [14/80], Iter [700/500] Loss: 14.6829

 Epoch [14/80], Iter [800/500] Loss: 15.6956

 Epoch [14/80], Iter [900/500] Loss: 21.8876

 Epoch [15/80], Iter [100/500] Loss: 5.1943

 Epoch [15/80], Iter [200/500] Loss: 13.0731

 Epoch [15/80], Iter [300/500] Loss: 6.8931

 Epoch [15/80], Iter [400/500] Loss: 15.3212

 Epoch [15/80], Iter [500/500] Loss: 8.1775

 Epoch [15/80], Iter [600/500] Loss: 11.5664

 Epoch [15/80], Iter [700/500] Loss: 5.5951

 Epoch [15/80], Iter [800/500] Loss: 10.9075

 Epoch [15/80], Iter [900/500] Loss: 14.8503

 Epoch [16/80], Iter [100/500] Loss: 19.5184

 Epoch [16/80], Iter [200/500] Loss: 10.3570

 Epoch [16/80], Iter [300/500] Loss: 10.0997

 Epoch [16/80], Iter [400/500] Loss: 9.7350

 Epoch [16/80], Iter [500/500] Loss: 11.3000

 Epoch [16/80], Iter [600/500] Loss: 21.6213

 Epoch [16/80], Iter [700/500] Loss: 9.7907

 Epoch [16/80], Iter [800/500] Loss: 10.0128

 Epoch [16/80], Iter [900/500] Loss: 10.7869

 Epoch [17/80], Iter [100/500] Loss: 9.2015

 Epoch [17/80], Iter [200/500] Loss: 7.3021

 Epoch [17/80], Iter [300/500] Loss: 5.9662

 Epoch [17/80], Iter [400/500] Loss: 17.5215

 Epoch [17/80], Iter [500/500] Loss: 7.3349

 Epoch [17/80], Iter [600/500] Loss: 8.5626

 Epoch [17/80], Iter [700/500] Loss: 12.7575

 Epoch [17/80], Iter [800/500] Loss: 10.7792

 Epoch [17/80], Iter [900/500] Loss: 7.0889

 Epoch [18/80], Iter [100/500] Loss: 10.5613

 Epoch [18/80], Iter [200/500] Loss: 3.0777

 Epoch [18/80], Iter [300/500] Loss: 6.3598

 Epoch [18/80], Iter [400/500] Loss: 7.9515

 Epoch [18/80], Iter [500/500] Loss: 10.8023

 Epoch [18/80], Iter [600/500] Loss: 7.3443

 Epoch [18/80], Iter [700/500] Loss: 8.0862

 Epoch [18/80], Iter [800/500] Loss: 15.2795

 Epoch [18/80], Iter [900/500] Loss: 10.2788

 Epoch [19/80], Iter [100/500] Loss: 5.0786

 Epoch [19/80], Iter [200/500] Loss: 8.8248

 Epoch [19/80], Iter [300/500] Loss: 4.9262

 Epoch [19/80], Iter [400/500] Loss: 7.8992

 Epoch [19/80], Iter [500/500] Loss: 13.1279

 Epoch [19/80], Iter [600/500] Loss: 8.2703

 Epoch [19/80], Iter [700/500] Loss: 4.1547

 Epoch [19/80], Iter [800/500] Loss: 9.0542

 Epoch [19/80], Iter [900/500] Loss: 6.7904

 Epoch [20/80], Iter [100/500] Loss: 8.6150

 Epoch [20/80], Iter [200/500] Loss: 3.7212

 Epoch [20/80], Iter [300/500] Loss: 6.2832

 Epoch [20/80], Iter [400/500] Loss: 10.1591

 Epoch [20/80], Iter [500/500] Loss: 9.7668

 Epoch [20/80], Iter [600/500] Loss: 4.7498

 Epoch [20/80], Iter [700/500] Loss: 4.8831

 Epoch [20/80], Iter [800/500] Loss: 7.7877

 Epoch [20/80], Iter [900/500] Loss: 8.5114

 Epoch [21/80], Iter [100/500] Loss: 2.1853

 Epoch [21/80], Iter [200/500] Loss: 5.8741

 Epoch [21/80], Iter [300/500] Loss: 5.3676

 Epoch [21/80], Iter [400/500] Loss: 3.1155

 Epoch [21/80], Iter [500/500] Loss: 4.2433

 Epoch [21/80], Iter [600/500] Loss: 1.9783

 Epoch [21/80], Iter [700/500] Loss: 2.7622

 Epoch [21/80], Iter [800/500] Loss: 2.0112

 Epoch [21/80], Iter [900/500] Loss: 2.2692

 Epoch [22/80], Iter [100/500] Loss: 2.1882

 Epoch [22/80], Iter [200/500] Loss: 4.2540

 Epoch [22/80], Iter [300/500] Loss: 4.0126

 Epoch [22/80], Iter [400/500] Loss: 2.2220

 Epoch [22/80], Iter [500/500] Loss: 2.4755

 Epoch [22/80], Iter [600/500] Loss: 3.0793

 Epoch [22/80], Iter [700/500] Loss: 1.9128

 Epoch [22/80], Iter [800/500] Loss: 4.8721

 Epoch [22/80], Iter [900/500] Loss: 2.1349

 Epoch [23/80], Iter [100/500] Loss: 1.8705

 Epoch [23/80], Iter [200/500] Loss: 2.4326

 Epoch [23/80], Iter [300/500] Loss: 1.5636

 Epoch [23/80], Iter [400/500] Loss: 2.0465

 Epoch [23/80], Iter [500/500] Loss: 1.5183

 Epoch [23/80], Iter [600/500] Loss: 2.2711

 Epoch [23/80], Iter [700/500] Loss: 2.8997

 Epoch [23/80], Iter [800/500] Loss: 2.6150

 Epoch [23/80], Iter [900/500] Loss: 2.8083

 Epoch [24/80], Iter [100/500] Loss: 2.7177

 Epoch [24/80], Iter [200/500] Loss: 3.2044

 Epoch [24/80], Iter [300/500] Loss: 3.8137

 Epoch [24/80], Iter [400/500] Loss: 1.9400

 Epoch [24/80], Iter [500/500] Loss: 2.3550

 Epoch [24/80], Iter [600/500] Loss: 1.6304

 Epoch [24/80], Iter [700/500] Loss: 1.1287

 Epoch [24/80], Iter [800/500] Loss: 2.1436

 Epoch [24/80], Iter [900/500] Loss: 1.3761

 Epoch [25/80], Iter [100/500] Loss: 1.9115

 Epoch [25/80], Iter [200/500] Loss: 0.9423

 Epoch [25/80], Iter [300/500] Loss: 1.1732

 Epoch [25/80], Iter [400/500] Loss: 1.8946

 Epoch [25/80], Iter [500/500] Loss: 1.4359

 Epoch [25/80], Iter [600/500] Loss: 2.7499

 Epoch [25/80], Iter [700/500] Loss: 3.2734

 Epoch [25/80], Iter [800/500] Loss: 1.5863

 Epoch [25/80], Iter [900/500] Loss: 2.8276

 Epoch [26/80], Iter [100/500] Loss: 3.3783

 Epoch [26/80], Iter [200/500] Loss: 1.6336

 Epoch [26/80], Iter [300/500] Loss: 1.8298

 Epoch [26/80], Iter [400/500] Loss: 1.1775

 Epoch [26/80], Iter [500/500] Loss: 2.5811

 Epoch [26/80], Iter [600/500] Loss: 1.2587

 Epoch [26/80], Iter [700/500] Loss: 2.3547

 Epoch [26/80], Iter [800/500] Loss: 3.2238

 Epoch [26/80], Iter [900/500] Loss: 1.8571

 Epoch [27/80], Iter [100/500] Loss: 1.9582

 Epoch [27/80], Iter [200/500] Loss: 0.8752

 Epoch [27/80], Iter [300/500] Loss: 1.5140

 Epoch [27/80], Iter [400/500] Loss: 1.4624

 Epoch [27/80], Iter [500/500] Loss: 3.6735

 Epoch [27/80], Iter [600/500] Loss: 2.5618

 Epoch [27/80], Iter [700/500] Loss: 1.3707

 Epoch [27/80], Iter [800/500] Loss: 1.2286

 Epoch [27/80], Iter [900/500] Loss: 2.4623

 Epoch [28/80], Iter [100/500] Loss: 0.8966

 Epoch [28/80], Iter [200/500] Loss: 1.4363

 Epoch [28/80], Iter [300/500] Loss: 1.3229

 Epoch [28/80], Iter [400/500] Loss: 1.4402

 Epoch [28/80], Iter [500/500] Loss: 1.4920

 Epoch [28/80], Iter [600/500] Loss: 1.9604

 Epoch [28/80], Iter [700/500] Loss: 3.1165

 Epoch [28/80], Iter [800/500] Loss: 1.0391

 Epoch [28/80], Iter [900/500] Loss: 2.5201

 Epoch [29/80], Iter [100/500] Loss: 1.8787

 Epoch [29/80], Iter [200/500] Loss: 0.9840

 Epoch [29/80], Iter [300/500] Loss: 1.4460

 Epoch [29/80], Iter [400/500] Loss: 2.2886

 Epoch [29/80], Iter [500/500] Loss: 1.4231

 Epoch [29/80], Iter [600/500] Loss: 1.4980

 Epoch [29/80], Iter [700/500] Loss: 2.3995

 Epoch [29/80], Iter [800/500] Loss: 1.7662

 Epoch [29/80], Iter [900/500] Loss: 2.3659

 Epoch [30/80], Iter [100/500] Loss: 1.9505

 Epoch [30/80], Iter [200/500] Loss: 1.1663

 Epoch [30/80], Iter [300/500] Loss: 0.9471

 Epoch [30/80], Iter [400/500] Loss: 0.9364

 Epoch [30/80], Iter [500/500] Loss: 1.0124

 Epoch [30/80], Iter [600/500] Loss: 1.2437

 Epoch [30/80], Iter [700/500] Loss: 0.8796

 Epoch [30/80], Iter [800/500] Loss: 1.2183

 Epoch [30/80], Iter [900/500] Loss: 2.3959

 Epoch [31/80], Iter [100/500] Loss: 1.4337

 Epoch [31/80], Iter [200/500] Loss: 1.1861

 Epoch [31/80], Iter [300/500] Loss: 1.2915

 Epoch [31/80], Iter [400/500] Loss: 1.0188

 Epoch [31/80], Iter [500/500] Loss: 2.2067

 Epoch [31/80], Iter [600/500] Loss: 2.6476

 Epoch [31/80], Iter [700/500] Loss: 1.1402

 Epoch [31/80], Iter [800/500] Loss: 1.4248

 Epoch [31/80], Iter [900/500] Loss: 1.0669

 Epoch [32/80], Iter [100/500] Loss: 1.5955

 Epoch [32/80], Iter [200/500] Loss: 1.7216

 Epoch [32/80], Iter [300/500] Loss: 1.2304

 Epoch [32/80], Iter [400/500] Loss: 1.7058

 Epoch [32/80], Iter [500/500] Loss: 1.2115

 Epoch [32/80], Iter [600/500] Loss: 1.6176

 Epoch [32/80], Iter [700/500] Loss: 1.3043

 Epoch [32/80], Iter [800/500] Loss: 1.9501

 Epoch [32/80], Iter [900/500] Loss: 1.9035

 Epoch [33/80], Iter [100/500] Loss: 1.9505

 Epoch [33/80], Iter [200/500] Loss: 1.5603

 Epoch [33/80], Iter [300/500] Loss: 1.5528

 Epoch [33/80], Iter [400/500] Loss: 1.4192

 Epoch [33/80], Iter [500/500] Loss: 1.2211

 Epoch [33/80], Iter [600/500] Loss: 1.3927

 Epoch [33/80], Iter [700/500] Loss: 2.3885

 Epoch [33/80], Iter [800/500] Loss: 1.0948

 Epoch [33/80], Iter [900/500] Loss: 1.6951

 Epoch [34/80], Iter [100/500] Loss: 0.9534

 Epoch [34/80], Iter [200/500] Loss: 0.7364

 Epoch [34/80], Iter [300/500] Loss: 1.2372

 Epoch [34/80], Iter [400/500] Loss: 1.6718

 Epoch [34/80], Iter [500/500] Loss: 0.7804

 Epoch [34/80], Iter [600/500] Loss: 2.1848

 Epoch [34/80], Iter [700/500] Loss: 0.6333

 Epoch [34/80], Iter [800/500] Loss: 1.6399

 Epoch [34/80], Iter [900/500] Loss: 0.9555

 Epoch [35/80], Iter [100/500] Loss: 1.5851

 Epoch [35/80], Iter [200/500] Loss: 3.7824

 Epoch [35/80], Iter [300/500] Loss: 2.5642

 Epoch [35/80], Iter [400/500] Loss: 0.8965

 Epoch [35/80], Iter [500/500] Loss: 1.9092

 Epoch [35/80], Iter [600/500] Loss: 1.3729

 Epoch [35/80], Iter [700/500] Loss: 2.2079

 Epoch [35/80], Iter [800/500] Loss: 0.9051

 Epoch [35/80], Iter [900/500] Loss: 1.1845

 Epoch [36/80], Iter [100/500] Loss: 0.8240

 Epoch [36/80], Iter [200/500] Loss: 1.1929

 Epoch [36/80], Iter [300/500] Loss: 1.7051

 Epoch [36/80], Iter [400/500] Loss: 0.7341

 Epoch [36/80], Iter [500/500] Loss: 0.8078

 Epoch [36/80], Iter [600/500] Loss: 0.7525

 Epoch [36/80], Iter [700/500] Loss: 1.5739

 Epoch [36/80], Iter [800/500] Loss: 1.3938

 Epoch [36/80], Iter [900/500] Loss: 0.7145

 Epoch [37/80], Iter [100/500] Loss: 0.9577

 Epoch [37/80], Iter [200/500] Loss: 0.9464

 Epoch [37/80], Iter [300/500] Loss: 1.0931

 Epoch [37/80], Iter [400/500] Loss: 1.0390

 Epoch [37/80], Iter [500/500] Loss: 1.3472

 Epoch [37/80], Iter [600/500] Loss: 0.6312

 Epoch [37/80], Iter [700/500] Loss: 0.6754

 Epoch [37/80], Iter [800/500] Loss: 0.5888

 Epoch [37/80], Iter [900/500] Loss: 3.1377

 Epoch [38/80], Iter [100/500] Loss: 0.8339

 Epoch [38/80], Iter [200/500] Loss: 0.9345

 Epoch [38/80], Iter [300/500] Loss: 0.6615

 Epoch [38/80], Iter [400/500] Loss: 1.6327

 Epoch [38/80], Iter [500/500] Loss: 0.4701

 Epoch [38/80], Iter [600/500] Loss: 1.1513

 Epoch [38/80], Iter [700/500] Loss: 0.9013

 Epoch [38/80], Iter [800/500] Loss: 2.7680

 Epoch [38/80], Iter [900/500] Loss: 1.2733

 Epoch [39/80], Iter [100/500] Loss: 3.0368

 Epoch [39/80], Iter [200/500] Loss: 1.5569

 Epoch [39/80], Iter [300/500] Loss: 0.5049

 Epoch [39/80], Iter [400/500] Loss: 0.4075

 Epoch [39/80], Iter [500/500] Loss: 0.9771

 Epoch [39/80], Iter [600/500] Loss: 0.9003

 Epoch [39/80], Iter [700/500] Loss: 1.6323

 Epoch [39/80], Iter [800/500] Loss: 0.4881

 Epoch [39/80], Iter [900/500] Loss: 2.1344

 Epoch [40/80], Iter [100/500] Loss: 1.2439

 Epoch [40/80], Iter [200/500] Loss: 1.3419

 Epoch [40/80], Iter [300/500] Loss: 0.9575

 Epoch [40/80], Iter [400/500] Loss: 1.4438

 Epoch [40/80], Iter [500/500] Loss: 0.8559

 Epoch [40/80], Iter [600/500] Loss: 1.0400

 Epoch [40/80], Iter [700/500] Loss: 0.9063

 Epoch [40/80], Iter [800/500] Loss: 1.0714

 Epoch [40/80], Iter [900/500] Loss: 0.5098

 Epoch [41/80], Iter [100/500] Loss: 0.5906

 Epoch [41/80], Iter [200/500] Loss: 0.6610

 Epoch [41/80], Iter [300/500] Loss: 0.4230

 Epoch [41/80], Iter [400/500] Loss: 0.6014

 Epoch [41/80], Iter [500/500] Loss: 0.3004

 Epoch [41/80], Iter [600/500] Loss: 0.5606

 Epoch [41/80], Iter [700/500] Loss: 0.4994

 Epoch [41/80], Iter [800/500] Loss: 0.8664

 Epoch [41/80], Iter [900/500] Loss: 0.5302

 Epoch [42/80], Iter [100/500] Loss: 0.2961

 Epoch [42/80], Iter [200/500] Loss: 0.2826

 Epoch [42/80], Iter [300/500] Loss: 0.3575

 Epoch [42/80], Iter [400/500] Loss: 0.3224

 Epoch [42/80], Iter [500/500] Loss: 0.6851

 Epoch [42/80], Iter [600/500] Loss: 0.2997

 Epoch [42/80], Iter [700/500] Loss: 0.3907

 Epoch [42/80], Iter [800/500] Loss: 0.4437

 Epoch [42/80], Iter [900/500] Loss: 0.4847

 Epoch [43/80], Iter [100/500] Loss: 0.5418

 Epoch [43/80], Iter [200/500] Loss: 0.4099

 Epoch [43/80], Iter [300/500] Loss: 0.3339

 Epoch [43/80], Iter [400/500] Loss: 0.5546

 Epoch [43/80], Iter [500/500] Loss: 0.5867

 Epoch [43/80], Iter [600/500] Loss: 0.3540

 Epoch [43/80], Iter [700/500] Loss: 0.4656

 Epoch [43/80], Iter [800/500] Loss: 0.2922

 Epoch [43/80], Iter [900/500] Loss: 0.3042

 Epoch [44/80], Iter [100/500] Loss: 0.6309

 Epoch [44/80], Iter [200/500] Loss: 0.2412

 Epoch [44/80], Iter [300/500] Loss: 0.5505

 Epoch [44/80], Iter [400/500] Loss: 0.4133

 Epoch [44/80], Iter [500/500] Loss: 0.4317

 Epoch [44/80], Iter [600/500] Loss: 0.4152

 Epoch [44/80], Iter [700/500] Loss: 0.6375

 Epoch [44/80], Iter [800/500] Loss: 0.3283

 Epoch [44/80], Iter [900/500] Loss: 0.4399

 Epoch [45/80], Iter [100/500] Loss: 0.2777

 Epoch [45/80], Iter [200/500] Loss: 0.3131

 Epoch [45/80], Iter [300/500] Loss: 0.2451

 Epoch [45/80], Iter [400/500] Loss: 0.5350

 Epoch [45/80], Iter [500/500] Loss: 0.2501

 Epoch [45/80], Iter [600/500] Loss: 0.2076

 Epoch [45/80], Iter [700/500] Loss: 0.2317

 Epoch [45/80], Iter [800/500] Loss: 0.8772

 Epoch [45/80], Iter [900/500] Loss: 0.4162

 Epoch [46/80], Iter [100/500] Loss: 0.3190

 Epoch [46/80], Iter [200/500] Loss: 0.2458

 Epoch [46/80], Iter [300/500] Loss: 0.2976

 Epoch [46/80], Iter [400/500] Loss: 0.3712

 Epoch [46/80], Iter [500/500] Loss: 0.4305

 Epoch [46/80], Iter [600/500] Loss: 0.5143

 Epoch [46/80], Iter [700/500] Loss: 0.2622

 Epoch [46/80], Iter [800/500] Loss: 0.5331

 Epoch [46/80], Iter [900/500] Loss: 0.3598

 Epoch [47/80], Iter [100/500] Loss: 0.2180

 Epoch [47/80], Iter [200/500] Loss: 0.2275

 Epoch [47/80], Iter [300/500] Loss: 0.5302

 Epoch [47/80], Iter [400/500] Loss: 0.3535

 Epoch [47/80], Iter [500/500] Loss: 0.5790

 Epoch [47/80], Iter [600/500] Loss: 0.3741

 Epoch [47/80], Iter [700/500] Loss: 0.5120

 Epoch [47/80], Iter [800/500] Loss: 0.6204

 Epoch [47/80], Iter [900/500] Loss: 0.4902

 Epoch [48/80], Iter [100/500] Loss: 0.2668

 Epoch [48/80], Iter [200/500] Loss: 0.5693

 Epoch [48/80], Iter [300/500] Loss: 0.3328

 Epoch [48/80], Iter [400/500] Loss: 0.2399

 Epoch [48/80], Iter [500/500] Loss: 0.3160

 Epoch [48/80], Iter [600/500] Loss: 0.2944

 Epoch [48/80], Iter [700/500] Loss: 0.2742

 Epoch [48/80], Iter [800/500] Loss: 0.5297

 Epoch [48/80], Iter [900/500] Loss: 0.3755

 Epoch [49/80], Iter [100/500] Loss: 0.2658

 Epoch [49/80], Iter [200/500] Loss: 0.2223

 Epoch [49/80], Iter [300/500] Loss: 0.4348

 Epoch [49/80], Iter [400/500] Loss: 0.2313

 Epoch [49/80], Iter [500/500] Loss: 0.2838

 Epoch [49/80], Iter [600/500] Loss: 0.3415

 Epoch [49/80], Iter [700/500] Loss: 0.3633

 Epoch [49/80], Iter [800/500] Loss: 0.3768

 Epoch [49/80], Iter [900/500] Loss: 0.5177

 Epoch [50/80], Iter [100/500] Loss: 0.3538

 Epoch [50/80], Iter [200/500] Loss: 0.2759

 Epoch [50/80], Iter [300/500] Loss: 0.2255

 Epoch [50/80], Iter [400/500] Loss: 0.3148

 Epoch [50/80], Iter [500/500] Loss: 0.4502

 Epoch [50/80], Iter [600/500] Loss: 0.3382

 Epoch [50/80], Iter [700/500] Loss: 0.8207

 Epoch [50/80], Iter [800/500] Loss: 0.3541

 Epoch [50/80], Iter [900/500] Loss: 0.4090

 ('time used:', 17124.861335999998)

Without DataParallel initialization:

 Epoch [1/80], Iter [100/500] Loss: 635.6779

 Epoch [1/80], Iter [200/500] Loss: 247.5514

 Epoch [1/80], Iter [300/500] Loss: 231.7609

 Epoch [1/80], Iter [400/500] Loss: 198.7304

 Epoch [1/80], Iter [500/500] Loss: 207.1028

 Epoch [1/80], Iter [600/500] Loss: 114.7708

 Epoch [1/80], Iter [700/500] Loss: 126.9886

 Epoch [1/80], Iter [800/500] Loss: 160.8622

 Epoch [1/80], Iter [900/500] Loss: 153.8121

 Epoch [2/80], Iter [100/500] Loss: 106.6578

 Epoch [2/80], Iter [200/500] Loss: 91.5044

 Epoch [2/80], Iter [300/500] Loss: 111.4231

 Epoch [2/80], Iter [400/500] Loss: 50.7004

 Epoch [2/80], Iter [500/500] Loss: 58.9242

 Epoch [2/80], Iter [600/500] Loss: 55.2035

 Epoch [2/80], Iter [700/500] Loss: 26.7637

 Epoch [2/80], Iter [800/500] Loss: 52.5472

 Epoch [2/80], Iter [900/500] Loss: 51.7907

 Epoch [3/80], Iter [100/500] Loss: 35.7970

 Epoch [3/80], Iter [200/500] Loss: 59.1204

 Epoch [3/80], Iter [300/500] Loss: 70.5727

 Epoch [3/80], Iter [400/500] Loss: 50.1149

 Epoch [3/80], Iter [500/500] Loss: 26.3628

 Epoch [3/80], Iter [600/500] Loss: 67.3355

 Epoch [3/80], Iter [700/500] Loss: 56.8271

 Epoch [3/80], Iter [800/500] Loss: 46.5803

 Epoch [3/80], Iter [900/500] Loss: 34.9568

 Epoch [4/80], Iter [100/500] Loss: 67.0837

 Epoch [4/80], Iter [200/500] Loss: 36.8596

 Epoch [4/80], Iter [300/500] Loss: 37.6830

 Epoch [4/80], Iter [400/500] Loss: 52.1378

 Epoch [4/80], Iter [500/500] Loss: 104.5909

 Epoch [4/80], Iter [600/500] Loss: 71.3509

 Epoch [4/80], Iter [700/500] Loss: 28.4496

 Epoch [4/80], Iter [800/500] Loss: 56.1399

 Epoch [4/80], Iter [900/500] Loss: 58.7510

 Epoch [5/80], Iter [100/500] Loss: 42.5710

 Epoch [5/80], Iter [200/500] Loss: 25.5430

 Epoch [5/80], Iter [300/500] Loss: 25.9271

 Epoch [5/80], Iter [400/500] Loss: 75.8942

 Epoch [5/80], Iter [500/500] Loss: 70.6782

 Epoch [5/80], Iter [600/500] Loss: 10.7801

 Epoch [5/80], Iter [700/500] Loss: 29.9416

 Epoch [5/80], Iter [800/500] Loss: 47.0781

 Epoch [5/80], Iter [900/500] Loss: 45.4692

 Epoch [6/80], Iter [100/500] Loss: 51.3811

 Epoch [6/80], Iter [200/500] Loss: 30.6207

 Epoch [6/80], Iter [300/500] Loss: 35.4928

 Epoch [6/80], Iter [400/500] Loss: 37.9467

 Epoch [6/80], Iter [500/500] Loss: 36.7505

 Epoch [6/80], Iter [600/500] Loss: 64.3528

 Epoch [6/80], Iter [700/500] Loss: 73.6308

 Epoch [6/80], Iter [800/500] Loss: 33.1290

 Epoch [6/80], Iter [900/500] Loss: 34.2442

 Epoch [7/80], Iter [100/500] Loss: 34.9157

 Epoch [7/80], Iter [200/500] Loss: 26.8041

 Epoch [7/80], Iter [300/500] Loss: 43.5796

 Epoch [7/80], Iter [400/500] Loss: 31.5104

 Epoch [7/80], Iter [500/500] Loss: 41.2132

 Epoch [7/80], Iter [600/500] Loss: 23.1634

 Epoch [7/80], Iter [700/500] Loss: 26.7399

 Epoch [7/80], Iter [800/500] Loss: 60.4979

 Epoch [7/80], Iter [900/500] Loss: 32.8528

 Epoch [8/80], Iter [100/500] Loss: 36.6079

 Epoch [8/80], Iter [200/500] Loss: 49.1552

 Epoch [8/80], Iter [300/500] Loss: 21.2926

 Epoch [8/80], Iter [400/500] Loss: 33.5335

 Epoch [8/80], Iter [500/500] Loss: 50.1770

 Epoch [8/80], Iter [600/500] Loss: 21.9908

 Epoch [8/80], Iter [700/500] Loss: 40.2040

 Epoch [8/80], Iter [800/500] Loss: 22.5460

 Epoch [8/80], Iter [900/500] Loss: 43.9564

 Epoch [9/80], Iter [100/500] Loss: 19.8116

 Epoch [9/80], Iter [200/500] Loss: 8.5169

 Epoch [9/80], Iter [300/500] Loss: 37.0475

 Epoch [9/80], Iter [400/500] Loss: 74.2606

 Epoch [9/80], Iter [500/500] Loss: 16.3256

 Epoch [9/80], Iter [600/500] Loss: 26.0609

 Epoch [9/80], Iter [700/500] Loss: 24.3721

 Epoch [9/80], Iter [800/500] Loss: 37.5132

 Epoch [9/80], Iter [900/500] Loss: 27.4818

 Epoch [10/80], Iter [100/500] Loss: 11.7654

 Epoch [10/80], Iter [200/500] Loss: 9.3536

 Epoch [10/80], Iter [300/500] Loss: 11.6718

 Epoch [10/80], Iter [400/500] Loss: 24.4423

 Epoch [10/80], Iter [500/500] Loss: 25.6966

 Epoch [10/80], Iter [600/500] Loss: 35.2358

 Epoch [10/80], Iter [700/500] Loss: 17.2685

 Epoch [10/80], Iter [800/500] Loss: 22.3965

 Epoch [10/80], Iter [900/500] Loss: 42.6901

 Epoch [11/80], Iter [100/500] Loss: 17.9832

 Epoch [11/80], Iter [200/500] Loss: 18.8705

 Epoch [11/80], Iter [300/500] Loss: 25.3700

 Epoch [11/80], Iter [400/500] Loss: 10.8511

 Epoch [11/80], Iter [500/500] Loss: 18.3028

 Epoch [11/80], Iter [600/500] Loss: 23.2316

 Epoch [11/80], Iter [700/500] Loss: 10.2498

 Epoch [11/80], Iter [800/500] Loss: 14.7609

 Epoch [11/80], Iter [900/500] Loss: 20.1801

 Epoch [12/80], Iter [100/500] Loss: 23.8675

 Epoch [12/80], Iter [200/500] Loss: 15.7924

 Epoch [12/80], Iter [300/500] Loss: 13.7092

 Epoch [12/80], Iter [400/500] Loss: 12.0196

 Epoch [12/80], Iter [500/500] Loss: 7.2408

 Epoch [12/80], Iter [600/500] Loss: 10.7912

 Epoch [12/80], Iter [700/500] Loss: 11.9665

 Epoch [12/80], Iter [800/500] Loss: 13.7599

 Epoch [12/80], Iter [900/500] Loss: 18.3869

 Epoch [13/80], Iter [100/500] Loss: 11.1715

 Epoch [13/80], Iter [200/500] Loss: 17.6397

 Epoch [13/80], Iter [300/500] Loss: 9.3256

 Epoch [13/80], Iter [400/500] Loss: 12.7995

 Epoch [13/80], Iter [500/500] Loss: 7.8598

 Epoch [13/80], Iter [600/500] Loss: 10.7001

 Epoch [13/80], Iter [700/500] Loss: 26.3672

 Epoch [13/80], Iter [800/500] Loss: 15.4815

 Epoch [13/80], Iter [900/500] Loss: 14.0478

 Epoch [14/80], Iter [100/500] Loss: 16.0473

 Epoch [14/80], Iter [200/500] Loss: 4.7192

 Epoch [14/80], Iter [300/500] Loss: 10.7586

 Epoch [14/80], Iter [400/500] Loss: 13.6734

 Epoch [14/80], Iter [500/500] Loss: 9.3228

 Epoch [14/80], Iter [600/500] Loss: 5.5830

 Epoch [14/80], Iter [700/500] Loss: 7.5252

 Epoch [14/80], Iter [800/500] Loss: 7.6239

 Epoch [14/80], Iter [900/500] Loss: 7.1024

 Epoch [15/80], Iter [100/500] Loss: 17.5188

 Epoch [15/80], Iter [200/500] Loss: 11.8842

 Epoch [15/80], Iter [300/500] Loss: 9.0330

 Epoch [15/80], Iter [400/500] Loss: 11.7120

 Epoch [15/80], Iter [500/500] Loss: 17.0862

 Epoch [15/80], Iter [600/500] Loss: 11.4103

 Epoch [15/80], Iter [700/500] Loss: 12.2746

 Epoch [15/80], Iter [800/500] Loss: 13.6224

 Epoch [15/80], Iter [900/500] Loss: 12.7686

 Epoch [16/80], Iter [100/500] Loss: 5.5978

 Epoch [16/80], Iter [200/500] Loss: 12.2122

 Epoch [16/80], Iter [300/500] Loss: 5.1189

 Epoch [16/80], Iter [400/500] Loss: 14.1793

 Epoch [16/80], Iter [500/500] Loss: 10.3744

 Epoch [16/80], Iter [600/500] Loss: 5.2099

 Epoch [16/80], Iter [700/500] Loss: 6.7522

 Epoch [16/80], Iter [800/500] Loss: 13.2532

 Epoch [16/80], Iter [900/500] Loss: 6.7040

 Epoch [17/80], Iter [100/500] Loss: 10.7390

 Epoch [17/80], Iter [200/500] Loss: 8.1525

 Epoch [17/80], Iter [300/500] Loss: 14.2229

 Epoch [17/80], Iter [400/500] Loss: 7.6302

 Epoch [17/80], Iter [500/500] Loss: 6.4554

 Epoch [17/80], Iter [600/500] Loss: 8.2380

 Epoch [17/80], Iter [700/500] Loss: 6.4445

 Epoch [17/80], Iter [800/500] Loss: 8.4644

 Epoch [17/80], Iter [900/500] Loss: 9.0200

 Epoch [18/80], Iter [100/500] Loss: 9.5088

 Epoch [18/80], Iter [200/500] Loss: 3.8648

 Epoch [18/80], Iter [300/500] Loss: 8.8408

 Epoch [18/80], Iter [400/500] Loss: 7.4195

 Epoch [18/80], Iter [500/500] Loss: 15.0480

 Epoch [18/80], Iter [600/500] Loss: 5.6232

 Epoch [18/80], Iter [700/500] Loss: 5.2233

 Epoch [18/80], Iter [800/500] Loss: 6.5702

 Epoch [18/80], Iter [900/500] Loss: 13.7427

 Epoch [19/80], Iter [100/500] Loss: 3.5658

 Epoch [19/80], Iter [200/500] Loss: 4.7062

 Epoch [19/80], Iter [300/500] Loss: 10.7831

 Epoch [19/80], Iter [400/500] Loss: 13.1375

 Epoch [19/80], Iter [500/500] Loss: 22.2764

 Epoch [19/80], Iter [600/500] Loss: 10.3463

 Epoch [19/80], Iter [700/500] Loss: 7.2373

 Epoch [19/80], Iter [800/500] Loss: 5.5266

 Epoch [19/80], Iter [900/500] Loss: 9.2434

 Epoch [20/80], Iter [100/500] Loss: 7.8164

 Epoch [20/80], Iter [200/500] Loss: 9.6628

 Epoch [20/80], Iter [300/500] Loss: 4.1032

 Epoch [20/80], Iter [400/500] Loss: 16.5922

 Epoch [20/80], Iter [500/500] Loss: 6.9907

 Epoch [20/80], Iter [600/500] Loss: 10.9906

 Epoch [20/80], Iter [700/500] Loss: 8.5092

 Epoch [20/80], Iter [800/500] Loss: 7.1332

 Epoch [20/80], Iter [900/500] Loss: 6.1639

 Epoch [21/80], Iter [100/500] Loss: 6.3100

 Epoch [21/80], Iter [200/500] Loss: 4.5190

 Epoch [21/80], Iter [300/500] Loss: 4.3493

 Epoch [21/80], Iter [400/500] Loss: 7.9860

 Epoch [21/80], Iter [500/500] Loss: 8.8312

 Epoch [21/80], Iter [600/500] Loss: 10.7502

 Epoch [21/80], Iter [700/500] Loss: 3.2116

 Epoch [21/80], Iter [800/500] Loss: 4.0126

 Epoch [21/80], Iter [900/500] Loss: 5.3675

 Epoch [22/80], Iter [100/500] Loss: 1.4893

 Epoch [22/80], Iter [200/500] Loss: 1.6984

 Epoch [22/80], Iter [300/500] Loss: 2.6195

 Epoch [22/80], Iter [400/500] Loss: 2.1465

 Epoch [22/80], Iter [500/500] Loss: 2.9847

 Epoch [22/80], Iter [600/500] Loss: 4.9699

 Epoch [22/80], Iter [700/500] Loss: 1.6728

 Epoch [22/80], Iter [800/500] Loss: 1.3381

 Epoch [22/80], Iter [900/500] Loss: 2.0680

 Epoch [23/80], Iter [100/500] Loss: 1.9145

 Epoch [23/80], Iter [200/500] Loss: 0.9280

 Epoch [23/80], Iter [300/500] Loss: 2.9585

 Epoch [23/80], Iter [400/500] Loss: 1.0787

 Epoch [23/80], Iter [500/500] Loss: 3.1779

 Epoch [23/80], Iter [600/500] Loss: 2.4411

 Epoch [23/80], Iter [700/500] Loss: 2.0049

 Epoch [23/80], Iter [800/500] Loss: 2.2844

 Epoch [23/80], Iter [900/500] Loss: 2.2328

 Epoch [24/80], Iter [100/500] Loss: 1.5221

 Epoch [24/80], Iter [200/500] Loss: 2.0100

 Epoch [24/80], Iter [300/500] Loss: 1.8868

 Epoch [24/80], Iter [400/500] Loss: 1.4898

 Epoch [24/80], Iter [500/500] Loss: 1.1626

 Epoch [24/80], Iter [600/500] Loss: 1.2527

 Epoch [24/80], Iter [700/500] Loss: 1.3430

 Epoch [24/80], Iter [800/500] Loss: 1.3355

 Epoch [24/80], Iter [900/500] Loss: 1.8292

 Epoch [25/80], Iter [100/500] Loss: 2.2471

 Epoch [25/80], Iter [200/500] Loss: 2.8727

 Epoch [25/80], Iter [300/500] Loss: 1.3531

 Epoch [25/80], Iter [400/500] Loss: 1.1110

 Epoch [25/80], Iter [500/500] Loss: 2.7648

 Epoch [25/80], Iter [600/500] Loss: 1.8364

 Epoch [25/80], Iter [700/500] Loss: 1.4299

 Epoch [25/80], Iter [800/500] Loss: 1.5985

 Epoch [25/80], Iter [900/500] Loss: 2.5364

 Epoch [26/80], Iter [100/500] Loss: 2.6469

 Epoch [26/80], Iter [200/500] Loss: 3.1215

 Epoch [26/80], Iter [300/500] Loss: 1.4029

 Epoch [26/80], Iter [400/500] Loss: 1.2688

 Epoch [26/80], Iter [500/500] Loss: 2.4794

 Epoch [26/80], Iter [600/500] Loss: 1.1937

 Epoch [26/80], Iter [700/500] Loss: 1.0709

 Epoch [26/80], Iter [800/500] Loss: 1.4961

 Epoch [26/80], Iter [900/500] Loss: 1.4560

 Epoch [27/80], Iter [100/500] Loss: 2.0633

 Epoch [27/80], Iter [200/500] Loss: 2.6687

 Epoch [27/80], Iter [300/500] Loss: 5.2073

 Epoch [27/80], Iter [400/500] Loss: 2.2762

 Epoch [27/80], Iter [500/500] Loss: 1.6105

 Epoch [27/80], Iter [600/500] Loss: 1.6631

 Epoch [27/80], Iter [700/500] Loss: 1.0523

 Epoch [27/80], Iter [800/500] Loss: 2.8945

 Epoch [27/80], Iter [900/500] Loss: 1.5388

 Epoch [28/80], Iter [100/500] Loss: 1.6230

 Epoch [28/80], Iter [200/500] Loss: 1.8003

 Epoch [28/80], Iter [300/500] Loss: 1.4840

 Epoch [28/80], Iter [400/500] Loss: 0.9465

 Epoch [28/80], Iter [500/500] Loss: 1.6054

 Epoch [28/80], Iter [600/500] Loss: 3.3669

 Epoch [28/80], Iter [700/500] Loss: 1.4555

 Epoch [28/80], Iter [800/500] Loss: 2.2903

 Epoch [28/80], Iter [900/500] Loss: 1.2850

 Epoch [29/80], Iter [100/500] Loss: 1.7152

 Epoch [29/80], Iter [200/500] Loss: 1.2824

 Epoch [29/80], Iter [300/500] Loss: 1.5778

 Epoch [29/80], Iter [400/500] Loss: 3.1152

 Epoch [29/80], Iter [500/500] Loss: 1.2492

 Epoch [29/80], Iter [600/500] Loss: 0.9721

 Epoch [29/80], Iter [700/500] Loss: 1.4465

 Epoch [29/80], Iter [800/500] Loss: 0.9678

 Epoch [29/80], Iter [900/500] Loss: 1.5000

 Epoch [30/80], Iter [100/500] Loss: 1.5524

 Epoch [30/80], Iter [200/500] Loss: 1.5233

 Epoch [30/80], Iter [300/500] Loss: 1.4226

 Epoch [30/80], Iter [400/500] Loss: 0.9432

 Epoch [30/80], Iter [500/500] Loss: 1.4623

 Epoch [30/80], Iter [600/500] Loss: 1.3845

 Epoch [30/80], Iter [700/500] Loss: 1.3301

 Epoch [30/80], Iter [800/500] Loss: 1.0105

 Epoch [30/80], Iter [900/500] Loss: 1.8372

 Epoch [31/80], Iter [100/500] Loss: 1.3019

 Epoch [31/80], Iter [200/500] Loss: 1.1216

 Epoch [31/80], Iter [300/500] Loss: 0.8553

 Epoch [31/80], Iter [400/500] Loss: 1.6882

 Epoch [31/80], Iter [500/500] Loss: 1.7691

 Epoch [31/80], Iter [600/500] Loss: 1.7412

 Epoch [31/80], Iter [700/500] Loss: 2.2204

 Epoch [31/80], Iter [800/500] Loss: 0.6559

 Epoch [31/80], Iter [900/500] Loss: 1.4613

 Epoch [32/80], Iter [100/500] Loss: 1.1408

 Epoch [32/80], Iter [200/500] Loss: 3.6378

 Epoch [32/80], Iter [300/500] Loss: 1.5543

 Epoch [32/80], Iter [400/500] Loss: 2.1538

 Epoch [32/80], Iter [500/500] Loss: 1.1102

 Epoch [32/80], Iter [600/500] Loss: 1.3187

 Epoch [32/80], Iter [700/500] Loss: 0.7230

 Epoch [32/80], Iter [800/500] Loss: 1.6149

 Epoch [32/80], Iter [900/500] Loss: 1.0926

 Epoch [33/80], Iter [100/500] Loss: 1.9460

 Epoch [33/80], Iter [200/500] Loss: 0.9948

 Epoch [33/80], Iter [300/500] Loss: 1.4460

 Epoch [33/80], Iter [400/500] Loss: 1.5855

 Epoch [33/80], Iter [500/500] Loss: 1.5834

 Epoch [33/80], Iter [600/500] Loss: 0.8896

 Epoch [33/80], Iter [700/500] Loss: 1.1927

 Epoch [33/80], Iter [800/500] Loss: 1.5707

 Epoch [33/80], Iter [900/500] Loss: 0.7817

 Epoch [34/80], Iter [100/500] Loss: 0.9155

 Epoch [34/80], Iter [200/500] Loss: 0.7930

 Epoch [34/80], Iter [300/500] Loss: 1.2760

 Epoch [34/80], Iter [400/500] Loss: 0.7170

 Epoch [34/80], Iter [500/500] Loss: 1.9962

 Epoch [34/80], Iter [600/500] Loss: 1.2418

 Epoch [34/80], Iter [700/500] Loss: 1.4847

 Epoch [34/80], Iter [800/500] Loss: 0.8495

 Epoch [34/80], Iter [900/500] Loss: 1.3709

 Epoch [35/80], Iter [100/500] Loss: 1.8495

 Epoch [35/80], Iter [200/500] Loss: 0.9494

 Epoch [35/80], Iter [300/500] Loss: 0.6224

 Epoch [35/80], Iter [400/500] Loss: 0.5101

 Epoch [35/80], Iter [500/500] Loss: 0.9373

 Epoch [35/80], Iter [600/500] Loss: 1.5811

 Epoch [35/80], Iter [700/500] Loss: 1.5295

 Epoch [35/80], Iter [800/500] Loss: 0.7787

 Epoch [35/80], Iter [900/500] Loss: 1.0337

 Epoch [36/80], Iter [100/500] Loss: 0.6236

 Epoch [36/80], Iter [200/500] Loss: 1.8516

 Epoch [36/80], Iter [300/500] Loss: 1.5021

 Epoch [36/80], Iter [400/500] Loss: 1.0459

 Epoch [36/80], Iter [500/500] Loss: 1.4737

 Epoch [36/80], Iter [600/500] Loss: 0.7842

 Epoch [36/80], Iter [700/500] Loss: 1.6798

 Epoch [36/80], Iter [800/500] Loss: 1.7413

 Epoch [36/80], Iter [900/500] Loss: 0.6222

 Epoch [37/80], Iter [100/500] Loss: 0.5713

 Epoch [37/80], Iter [200/500] Loss: 1.3030

 Epoch [37/80], Iter [300/500] Loss: 1.6937

 Epoch [37/80], Iter [400/500] Loss: 0.8656

 Epoch [37/80], Iter [500/500] Loss: 1.3340

 Epoch [37/80], Iter [600/500] Loss: 0.6310

 Epoch [37/80], Iter [700/500] Loss: 1.1445

 Epoch [37/80], Iter [800/500] Loss: 0.6099

 Epoch [37/80], Iter [900/500] Loss: 1.3679

 Epoch [38/80], Iter [100/500] Loss: 0.9127

 Epoch [38/80], Iter [200/500] Loss: 1.9450

 Epoch [38/80], Iter [300/500] Loss: 1.2240

 Epoch [38/80], Iter [400/500] Loss: 1.4049

 Epoch [38/80], Iter [500/500] Loss: 0.9247

 Epoch [38/80], Iter [600/500] Loss: 1.5308

 Epoch [38/80], Iter [700/500] Loss: 1.9777

 Epoch [38/80], Iter [800/500] Loss: 1.2109

 Epoch [38/80], Iter [900/500] Loss: 0.8337

 Epoch [39/80], Iter [100/500] Loss: 0.7904

 Epoch [39/80], Iter [200/500] Loss: 0.8451

 Epoch [39/80], Iter [300/500] Loss: 1.6993

 Epoch [39/80], Iter [400/500] Loss: 1.2196

 Epoch [39/80], Iter [500/500] Loss: 1.0665

 Epoch [39/80], Iter [600/500] Loss: 0.7412

 Epoch [39/80], Iter [700/500] Loss: 0.6486

 Epoch [39/80], Iter [800/500] Loss: 1.5608

 Epoch [39/80], Iter [900/500] Loss: 1.9978

 Epoch [40/80], Iter [100/500] Loss: 1.7101

 Epoch [40/80], Iter [200/500] Loss: 1.4484

 Epoch [40/80], Iter [300/500] Loss: 1.5894

 Epoch [40/80], Iter [400/500] Loss: 1.3371

 Epoch [40/80], Iter [500/500] Loss: 0.9766

 Epoch [40/80], Iter [600/500] Loss: 1.9935

 Epoch [40/80], Iter [700/500] Loss: 2.0719

 Epoch [40/80], Iter [800/500] Loss: 0.9455

 Epoch [40/80], Iter [900/500] Loss: 0.8072

 Epoch [41/80], Iter [100/500] Loss: 1.3899

 Epoch [41/80], Iter [200/500] Loss: 0.9863

 Epoch [41/80], Iter [300/500] Loss: 1.3738

 Epoch [41/80], Iter [400/500] Loss: 0.6883

 Epoch [41/80], Iter [500/500] Loss: 0.8442

 Epoch [41/80], Iter [600/500] Loss: 2.0286

 Epoch [41/80], Iter [700/500] Loss: 1.1960

 Epoch [41/80], Iter [800/500] Loss: 1.2499

 Epoch [41/80], Iter [900/500] Loss: 0.6043

 Epoch [42/80], Iter [100/500] Loss: 0.3437

 Epoch [42/80], Iter [200/500] Loss: 0.6596

 Epoch [42/80], Iter [300/500] Loss: 0.4450

 Epoch [42/80], Iter [400/500] Loss: 0.7189

 Epoch [42/80], Iter [500/500] Loss: 0.5022

 Epoch [42/80], Iter [600/500] Loss: 0.4597

 Epoch [42/80], Iter [700/500] Loss: 0.7743

 Epoch [42/80], Iter [800/500] Loss: 0.3344

 Epoch [42/80], Iter [900/500] Loss: 0.7295

 Epoch [43/80], Iter [100/500] Loss: 0.5074

 Epoch [43/80], Iter [200/500] Loss: 0.3128

 Epoch [43/80], Iter [300/500] Loss: 0.2800

 Epoch [43/80], Iter [400/500] Loss: 0.3059

 Epoch [43/80], Iter [500/500] Loss: 0.3486

 Epoch [43/80], Iter [600/500] Loss: 0.7222

 Epoch [43/80], Iter [700/500] Loss: 0.7349

 Epoch [43/80], Iter [800/500] Loss: 0.8455

 Epoch [43/80], Iter [900/500] Loss: 0.7261

 Epoch [44/80], Iter [100/500] Loss: 0.5404

 Epoch [44/80], Iter [200/500] Loss: 0.5428

 Epoch [44/80], Iter [300/500] Loss: 0.5385

 Epoch [44/80], Iter [400/500] Loss: 0.4106

 Epoch [44/80], Iter [500/500] Loss: 0.5296

 Epoch [44/80], Iter [600/500] Loss: 0.6045

 Epoch [44/80], Iter [700/500] Loss: 0.3837

 Epoch [44/80], Iter [800/500] Loss: 0.7552

 Epoch [44/80], Iter [900/500] Loss: 0.4996

 Epoch [45/80], Iter [100/500] Loss: 0.3381

 Epoch [45/80], Iter [200/500] Loss: 0.3910

 Epoch [45/80], Iter [300/500] Loss: 0.3790

 Epoch [45/80], Iter [400/500] Loss: 0.2718

 Epoch [45/80], Iter [500/500] Loss: 0.3572

 Epoch [45/80], Iter [600/500] Loss: 0.2913

 Epoch [45/80], Iter [700/500] Loss: 0.5244

 Epoch [45/80], Iter [800/500] Loss: 0.3647

 Epoch [45/80], Iter [900/500] Loss: 0.3161

 Epoch [46/80], Iter [100/500] Loss: 0.4728

 Epoch [46/80], Iter [200/500] Loss: 0.4386

 Epoch [46/80], Iter [300/500] Loss: 0.2861

 Epoch [46/80], Iter [400/500] Loss: 0.2460

 Epoch [46/80], Iter [500/500] Loss: 0.3490

 Epoch [46/80], Iter [600/500] Loss: 0.5804

 Epoch [46/80], Iter [700/500] Loss: 0.4951

 Epoch [46/80], Iter [800/500] Loss: 0.4600

 Epoch [46/80], Iter [900/500] Loss: 0.5658

 Epoch [47/80], Iter [100/500] Loss: 0.2479

 Epoch [47/80], Iter [200/500] Loss: 0.2688

 Epoch [47/80], Iter [300/500] Loss: 0.3082

 Epoch [47/80], Iter [400/500] Loss: 0.3929

 Epoch [47/80], Iter [500/500] Loss: 0.3126

 Epoch [47/80], Iter [600/500] Loss: 0.5041

 Epoch [47/80], Iter [700/500] Loss: 0.5848

 Epoch [47/80], Iter [800/500] Loss: 0.4968

 Epoch [47/80], Iter [900/500] Loss: 0.3496

 Epoch [48/80], Iter [100/500] Loss: 0.2753

 Epoch [48/80], Iter [200/500] Loss: 0.3885

 Epoch [48/80], Iter [300/500] Loss: 0.3743

 Epoch [48/80], Iter [400/500] Loss: 0.2425

 Epoch [48/80], Iter [500/500] Loss: 0.2472

 Epoch [48/80], Iter [600/500] Loss: 0.3003

 Epoch [48/80], Iter [700/500] Loss: 0.4936

 Epoch [48/80], Iter [800/500] Loss: 0.3169

 Epoch [48/80], Iter [900/500] Loss: 0.2543

 Epoch [49/80], Iter [100/500] Loss: 0.4262

 Epoch [49/80], Iter [200/500] Loss: 0.3396

 Epoch [49/80], Iter [300/500] Loss: 0.4670

 Epoch [49/80], Iter [400/500] Loss: 0.2543

 Epoch [49/80], Iter [500/500] Loss: 0.3146

 Epoch [49/80], Iter [600/500] Loss: 1.3187

 Epoch [49/80], Iter [700/500] Loss: 0.2993

 Epoch [49/80], Iter [800/500] Loss: 0.3053

 Epoch [49/80], Iter [900/500] Loss: 0.3343

 Epoch [50/80], Iter [100/500] Loss: 0.2081

 Epoch [50/80], Iter [200/500] Loss: 0.5631

 Epoch [50/80], Iter [300/500] Loss: 0.4358

 Epoch [50/80], Iter [400/500] Loss: 0.4028

 Epoch [50/80], Iter [500/500] Loss: 0.2510

 Epoch [50/80], Iter [600/500] Loss: 0.5876

 Epoch [50/80], Iter [700/500] Loss: 0.3692

 Epoch [50/80], Iter [800/500] Loss: 0.4500

 Epoch [50/80], Iter [900/500] Loss: 0.1850

 ('time used:', 30318.149681000003)
