在caffe中训练的时候如果使用多GPU则直接在运行程序的时候指定GPU的index即可,但是在Pytorch中则需要在声明模型之后,对声明的模型进行初始化,如:

  1. cnn = DataParallel(AlexNet())

之后直接运行Pytorch之后则默认使用所有的GPU,为了说明上述初始化的作用,我用了一组畸变图像的数据集,写了一个ResNet的模块,过了50个epoch,对比一下实验耗时的差别,代码如下:

  1. # -*- coding: utf-8 -*-
  2. # Implementation of https://arxiv.org/pdf/1512.03385.pdf/
  3. # See section 4.2 for model architecture on CIFAR-10.
  4. # Some part of the code was referenced below.
  5. # https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py
  6.  
  7. import os
  8. from PIL import Image
  9. import time
  10.  
  11. import torch
  12. import torch.nn as nn
  13. import torchvision.datasets as dsets
  14. import torchvision.transforms as transforms
  15. from torch.autograd import Variable
  16. import torch.utils.data as data
  17. from torch.nn import DataParallel
  18.  
  19. kwargs = {'num_workers': 1, 'pin_memory': True}
  20. # def my dataloader, return the data and corresponding label
  21.  
  22. def default_loader(path):
  23. return Image.open(path).convert('RGB')
  24.  
  25. class myImageFloder(data.Dataset): # Class inheritance
  26. def __init__(self, root, label, transform=None, target_transform=None, loader=default_loader):
  27. fh = open(label)
  28. c = 0
  29. imgs = []
  30. class_names = []
  31. for line in fh.readlines():
  32. if c == 0:
  33. class_names = [n.strip() for n in line.rstrip().split(' ')]
  34. else:
  35. cls = line.split() # cls is a list
  36. fn = cls.pop(0)
  37. if os.path.isfile(os.path.join(root, fn)):
  38. imgs.append((fn, tuple([float(v) for v in cls]))) # imgs is the list,and the content is the tuple
  39. # we can use the append way to append the element for list
  40. c = c + 1
  41. self.root = root
  42. self.imgs = imgs
  43. self.classes = class_names
  44. self.transform = transform
  45. self.target_transform = target_transform
  46. self.loader = loader
  47.  
  48. def __getitem__(self, index):
  49. fn, label = self.imgs[index] # eventhough the imgs is just a list, it can return the elements of is
  50. # in a proper way
  51. img = self.loader(os.path.join(self.root, fn))
  52. if self.transform is not None:
  53. img = self.transform(img)
  54. return img, torch.Tensor(label)
  55.  
  56. def __len__(self):
  57. return len(self.imgs)
  58.  
  59. def getName(self):
  60. return self.classes
  61.  
  62. mytransform = transforms.Compose([transforms.ToTensor()]) # almost dont do any operation
  63. train_data_root = "/home/ying/shiyongjie/rjp/generate_distortion_image_2016_03_15/0_Distorted_Image/Training"
  64. test_data_root = "/home/ying/shiyongjie/rjp/generate_distortion_image_2016_03_15/0_Distorted_Image/Testing"
  65. train_label = "/home/ying/shiyongjie/rjp/generate_distortion_image_2016_03_15/0_Distorted_Image/NameList_train.txt"
  66. test_label = "/home/ying/shiyongjie/rjp/generate_distortion_image_2016_03_15/0_Distorted_Image/NameList_test.txt"
  67.  
  68. train_loader = torch.utils.data.DataLoader(
  69. myImageFloder(root=train_data_root, label=train_label, transform=mytransform),
  70. batch_size=64, shuffle=True, **kwargs)
  71.  
  72. test_loader = torch.utils.data.DataLoader(
  73. myImageFloder(root=test_data_root, label=test_label, transform=mytransform),
  74. batch_size=64, shuffle=True, **kwargs)
  75.  
  76. # 3x3 Convolution
  77. def conv3x3(in_channels, out_channels, stride=1):
  78. return nn.Conv2d(in_channels, out_channels, kernel_size=3,
  79. stride=stride, padding=1, bias=False)
  80.  
  81. # Residual Block
  82. class ResidualBlock(nn.Module):
  83. def __init__(self, in_channels, out_channels, stride=1, downsample=None):
  84. super(ResidualBlock, self).__init__()
  85. self.conv1 = conv3x3(in_channels, out_channels, stride) # kernel size is default 3
  86. self.bn1 = nn.BatchNorm2d(out_channels)
  87. self.relu = nn.ReLU(inplace=True)
  88. self.conv2 = conv3x3(out_channels, out_channels)
  89. self.bn2 = nn.BatchNorm2d(out_channels)
  90. self.downsample = downsample
  91.  
  92. def forward(self, x):
  93. residual = x
  94. out = self.conv1(x)
  95. out = self.bn1(out)
  96. out = self.relu(out)
  97. out = self.conv2(out)
  98. out = self.bn2(out)
  99. if self.downsample:
  100. residual = self.downsample(x)
  101. out += residual
  102. out = self.relu(out)
  103. return out
  104.  
  105. # ResNet Module
  106. class ResNet(nn.Module):
  107. def __init__(self, block, layers, num_classes=1):
  108. super(ResNet, self).__init__()
  109. self.in_channels = 16
  110. self.conv = conv3x3(3, 16)
  111. self.bn = nn.BatchNorm2d(16)
  112. self.relu = nn.ReLU(inplace=True)
  113. self.layer1 = self.make_layer(block, 16, layers[0])
  114. self.layer2 = self.make_layer(block, 32, layers[0], 2)
  115. self.layer3 = self.make_layer(block, 64, layers[1], 2) # the input arg is blocks and the stride
  116. self.layer4 = self.make_layer(block, 128, layers[1], 2)
  117. self.layer5 = self.make_layer(block, 256, layers[1], 2)
  118. self.avg_pool = nn.AvgPool2d(kernel_size=8,stride=8) # 2*2
  119. self.fc = nn.Linear(256*2*2, num_classes)
  120.  
  121. def make_layer(self, block, out_channels, blocks, stride=1):
  122. downsample = None
  123. if (stride != 1) or (self.in_channels != out_channels): # the input channel is not consistant with the output's
  124. downsample = nn.Sequential( # do the downsample, def a conv, for example: 256*256*16 -> 128*128*32
  125. conv3x3(self.in_channels, out_channels, stride=stride),
  126. nn.BatchNorm2d(out_channels))
  127. layers = []
  128. layers.append(block(self.in_channels, out_channels, stride, downsample))
  129. self.in_channels = out_channels # update the input channel and the output channel
  130. for i in range(1, blocks): # reduce a block because the first block is already appened
  131. layers.append(block(out_channels, out_channels)) # 32*32 -> 8*8
  132. return nn.Sequential(*layers)
  133.  
  134. def forward(self, x):
  135. out = self.conv(x)
  136. out = self.bn(out)
  137. out = self.relu(out)
  138. out = self.layer1(out)
  139. out = self.layer2(out)
  140. out = self.layer3(out)
  141. out=self.layer4(out)
  142. out=self.layer5(out)
  143. out = self.avg_pool(out)
  144. out = out.view(out.size(0), -1)
  145. out = self.fc(out)
  146. return out
  147.  
  148. resnet = DataParallel(ResNet(ResidualBlock, [3, 3, 3]))
  149. resnet.cuda()
  150.  
  151. # Loss and Optimizer
  152. criterion = nn.MSELoss()
  153. lr = 0.001
  154. optimizer = torch.optim.Adam(resnet.parameters(), lr=lr)
  155.  
  156. # Training
  157. start=time.clock()
  158. for epoch in range(50):
  159. for i, (images, labels) in enumerate(train_loader):
  160. images = Variable(images.cuda())
  161. labels = Variable(labels.cuda())
  162.  
  163. # Forward + Backward + Optimize
  164. optimizer.zero_grad()
  165. outputs = resnet(images)
  166. loss = criterion(outputs, labels)
  167. loss.backward()
  168. optimizer.step()
  169.  
  170. if (i + 1) % 100 == 0:
  171. print ("Epoch [%d/%d], Iter [%d/%d] Loss: %.4f" % (epoch + 1, 80, i + 1, 500, loss.data[0]))
  172.  
  173. # Decaying Learning Rate
  174. if (epoch + 1) % 20 == 0:
  175. lr /= 3
  176. optimizer = torch.optim.Adam(resnet.parameters(), lr=lr)
  177. elapsed=time.clock()-start
  178. print("time used:",elapsed)
  179. # # Test
  180. # correct = 0
  181. # total = 0
  182. # for images, labels in test_loader:
  183. # images = Variable(images.cuda())
  184. # outputs = resnet(images)
  185. # _, predicted = torch.max(outputs.data, 1)
  186. # total += labels.size(0)
  187. # correct += (predicted.cpu() == labels).sum()
  188. #
  189. # print('Accuracy of the model on the test images: %d %%' % (100 * correct / total))
  190.  
  191. # Save the Model
  192. torch.save(resnet.state_dict(), 'resnet.pkl')

作为对比实验,我们同时把ResNet的声明方式修改为

resnet = ResNet(ResidualBlock, [3, 3, 3])

其余不变,再运行程序的时候不指定GPU,直接python resnet.py,在声明DataParallel时,运行耗时结果如下:

('time used:', 17124.861335999998),watch -n 1 nvidia-smi确实显示占用两块GPU

在不声明DataParallel时,实验运行结果耗时如下:

('time used:', 30318.149681000003),watch -n 1 nvidia-smi确实显示占用一块GPU

可以看出,在声明DataParallel时时间压缩了近一半,所以声明DataParallel是使用多GPU运行Pytorch的一种方法。

官方的doc也给出了多GPU使用的例子以及部分数据在GPU与部分数据在CPU上运行的例子

以下是两组实验结果的输出:

DataParallel初始化

  1. Epoch [1/80], Iter [100/500] Loss: 916.5578
  2.  
  3. Epoch [1/80], Iter [200/500] Loss: 172.2591
  4.  
  5. Epoch [1/80], Iter [300/500] Loss: 179.8360
  6.  
  7. Epoch [1/80], Iter [400/500] Loss: 259.6867
  8.  
  9. Epoch [1/80], Iter [500/500] Loss: 244.0616
  10.  
  11. Epoch [1/80], Iter [600/500] Loss: 74.7015
  12.  
  13. Epoch [1/80], Iter [700/500] Loss: 63.1657
  14.  
  15. Epoch [1/80], Iter [800/500] Loss: 90.3517
  16.  
  17. Epoch [1/80], Iter [900/500] Loss: 70.4562
  18.  
  19. Epoch [2/80], Iter [100/500] Loss: 52.3249
  20.  
  21. Epoch [2/80], Iter [200/500] Loss: 129.1855
  22.  
  23. Epoch [2/80], Iter [300/500] Loss: 110.0157
  24.  
  25. Epoch [2/80], Iter [400/500] Loss: 64.9313
  26.  
  27. Epoch [2/80], Iter [500/500] Loss: 87.8385
  28.  
  29. Epoch [2/80], Iter [600/500] Loss: 118.5828
  30.  
  31. Epoch [2/80], Iter [700/500] Loss: 123.9575
  32.  
  33. Epoch [2/80], Iter [800/500] Loss: 79.1908
  34.  
  35. Epoch [2/80], Iter [900/500] Loss: 61.8099
  36.  
  37. Epoch [3/80], Iter [100/500] Loss: 50.4294
  38.  
  39. Epoch [3/80], Iter [200/500] Loss: 106.8135
  40.  
  41. Epoch [3/80], Iter [300/500] Loss: 83.2198
  42.  
  43. Epoch [3/80], Iter [400/500] Loss: 60.7116
  44.  
  45. Epoch [3/80], Iter [500/500] Loss: 101.9553
  46.  
  47. Epoch [3/80], Iter [600/500] Loss: 64.6967
  48.  
  49. Epoch [3/80], Iter [700/500] Loss: 66.2446
  50.  
  51. Epoch [3/80], Iter [800/500] Loss: 81.1825
  52.  
  53. Epoch [3/80], Iter [900/500] Loss: 53.9905
  54.  
  55. Epoch [4/80], Iter [100/500] Loss: 76.2977
  56.  
  57. Epoch [4/80], Iter [200/500] Loss: 18.4255
  58.  
  59. Epoch [4/80], Iter [300/500] Loss: 57.6188
  60.  
  61. Epoch [4/80], Iter [400/500] Loss: 45.6235
  62.  
  63. Epoch [4/80], Iter [500/500] Loss: 82.9265
  64.  
  65. Epoch [4/80], Iter [600/500] Loss: 119.6085
  66.  
  67. Epoch [4/80], Iter [700/500] Loss: 53.1355
  68.  
  69. Epoch [4/80], Iter [800/500] Loss: 29.5248
  70.  
  71. Epoch [4/80], Iter [900/500] Loss: 57.0401
  72.  
  73. Epoch [5/80], Iter [100/500] Loss: 47.2671
  74.  
  75. Epoch [5/80], Iter [200/500] Loss: 31.6928
  76.  
  77. Epoch [5/80], Iter [300/500] Loss: 38.0040
  78.  
  79. Epoch [5/80], Iter [400/500] Loss: 24.5184
  80.  
  81. Epoch [5/80], Iter [500/500] Loss: 33.8515
  82.  
  83. Epoch [5/80], Iter [600/500] Loss: 43.6560
  84.  
  85. Epoch [5/80], Iter [700/500] Loss: 68.2500
  86.  
  87. Epoch [5/80], Iter [800/500] Loss: 30.8259
  88.  
  89. Epoch [5/80], Iter [900/500] Loss: 43.9696
  90.  
  91. Epoch [6/80], Iter [100/500] Loss: 22.4120
  92.  
  93. Epoch [6/80], Iter [200/500] Loss: 45.5722
  94.  
  95. Epoch [6/80], Iter [300/500] Loss: 26.8331
  96.  
  97. Epoch [6/80], Iter [400/500] Loss: 58.1139
  98.  
  99. Epoch [6/80], Iter [500/500] Loss: 12.8767
  100.  
  101. Epoch [6/80], Iter [600/500] Loss: 26.6725
  102.  
  103. Epoch [6/80], Iter [700/500] Loss: 31.9800
  104.  
  105. Epoch [6/80], Iter [800/500] Loss: 91.2332
  106.  
  107. Epoch [6/80], Iter [900/500] Loss: 44.1361
  108.  
  109. Epoch [7/80], Iter [100/500] Loss: 13.1401
  110.  
  111. Epoch [7/80], Iter [200/500] Loss: 20.9435
  112.  
  113. Epoch [7/80], Iter [300/500] Loss: 28.0944
  114.  
  115. Epoch [7/80], Iter [400/500] Loss: 24.0240
  116.  
  117. Epoch [7/80], Iter [500/500] Loss: 43.3279
  118.  
  119. Epoch [7/80], Iter [600/500] Loss: 23.3077
  120.  
  121. Epoch [7/80], Iter [700/500] Loss: 32.9658
  122.  
  123. Epoch [7/80], Iter [800/500] Loss: 27.2044
  124.  
  125. Epoch [7/80], Iter [900/500] Loss: 25.5850
  126.  
  127. Epoch [8/80], Iter [100/500] Loss: 39.7642
  128.  
  129. Epoch [8/80], Iter [200/500] Loss: 17.7421
  130.  
  131. Epoch [8/80], Iter [300/500] Loss: 29.8965
  132.  
  133. Epoch [8/80], Iter [400/500] Loss: 20.6153
  134.  
  135. Epoch [8/80], Iter [500/500] Loss: 43.0224
  136.  
  137. Epoch [8/80], Iter [600/500] Loss: 58.1552
  138.  
  139. Epoch [8/80], Iter [700/500] Loss: 19.1967
  140.  
  141. Epoch [8/80], Iter [800/500] Loss: 34.9122
  142.  
  143. Epoch [8/80], Iter [900/500] Loss: 15.0651
  144.  
  145. Epoch [9/80], Iter [100/500] Loss: 18.5950
  146.  
  147. Epoch [9/80], Iter [200/500] Loss: 36.1891
  148.  
  149. Epoch [9/80], Iter [300/500] Loss: 22.4936
  150.  
  151. Epoch [9/80], Iter [400/500] Loss: 14.8044
  152.  
  153. Epoch [9/80], Iter [500/500] Loss: 16.6958
  154.  
  155. Epoch [9/80], Iter [600/500] Loss: 24.8461
  156.  
  157. Epoch [9/80], Iter [700/500] Loss: 13.7112
  158.  
  159. Epoch [9/80], Iter [800/500] Loss: 21.2906
  160.  
  161. Epoch [9/80], Iter [900/500] Loss: 31.6950
  162.  
  163. Epoch [10/80], Iter [100/500] Loss: 20.7707
  164.  
  165. Epoch [10/80], Iter [200/500] Loss: 15.6260
  166.  
  167. Epoch [10/80], Iter [300/500] Loss: 28.5737
  168.  
  169. Epoch [10/80], Iter [400/500] Loss: 36.6791
  170.  
  171. Epoch [10/80], Iter [500/500] Loss: 38.9839
  172.  
  173. Epoch [10/80], Iter [600/500] Loss: 14.4459
  174.  
  175. Epoch [10/80], Iter [700/500] Loss: 10.0907
  176.  
  177. Epoch [10/80], Iter [800/500] Loss: 17.9035
  178.  
  179. Epoch [10/80], Iter [900/500] Loss: 24.5759
  180.  
  181. Epoch [11/80], Iter [100/500] Loss: 19.8531
  182.  
  183. Epoch [11/80], Iter [200/500] Loss: 15.7126
  184.  
  185. Epoch [11/80], Iter [300/500] Loss: 18.0198
  186.  
  187. Epoch [11/80], Iter [400/500] Loss: 19.3038
  188.  
  189. Epoch [11/80], Iter [500/500] Loss: 27.4435
  190.  
  191. Epoch [11/80], Iter [600/500] Loss: 18.1086
  192.  
  193. Epoch [11/80], Iter [700/500] Loss: 10.8124
  194.  
  195. Epoch [11/80], Iter [800/500] Loss: 31.2389
  196.  
  197. Epoch [11/80], Iter [900/500] Loss: 14.4881
  198.  
  199. Epoch [12/80], Iter [100/500] Loss: 10.6320
  200.  
  201. Epoch [12/80], Iter [200/500] Loss: 26.8394
  202.  
  203. Epoch [12/80], Iter [300/500] Loss: 16.0246
  204.  
  205. Epoch [12/80], Iter [400/500] Loss: 16.3263
  206.  
  207. Epoch [12/80], Iter [500/500] Loss: 24.5880
  208.  
  209. Epoch [12/80], Iter [600/500] Loss: 15.7498
  210.  
  211. Epoch [12/80], Iter [700/500] Loss: 11.4933
  212.  
  213. Epoch [12/80], Iter [800/500] Loss: 9.7252
  214.  
  215. Epoch [12/80], Iter [900/500] Loss: 31.6774
  216.  
  217. Epoch [13/80], Iter [100/500] Loss: 21.1929
  218.  
  219. Epoch [13/80], Iter [200/500] Loss: 17.0953
  220.  
  221. Epoch [13/80], Iter [300/500] Loss: 21.1883
  222.  
  223. Epoch [13/80], Iter [400/500] Loss: 15.9005
  224.  
  225. Epoch [13/80], Iter [500/500] Loss: 14.7924
  226.  
  227. Epoch [13/80], Iter [600/500] Loss: 12.4324
  228.  
  229. Epoch [13/80], Iter [700/500] Loss: 12.0840
  230.  
  231. Epoch [13/80], Iter [800/500] Loss: 30.9664
  232.  
  233. Epoch [13/80], Iter [900/500] Loss: 14.9601
  234.  
  235. Epoch [14/80], Iter [100/500] Loss: 6.5126
  236.  
  237. Epoch [14/80], Iter [200/500] Loss: 11.3227
  238.  
  239. Epoch [14/80], Iter [300/500] Loss: 12.9980
  240.  
  241. Epoch [14/80], Iter [400/500] Loss: 13.8523
  242.  
  243. Epoch [14/80], Iter [500/500] Loss: 10.6771
  244.  
  245. Epoch [14/80], Iter [600/500] Loss: 7.3953
  246.  
  247. Epoch [14/80], Iter [700/500] Loss: 14.6829
  248.  
  249. Epoch [14/80], Iter [800/500] Loss: 15.6956
  250.  
  251. Epoch [14/80], Iter [900/500] Loss: 21.8876
  252.  
  253. Epoch [15/80], Iter [100/500] Loss: 5.1943
  254.  
  255. Epoch [15/80], Iter [200/500] Loss: 13.0731
  256.  
  257. Epoch [15/80], Iter [300/500] Loss: 6.8931
  258.  
  259. Epoch [15/80], Iter [400/500] Loss: 15.3212
  260.  
  261. Epoch [15/80], Iter [500/500] Loss: 8.1775
  262.  
  263. Epoch [15/80], Iter [600/500] Loss: 11.5664
  264.  
  265. Epoch [15/80], Iter [700/500] Loss: 5.5951
  266.  
  267. Epoch [15/80], Iter [800/500] Loss: 10.9075
  268.  
  269. Epoch [15/80], Iter [900/500] Loss: 14.8503
  270.  
  271. Epoch [16/80], Iter [100/500] Loss: 19.5184
  272.  
  273. Epoch [16/80], Iter [200/500] Loss: 10.3570
  274.  
  275. Epoch [16/80], Iter [300/500] Loss: 10.0997
  276.  
  277. Epoch [16/80], Iter [400/500] Loss: 9.7350
  278.  
  279. Epoch [16/80], Iter [500/500] Loss: 11.3000
  280.  
  281. Epoch [16/80], Iter [600/500] Loss: 21.6213
  282.  
  283. Epoch [16/80], Iter [700/500] Loss: 9.7907
  284.  
  285. Epoch [16/80], Iter [800/500] Loss: 10.0128
  286.  
  287. Epoch [16/80], Iter [900/500] Loss: 10.7869
  288.  
  289. Epoch [17/80], Iter [100/500] Loss: 9.2015
  290.  
  291. Epoch [17/80], Iter [200/500] Loss: 7.3021
  292.  
  293. Epoch [17/80], Iter [300/500] Loss: 5.9662
  294.  
  295. Epoch [17/80], Iter [400/500] Loss: 17.5215
  296.  
  297. Epoch [17/80], Iter [500/500] Loss: 7.3349
  298.  
  299. Epoch [17/80], Iter [600/500] Loss: 8.5626
  300.  
  301. Epoch [17/80], Iter [700/500] Loss: 12.7575
  302.  
  303. Epoch [17/80], Iter [800/500] Loss: 10.7792
  304.  
  305. Epoch [17/80], Iter [900/500] Loss: 7.0889
  306.  
  307. Epoch [18/80], Iter [100/500] Loss: 10.5613
  308.  
  309. Epoch [18/80], Iter [200/500] Loss: 3.0777
  310.  
  311. Epoch [18/80], Iter [300/500] Loss: 6.3598
  312.  
  313. Epoch [18/80], Iter [400/500] Loss: 7.9515
  314.  
  315. Epoch [18/80], Iter [500/500] Loss: 10.8023
  316.  
  317. Epoch [18/80], Iter [600/500] Loss: 7.3443
  318.  
  319. Epoch [18/80], Iter [700/500] Loss: 8.0862
  320.  
  321. Epoch [18/80], Iter [800/500] Loss: 15.2795
  322.  
  323. Epoch [18/80], Iter [900/500] Loss: 10.2788
  324.  
  325. Epoch [19/80], Iter [100/500] Loss: 5.0786
  326.  
  327. Epoch [19/80], Iter [200/500] Loss: 8.8248
  328.  
  329. Epoch [19/80], Iter [300/500] Loss: 4.9262
  330.  
  331. Epoch [19/80], Iter [400/500] Loss: 7.8992
  332.  
  333. Epoch [19/80], Iter [500/500] Loss: 13.1279
  334.  
  335. Epoch [19/80], Iter [600/500] Loss: 8.2703
  336.  
  337. Epoch [19/80], Iter [700/500] Loss: 4.1547
  338.  
  339. Epoch [19/80], Iter [800/500] Loss: 9.0542
  340.  
  341. Epoch [19/80], Iter [900/500] Loss: 6.7904
  342.  
  343. Epoch [20/80], Iter [100/500] Loss: 8.6150
  344.  
  345. Epoch [20/80], Iter [200/500] Loss: 3.7212
  346.  
  347. Epoch [20/80], Iter [300/500] Loss: 6.2832
  348.  
  349. Epoch [20/80], Iter [400/500] Loss: 10.1591
  350.  
  351. Epoch [20/80], Iter [500/500] Loss: 9.7668
  352.  
  353. Epoch [20/80], Iter [600/500] Loss: 4.7498
  354.  
  355. Epoch [20/80], Iter [700/500] Loss: 4.8831
  356.  
  357. Epoch [20/80], Iter [800/500] Loss: 7.7877
  358.  
  359. Epoch [20/80], Iter [900/500] Loss: 8.5114
  360.  
  361. Epoch [21/80], Iter [100/500] Loss: 2.1853
  362.  
  363. Epoch [21/80], Iter [200/500] Loss: 5.8741
  364.  
  365. Epoch [21/80], Iter [300/500] Loss: 5.3676
  366.  
  367. Epoch [21/80], Iter [400/500] Loss: 3.1155
  368.  
  369. Epoch [21/80], Iter [500/500] Loss: 4.2433
  370.  
  371. Epoch [21/80], Iter [600/500] Loss: 1.9783
  372.  
  373. Epoch [21/80], Iter [700/500] Loss: 2.7622
  374.  
  375. Epoch [21/80], Iter [800/500] Loss: 2.0112
  376.  
  377. Epoch [21/80], Iter [900/500] Loss: 2.2692
  378.  
  379. Epoch [22/80], Iter [100/500] Loss: 2.1882
  380.  
  381. Epoch [22/80], Iter [200/500] Loss: 4.2540
  382.  
  383. Epoch [22/80], Iter [300/500] Loss: 4.0126
  384.  
  385. Epoch [22/80], Iter [400/500] Loss: 2.2220
  386.  
  387. Epoch [22/80], Iter [500/500] Loss: 2.4755
  388.  
  389. Epoch [22/80], Iter [600/500] Loss: 3.0793
  390.  
  391. Epoch [22/80], Iter [700/500] Loss: 1.9128
  392.  
  393. Epoch [22/80], Iter [800/500] Loss: 4.8721
  394.  
  395. Epoch [22/80], Iter [900/500] Loss: 2.1349
  396.  
  397. Epoch [23/80], Iter [100/500] Loss: 1.8705
  398.  
  399. Epoch [23/80], Iter [200/500] Loss: 2.4326
  400.  
  401. Epoch [23/80], Iter [300/500] Loss: 1.5636
  402.  
  403. Epoch [23/80], Iter [400/500] Loss: 2.0465
  404.  
  405. Epoch [23/80], Iter [500/500] Loss: 1.5183
  406.  
  407. Epoch [23/80], Iter [600/500] Loss: 2.2711
  408.  
  409. Epoch [23/80], Iter [700/500] Loss: 2.8997
  410.  
  411. Epoch [23/80], Iter [800/500] Loss: 2.6150
  412.  
  413. Epoch [23/80], Iter [900/500] Loss: 2.8083
  414.  
  415. Epoch [24/80], Iter [100/500] Loss: 2.7177
  416.  
  417. Epoch [24/80], Iter [200/500] Loss: 3.2044
  418.  
  419. Epoch [24/80], Iter [300/500] Loss: 3.8137
  420.  
  421. Epoch [24/80], Iter [400/500] Loss: 1.9400
  422.  
  423. Epoch [24/80], Iter [500/500] Loss: 2.3550
  424.  
  425. Epoch [24/80], Iter [600/500] Loss: 1.6304
  426.  
  427. Epoch [24/80], Iter [700/500] Loss: 1.1287
  428.  
  429. Epoch [24/80], Iter [800/500] Loss: 2.1436
  430.  
  431. Epoch [24/80], Iter [900/500] Loss: 1.3761
  432.  
  433. Epoch [25/80], Iter [100/500] Loss: 1.9115
  434.  
  435. Epoch [25/80], Iter [200/500] Loss: 0.9423
  436.  
  437. Epoch [25/80], Iter [300/500] Loss: 1.1732
  438.  
  439. Epoch [25/80], Iter [400/500] Loss: 1.8946
  440.  
  441. Epoch [25/80], Iter [500/500] Loss: 1.4359
  442.  
  443. Epoch [25/80], Iter [600/500] Loss: 2.7499
  444.  
  445. Epoch [25/80], Iter [700/500] Loss: 3.2734
  446.  
  447. Epoch [25/80], Iter [800/500] Loss: 1.5863
  448.  
  449. Epoch [25/80], Iter [900/500] Loss: 2.8276
  450.  
  451. Epoch [26/80], Iter [100/500] Loss: 3.3783
  452.  
  453. Epoch [26/80], Iter [200/500] Loss: 1.6336
  454.  
  455. Epoch [26/80], Iter [300/500] Loss: 1.8298
  456.  
  457. Epoch [26/80], Iter [400/500] Loss: 1.1775
  458.  
  459. Epoch [26/80], Iter [500/500] Loss: 2.5811
  460.  
  461. Epoch [26/80], Iter [600/500] Loss: 1.2587
  462.  
  463. Epoch [26/80], Iter [700/500] Loss: 2.3547
  464.  
  465. Epoch [26/80], Iter [800/500] Loss: 3.2238
  466.  
  467. Epoch [26/80], Iter [900/500] Loss: 1.8571
  468.  
  469. Epoch [27/80], Iter [100/500] Loss: 1.9582
  470.  
  471. Epoch [27/80], Iter [200/500] Loss: 0.8752
  472.  
  473. Epoch [27/80], Iter [300/500] Loss: 1.5140
  474.  
  475. Epoch [27/80], Iter [400/500] Loss: 1.4624
  476.  
  477. Epoch [27/80], Iter [500/500] Loss: 3.6735
  478.  
  479. Epoch [27/80], Iter [600/500] Loss: 2.5618
  480.  
  481. Epoch [27/80], Iter [700/500] Loss: 1.3707
  482.  
  483. Epoch [27/80], Iter [800/500] Loss: 1.2286
  484.  
  485. Epoch [27/80], Iter [900/500] Loss: 2.4623
  486.  
  487. Epoch [28/80], Iter [100/500] Loss: 0.8966
  488.  
  489. Epoch [28/80], Iter [200/500] Loss: 1.4363
  490.  
  491. Epoch [28/80], Iter [300/500] Loss: 1.3229
  492.  
  493. Epoch [28/80], Iter [400/500] Loss: 1.4402
  494.  
  495. Epoch [28/80], Iter [500/500] Loss: 1.4920
  496.  
  497. Epoch [28/80], Iter [600/500] Loss: 1.9604
  498.  
  499. Epoch [28/80], Iter [700/500] Loss: 3.1165
  500.  
  501. Epoch [28/80], Iter [800/500] Loss: 1.0391
  502.  
  503. Epoch [28/80], Iter [900/500] Loss: 2.5201
  504.  
  505. Epoch [29/80], Iter [100/500] Loss: 1.8787
  506.  
  507. Epoch [29/80], Iter [200/500] Loss: 0.9840
  508.  
  509. Epoch [29/80], Iter [300/500] Loss: 1.4460
  510.  
  511. Epoch [29/80], Iter [400/500] Loss: 2.2886
  512.  
  513. Epoch [29/80], Iter [500/500] Loss: 1.4231
  514.  
  515. Epoch [29/80], Iter [600/500] Loss: 1.4980
  516.  
  517. Epoch [29/80], Iter [700/500] Loss: 2.3995
  518.  
  519. Epoch [29/80], Iter [800/500] Loss: 1.7662
  520.  
  521. Epoch [29/80], Iter [900/500] Loss: 2.3659
  522.  
  523. Epoch [30/80], Iter [100/500] Loss: 1.9505
  524.  
  525. Epoch [30/80], Iter [200/500] Loss: 1.1663
  526.  
  527. Epoch [30/80], Iter [300/500] Loss: 0.9471
  528.  
  529. Epoch [30/80], Iter [400/500] Loss: 0.9364
  530.  
  531. Epoch [30/80], Iter [500/500] Loss: 1.0124
  532.  
  533. Epoch [30/80], Iter [600/500] Loss: 1.2437
  534.  
  535. Epoch [30/80], Iter [700/500] Loss: 0.8796
  536.  
  537. Epoch [30/80], Iter [800/500] Loss: 1.2183
  538.  
  539. Epoch [30/80], Iter [900/500] Loss: 2.3959
  540.  
  541. Epoch [31/80], Iter [100/500] Loss: 1.4337
  542.  
  543. Epoch [31/80], Iter [200/500] Loss: 1.1861
  544.  
  545. Epoch [31/80], Iter [300/500] Loss: 1.2915
  546.  
  547. Epoch [31/80], Iter [400/500] Loss: 1.0188
  548.  
  549. Epoch [31/80], Iter [500/500] Loss: 2.2067
  550.  
  551. Epoch [31/80], Iter [600/500] Loss: 2.6476
  552.  
  553. Epoch [31/80], Iter [700/500] Loss: 1.1402
  554.  
  555. Epoch [31/80], Iter [800/500] Loss: 1.4248
  556.  
  557. Epoch [31/80], Iter [900/500] Loss: 1.0669
  558.  
  559. Epoch [32/80], Iter [100/500] Loss: 1.5955
  560.  
  561. Epoch [32/80], Iter [200/500] Loss: 1.7216
  562.  
  563. Epoch [32/80], Iter [300/500] Loss: 1.2304
  564.  
  565. Epoch [32/80], Iter [400/500] Loss: 1.7058
  566.  
  567. Epoch [32/80], Iter [500/500] Loss: 1.2115
  568.  
  569. Epoch [32/80], Iter [600/500] Loss: 1.6176
  570.  
  571. Epoch [32/80], Iter [700/500] Loss: 1.3043
  572.  
  573. Epoch [32/80], Iter [800/500] Loss: 1.9501
  574.  
  575. Epoch [32/80], Iter [900/500] Loss: 1.9035
  576.  
  577. Epoch [33/80], Iter [100/500] Loss: 1.9505
  578.  
  579. Epoch [33/80], Iter [200/500] Loss: 1.5603
  580.  
  581. Epoch [33/80], Iter [300/500] Loss: 1.5528
  582.  
  583. Epoch [33/80], Iter [400/500] Loss: 1.4192
  584.  
  585. Epoch [33/80], Iter [500/500] Loss: 1.2211
  586.  
  587. Epoch [33/80], Iter [600/500] Loss: 1.3927
  588.  
  589. Epoch [33/80], Iter [700/500] Loss: 2.3885
  590.  
  591. Epoch [33/80], Iter [800/500] Loss: 1.0948
  592.  
  593. Epoch [33/80], Iter [900/500] Loss: 1.6951
  594.  
  595. Epoch [34/80], Iter [100/500] Loss: 0.9534
  596.  
  597. Epoch [34/80], Iter [200/500] Loss: 0.7364
  598.  
  599. Epoch [34/80], Iter [300/500] Loss: 1.2372
  600.  
  601. Epoch [34/80], Iter [400/500] Loss: 1.6718
  602.  
  603. Epoch [34/80], Iter [500/500] Loss: 0.7804
  604.  
  605. Epoch [34/80], Iter [600/500] Loss: 2.1848
  606.  
  607. Epoch [34/80], Iter [700/500] Loss: 0.6333
  608.  
  609. Epoch [34/80], Iter [800/500] Loss: 1.6399
  610.  
  611. Epoch [34/80], Iter [900/500] Loss: 0.9555
  612.  
  613. Epoch [35/80], Iter [100/500] Loss: 1.5851
  614.  
  615. Epoch [35/80], Iter [200/500] Loss: 3.7824
  616.  
  617. Epoch [35/80], Iter [300/500] Loss: 2.5642
  618.  
  619. Epoch [35/80], Iter [400/500] Loss: 0.8965
  620.  
  621. Epoch [35/80], Iter [500/500] Loss: 1.9092
  622.  
  623. Epoch [35/80], Iter [600/500] Loss: 1.3729
  624.  
  625. Epoch [35/80], Iter [700/500] Loss: 2.2079
  626.  
  627. Epoch [35/80], Iter [800/500] Loss: 0.9051
  628.  
  629. Epoch [35/80], Iter [900/500] Loss: 1.1845
  630.  
  631. Epoch [36/80], Iter [100/500] Loss: 0.8240
  632.  
  633. Epoch [36/80], Iter [200/500] Loss: 1.1929
  634.  
  635. Epoch [36/80], Iter [300/500] Loss: 1.7051
  636.  
  637. Epoch [36/80], Iter [400/500] Loss: 0.7341
  638.  
  639. Epoch [36/80], Iter [500/500] Loss: 0.8078
  640.  
  641. Epoch [36/80], Iter [600/500] Loss: 0.7525
  642.  
  643. Epoch [36/80], Iter [700/500] Loss: 1.5739
  644.  
  645. Epoch [36/80], Iter [800/500] Loss: 1.3938
  646.  
  647. Epoch [36/80], Iter [900/500] Loss: 0.7145
  648.  
  649. Epoch [37/80], Iter [100/500] Loss: 0.9577
  650.  
  651. Epoch [37/80], Iter [200/500] Loss: 0.9464
  652.  
  653. Epoch [37/80], Iter [300/500] Loss: 1.0931
  654.  
  655. Epoch [37/80], Iter [400/500] Loss: 1.0390
  656.  
  657. Epoch [37/80], Iter [500/500] Loss: 1.3472
  658.  
  659. Epoch [37/80], Iter [600/500] Loss: 0.6312
  660.  
  661. Epoch [37/80], Iter [700/500] Loss: 0.6754
  662.  
  663. Epoch [37/80], Iter [800/500] Loss: 0.5888
  664.  
  665. Epoch [37/80], Iter [900/500] Loss: 3.1377
  666.  
  667. Epoch [38/80], Iter [100/500] Loss: 0.8339
  668.  
  669. Epoch [38/80], Iter [200/500] Loss: 0.9345
  670.  
  671. Epoch [38/80], Iter [300/500] Loss: 0.6615
  672.  
  673. Epoch [38/80], Iter [400/500] Loss: 1.6327
  674.  
  675. Epoch [38/80], Iter [500/500] Loss: 0.4701
  676.  
  677. Epoch [38/80], Iter [600/500] Loss: 1.1513
  678.  
  679. Epoch [38/80], Iter [700/500] Loss: 0.9013
  680.  
  681. Epoch [38/80], Iter [800/500] Loss: 2.7680
  682.  
  683. Epoch [38/80], Iter [900/500] Loss: 1.2733
  684.  
  685. Epoch [39/80], Iter [100/500] Loss: 3.0368
  686.  
  687. Epoch [39/80], Iter [200/500] Loss: 1.5569
  688.  
  689. Epoch [39/80], Iter [300/500] Loss: 0.5049
  690.  
  691. Epoch [39/80], Iter [400/500] Loss: 0.4075
  692.  
  693. Epoch [39/80], Iter [500/500] Loss: 0.9771
  694.  
  695. Epoch [39/80], Iter [600/500] Loss: 0.9003
  696.  
  697. Epoch [39/80], Iter [700/500] Loss: 1.6323
  698.  
  699. Epoch [39/80], Iter [800/500] Loss: 0.4881
  700.  
  701. Epoch [39/80], Iter [900/500] Loss: 2.1344
  702.  
  703. Epoch [40/80], Iter [100/500] Loss: 1.2439
  704.  
  705. Epoch [40/80], Iter [200/500] Loss: 1.3419
  706.  
  707. Epoch [40/80], Iter [300/500] Loss: 0.9575
  708.  
  709. Epoch [40/80], Iter [400/500] Loss: 1.4438
  710.  
  711. Epoch [40/80], Iter [500/500] Loss: 0.8559
  712.  
  713. Epoch [40/80], Iter [600/500] Loss: 1.0400
  714.  
  715. Epoch [40/80], Iter [700/500] Loss: 0.9063
  716.  
  717. Epoch [40/80], Iter [800/500] Loss: 1.0714
  718.  
  719. Epoch [40/80], Iter [900/500] Loss: 0.5098
  720.  
  721. Epoch [41/80], Iter [100/500] Loss: 0.5906
  722.  
  723. Epoch [41/80], Iter [200/500] Loss: 0.6610
  724.  
  725. Epoch [41/80], Iter [300/500] Loss: 0.4230
  726.  
  727. Epoch [41/80], Iter [400/500] Loss: 0.6014
  728.  
  729. Epoch [41/80], Iter [500/500] Loss: 0.3004
  730.  
  731. Epoch [41/80], Iter [600/500] Loss: 0.5606
  732.  
  733. Epoch [41/80], Iter [700/500] Loss: 0.4994
  734.  
  735. Epoch [41/80], Iter [800/500] Loss: 0.8664
  736.  
  737. Epoch [41/80], Iter [900/500] Loss: 0.5302
  738.  
  739. Epoch [42/80], Iter [100/500] Loss: 0.2961
  740.  
  741. Epoch [42/80], Iter [200/500] Loss: 0.2826
  742.  
  743. Epoch [42/80], Iter [300/500] Loss: 0.3575
  744.  
  745. Epoch [42/80], Iter [400/500] Loss: 0.3224
  746.  
  747. Epoch [42/80], Iter [500/500] Loss: 0.6851
  748.  
  749. Epoch [42/80], Iter [600/500] Loss: 0.2997
  750.  
  751. Epoch [42/80], Iter [700/500] Loss: 0.3907
  752.  
  753. Epoch [42/80], Iter [800/500] Loss: 0.4437
  754.  
  755. Epoch [42/80], Iter [900/500] Loss: 0.4847
  756.  
  757. Epoch [43/80], Iter [100/500] Loss: 0.5418
  758.  
  759. Epoch [43/80], Iter [200/500] Loss: 0.4099
  760.  
  761. Epoch [43/80], Iter [300/500] Loss: 0.3339
  762.  
  763. Epoch [43/80], Iter [400/500] Loss: 0.5546
  764.  
  765. Epoch [43/80], Iter [500/500] Loss: 0.5867
  766.  
  767. Epoch [43/80], Iter [600/500] Loss: 0.3540
  768.  
  769. Epoch [43/80], Iter [700/500] Loss: 0.4656
  770.  
  771. Epoch [43/80], Iter [800/500] Loss: 0.2922
  772.  
  773. Epoch [43/80], Iter [900/500] Loss: 0.3042
  774.  
  775. Epoch [44/80], Iter [100/500] Loss: 0.6309
  776.  
  777. Epoch [44/80], Iter [200/500] Loss: 0.2412
  778.  
  779. Epoch [44/80], Iter [300/500] Loss: 0.5505
  780.  
  781. Epoch [44/80], Iter [400/500] Loss: 0.4133
  782.  
  783. Epoch [44/80], Iter [500/500] Loss: 0.4317
  784.  
  785. Epoch [44/80], Iter [600/500] Loss: 0.4152
  786.  
  787. Epoch [44/80], Iter [700/500] Loss: 0.6375
  788.  
  789. Epoch [44/80], Iter [800/500] Loss: 0.3283
  790.  
  791. Epoch [44/80], Iter [900/500] Loss: 0.4399
  792.  
  793. Epoch [45/80], Iter [100/500] Loss: 0.2777
  794.  
  795. Epoch [45/80], Iter [200/500] Loss: 0.3131
  796.  
  797. Epoch [45/80], Iter [300/500] Loss: 0.2451
  798.  
  799. Epoch [45/80], Iter [400/500] Loss: 0.5350
  800.  
  801. Epoch [45/80], Iter [500/500] Loss: 0.2501
  802.  
  803. Epoch [45/80], Iter [600/500] Loss: 0.2076
  804.  
  805. Epoch [45/80], Iter [700/500] Loss: 0.2317
  806.  
  807. Epoch [45/80], Iter [800/500] Loss: 0.8772
  808.  
  809. Epoch [45/80], Iter [900/500] Loss: 0.4162
  810.  
  811. Epoch [46/80], Iter [100/500] Loss: 0.3190
  812.  
  813. Epoch [46/80], Iter [200/500] Loss: 0.2458
  814.  
  815. Epoch [46/80], Iter [300/500] Loss: 0.2976
  816.  
  817. Epoch [46/80], Iter [400/500] Loss: 0.3712
  818.  
  819. Epoch [46/80], Iter [500/500] Loss: 0.4305
  820.  
  821. Epoch [46/80], Iter [600/500] Loss: 0.5143
  822.  
  823. Epoch [46/80], Iter [700/500] Loss: 0.2622
  824.  
  825. Epoch [46/80], Iter [800/500] Loss: 0.5331
  826.  
  827. Epoch [46/80], Iter [900/500] Loss: 0.3598
  828.  
  829. Epoch [47/80], Iter [100/500] Loss: 0.2180
  830.  
  831. Epoch [47/80], Iter [200/500] Loss: 0.2275
  832.  
  833. Epoch [47/80], Iter [300/500] Loss: 0.5302
  834.  
  835. Epoch [47/80], Iter [400/500] Loss: 0.3535
  836.  
  837. Epoch [47/80], Iter [500/500] Loss: 0.5790
  838.  
  839. Epoch [47/80], Iter [600/500] Loss: 0.3741
  840.  
  841. Epoch [47/80], Iter [700/500] Loss: 0.5120
  842.  
  843. Epoch [47/80], Iter [800/500] Loss: 0.6204
  844.  
  845. Epoch [47/80], Iter [900/500] Loss: 0.4902
  846.  
  847. Epoch [48/80], Iter [100/500] Loss: 0.2668
  848.  
  849. Epoch [48/80], Iter [200/500] Loss: 0.5693
  850.  
  851. Epoch [48/80], Iter [300/500] Loss: 0.3328
  852.  
  853. Epoch [48/80], Iter [400/500] Loss: 0.2399
  854.  
  855. Epoch [48/80], Iter [500/500] Loss: 0.3160
  856.  
  857. Epoch [48/80], Iter [600/500] Loss: 0.2944
  858.  
  859. Epoch [48/80], Iter [700/500] Loss: 0.2742
  860.  
  861. Epoch [48/80], Iter [800/500] Loss: 0.5297
  862.  
  863. Epoch [48/80], Iter [900/500] Loss: 0.3755
  864.  
  865. Epoch [49/80], Iter [100/500] Loss: 0.2658
  866.  
  867. Epoch [49/80], Iter [200/500] Loss: 0.2223
  868.  
  869. Epoch [49/80], Iter [300/500] Loss: 0.4348
  870.  
  871. Epoch [49/80], Iter [400/500] Loss: 0.2313
  872.  
  873. Epoch [49/80], Iter [500/500] Loss: 0.2838
  874.  
  875. Epoch [49/80], Iter [600/500] Loss: 0.3415
  876.  
  877. Epoch [49/80], Iter [700/500] Loss: 0.3633
  878.  
  879. Epoch [49/80], Iter [800/500] Loss: 0.3768
  880.  
  881. Epoch [49/80], Iter [900/500] Loss: 0.5177
  882.  
  883. Epoch [50/80], Iter [100/500] Loss: 0.3538
  884.  
  885. Epoch [50/80], Iter [200/500] Loss: 0.2759
  886.  
  887. Epoch [50/80], Iter [300/500] Loss: 0.2255
  888.  
  889. Epoch [50/80], Iter [400/500] Loss: 0.3148
  890.  
  891. Epoch [50/80], Iter [500/500] Loss: 0.4502
  892.  
  893. Epoch [50/80], Iter [600/500] Loss: 0.3382
  894.  
  895. Epoch [50/80], Iter [700/500] Loss: 0.8207
  896.  
  897. Epoch [50/80], Iter [800/500] Loss: 0.3541
  898.  
  899. Epoch [50/80], Iter [900/500] Loss: 0.4090
  900.  
  901. ('time used:', 17124.861335999998)

未被DataParallel初始化

  1. Epoch [1/80], Iter [100/500] Loss: 635.6779
  2.  
  3. Epoch [1/80], Iter [200/500] Loss: 247.5514
  4.  
  5. Epoch [1/80], Iter [300/500] Loss: 231.7609
  6.  
  7. Epoch [1/80], Iter [400/500] Loss: 198.7304
  8.  
  9. Epoch [1/80], Iter [500/500] Loss: 207.1028
  10.  
  11. Epoch [1/80], Iter [600/500] Loss: 114.7708
  12.  
  13. Epoch [1/80], Iter [700/500] Loss: 126.9886
  14.  
  15. Epoch [1/80], Iter [800/500] Loss: 160.8622
  16.  
  17. Epoch [1/80], Iter [900/500] Loss: 153.8121
  18.  
  19. Epoch [2/80], Iter [100/500] Loss: 106.6578
  20.  
  21. Epoch [2/80], Iter [200/500] Loss: 91.5044
  22.  
  23. Epoch [2/80], Iter [300/500] Loss: 111.4231
  24.  
  25. Epoch [2/80], Iter [400/500] Loss: 50.7004
  26.  
  27. Epoch [2/80], Iter [500/500] Loss: 58.9242
  28.  
  29. Epoch [2/80], Iter [600/500] Loss: 55.2035
  30.  
  31. Epoch [2/80], Iter [700/500] Loss: 26.7637
  32.  
  33. Epoch [2/80], Iter [800/500] Loss: 52.5472
  34.  
  35. Epoch [2/80], Iter [900/500] Loss: 51.7907
  36.  
  37. Epoch [3/80], Iter [100/500] Loss: 35.7970
  38.  
  39. Epoch [3/80], Iter [200/500] Loss: 59.1204
  40.  
  41. Epoch [3/80], Iter [300/500] Loss: 70.5727
  42.  
  43. Epoch [3/80], Iter [400/500] Loss: 50.1149
  44.  
  45. Epoch [3/80], Iter [500/500] Loss: 26.3628
  46.  
  47. Epoch [3/80], Iter [600/500] Loss: 67.3355
  48.  
  49. Epoch [3/80], Iter [700/500] Loss: 56.8271
  50.  
  51. Epoch [3/80], Iter [800/500] Loss: 46.5803
  52.  
  53. Epoch [3/80], Iter [900/500] Loss: 34.9568
  54.  
  55. Epoch [4/80], Iter [100/500] Loss: 67.0837
  56.  
  57. Epoch [4/80], Iter [200/500] Loss: 36.8596
  58.  
  59. Epoch [4/80], Iter [300/500] Loss: 37.6830
  60.  
  61. Epoch [4/80], Iter [400/500] Loss: 52.1378
  62.  
  63. Epoch [4/80], Iter [500/500] Loss: 104.5909
  64.  
  65. Epoch [4/80], Iter [600/500] Loss: 71.3509
  66.  
  67. Epoch [4/80], Iter [700/500] Loss: 28.4496
  68.  
  69. Epoch [4/80], Iter [800/500] Loss: 56.1399
  70.  
  71. Epoch [4/80], Iter [900/500] Loss: 58.7510
  72.  
  73. Epoch [5/80], Iter [100/500] Loss: 42.5710
  74.  
  75. Epoch [5/80], Iter [200/500] Loss: 25.5430
  76.  
  77. Epoch [5/80], Iter [300/500] Loss: 25.9271
  78.  
  79. Epoch [5/80], Iter [400/500] Loss: 75.8942
  80.  
  81. Epoch [5/80], Iter [500/500] Loss: 70.6782
  82.  
  83. Epoch [5/80], Iter [600/500] Loss: 10.7801
  84.  
  85. Epoch [5/80], Iter [700/500] Loss: 29.9416
  86.  
  87. Epoch [5/80], Iter [800/500] Loss: 47.0781
  88.  
  89. Epoch [5/80], Iter [900/500] Loss: 45.4692
  90.  
  91. Epoch [6/80], Iter [100/500] Loss: 51.3811
  92.  
  93. Epoch [6/80], Iter [200/500] Loss: 30.6207
  94.  
  95. Epoch [6/80], Iter [300/500] Loss: 35.4928
  96.  
  97. Epoch [6/80], Iter [400/500] Loss: 37.9467
  98.  
  99. Epoch [6/80], Iter [500/500] Loss: 36.7505
  100.  
  101. Epoch [6/80], Iter [600/500] Loss: 64.3528
  102.  
  103. Epoch [6/80], Iter [700/500] Loss: 73.6308
  104.  
  105. Epoch [6/80], Iter [800/500] Loss: 33.1290
  106.  
  107. Epoch [6/80], Iter [900/500] Loss: 34.2442
  108.  
  109. Epoch [7/80], Iter [100/500] Loss: 34.9157
  110.  
  111. Epoch [7/80], Iter [200/500] Loss: 26.8041
  112.  
  113. Epoch [7/80], Iter [300/500] Loss: 43.5796
  114.  
  115. Epoch [7/80], Iter [400/500] Loss: 31.5104
  116.  
  117. Epoch [7/80], Iter [500/500] Loss: 41.2132
  118.  
  119. Epoch [7/80], Iter [600/500] Loss: 23.1634
  120.  
  121. Epoch [7/80], Iter [700/500] Loss: 26.7399
  122.  
  123. Epoch [7/80], Iter [800/500] Loss: 60.4979
  124.  
  125. Epoch [7/80], Iter [900/500] Loss: 32.8528
  126.  
  127. Epoch [8/80], Iter [100/500] Loss: 36.6079
  128.  
  129. Epoch [8/80], Iter [200/500] Loss: 49.1552
  130.  
  131. Epoch [8/80], Iter [300/500] Loss: 21.2926
  132.  
  133. Epoch [8/80], Iter [400/500] Loss: 33.5335
  134.  
  135. Epoch [8/80], Iter [500/500] Loss: 50.1770
  136.  
  137. Epoch [8/80], Iter [600/500] Loss: 21.9908
  138.  
  139. Epoch [8/80], Iter [700/500] Loss: 40.2040
  140.  
  141. Epoch [8/80], Iter [800/500] Loss: 22.5460
  142.  
  143. Epoch [8/80], Iter [900/500] Loss: 43.9564
  144.  
  145. Epoch [9/80], Iter [100/500] Loss: 19.8116
  146.  
  147. Epoch [9/80], Iter [200/500] Loss: 8.5169
  148.  
  149. Epoch [9/80], Iter [300/500] Loss: 37.0475
  150.  
  151. Epoch [9/80], Iter [400/500] Loss: 74.2606
  152.  
  153. Epoch [9/80], Iter [500/500] Loss: 16.3256
  154.  
  155. Epoch [9/80], Iter [600/500] Loss: 26.0609
  156.  
  157. Epoch [9/80], Iter [700/500] Loss: 24.3721
  158.  
  159. Epoch [9/80], Iter [800/500] Loss: 37.5132
  160.  
  161. Epoch [9/80], Iter [900/500] Loss: 27.4818
  162.  
  163. Epoch [10/80], Iter [100/500] Loss: 11.7654
  164.  
  165. Epoch [10/80], Iter [200/500] Loss: 9.3536
  166.  
  167. Epoch [10/80], Iter [300/500] Loss: 11.6718
  168.  
  169. Epoch [10/80], Iter [400/500] Loss: 24.4423
  170.  
  171. Epoch [10/80], Iter [500/500] Loss: 25.6966
  172.  
  173. Epoch [10/80], Iter [600/500] Loss: 35.2358
  174.  
  175. Epoch [10/80], Iter [700/500] Loss: 17.2685
  176.  
  177. Epoch [10/80], Iter [800/500] Loss: 22.3965
  178.  
  179. Epoch [10/80], Iter [900/500] Loss: 42.6901
  180.  
  181. Epoch [11/80], Iter [100/500] Loss: 17.9832
  182.  
  183. Epoch [11/80], Iter [200/500] Loss: 18.8705
  184.  
  185. Epoch [11/80], Iter [300/500] Loss: 25.3700
  186.  
  187. Epoch [11/80], Iter [400/500] Loss: 10.8511
  188.  
  189. Epoch [11/80], Iter [500/500] Loss: 18.3028
  190.  
  191. Epoch [11/80], Iter [600/500] Loss: 23.2316
  192.  
  193. Epoch [11/80], Iter [700/500] Loss: 10.2498
  194.  
  195. Epoch [11/80], Iter [800/500] Loss: 14.7609
  196.  
  197. Epoch [11/80], Iter [900/500] Loss: 20.1801
  198.  
  199. Epoch [12/80], Iter [100/500] Loss: 23.8675
  200.  
  201. Epoch [12/80], Iter [200/500] Loss: 15.7924
  202.  
  203. Epoch [12/80], Iter [300/500] Loss: 13.7092
  204.  
  205. Epoch [12/80], Iter [400/500] Loss: 12.0196
  206.  
  207. Epoch [12/80], Iter [500/500] Loss: 7.2408
  208.  
  209. Epoch [12/80], Iter [600/500] Loss: 10.7912
  210.  
  211. Epoch [12/80], Iter [700/500] Loss: 11.9665
  212.  
  213. Epoch [12/80], Iter [800/500] Loss: 13.7599
  214.  
  215. Epoch [12/80], Iter [900/500] Loss: 18.3869
  216.  
  217. Epoch [13/80], Iter [100/500] Loss: 11.1715
  218.  
  219. Epoch [13/80], Iter [200/500] Loss: 17.6397
  220.  
  221. Epoch [13/80], Iter [300/500] Loss: 9.3256
  222.  
  223. Epoch [13/80], Iter [400/500] Loss: 12.7995
  224.  
  225. Epoch [13/80], Iter [500/500] Loss: 7.8598
  226.  
  227. Epoch [13/80], Iter [600/500] Loss: 10.7001
  228.  
  229. Epoch [13/80], Iter [700/500] Loss: 26.3672
  230.  
  231. Epoch [13/80], Iter [800/500] Loss: 15.4815
  232.  
  233. Epoch [13/80], Iter [900/500] Loss: 14.0478
  234.  
  235. Epoch [14/80], Iter [100/500] Loss: 16.0473
  236.  
  237. Epoch [14/80], Iter [200/500] Loss: 4.7192
  238.  
  239. Epoch [14/80], Iter [300/500] Loss: 10.7586
  240.  
  241. Epoch [14/80], Iter [400/500] Loss: 13.6734
  242.  
  243. Epoch [14/80], Iter [500/500] Loss: 9.3228
  244.  
  245. Epoch [14/80], Iter [600/500] Loss: 5.5830
  246.  
  247. Epoch [14/80], Iter [700/500] Loss: 7.5252
  248.  
  249. Epoch [14/80], Iter [800/500] Loss: 7.6239
  250.  
  251. Epoch [14/80], Iter [900/500] Loss: 7.1024
  252.  
  253. Epoch [15/80], Iter [100/500] Loss: 17.5188
  254.  
  255. Epoch [15/80], Iter [200/500] Loss: 11.8842
  256.  
  257. Epoch [15/80], Iter [300/500] Loss: 9.0330
  258.  
  259. Epoch [15/80], Iter [400/500] Loss: 11.7120
  260.  
  261. Epoch [15/80], Iter [500/500] Loss: 17.0862
  262.  
  263. Epoch [15/80], Iter [600/500] Loss: 11.4103
  264.  
  265. Epoch [15/80], Iter [700/500] Loss: 12.2746
  266.  
  267. Epoch [15/80], Iter [800/500] Loss: 13.6224
  268.  
  269. Epoch [15/80], Iter [900/500] Loss: 12.7686
  270.  
  271. Epoch [16/80], Iter [100/500] Loss: 5.5978
  272.  
  273. Epoch [16/80], Iter [200/500] Loss: 12.2122
  274.  
  275. Epoch [16/80], Iter [300/500] Loss: 5.1189
  276.  
  277. Epoch [16/80], Iter [400/500] Loss: 14.1793
  278.  
  279. Epoch [16/80], Iter [500/500] Loss: 10.3744
  280.  
  281. Epoch [16/80], Iter [600/500] Loss: 5.2099
  282.  
  283. Epoch [16/80], Iter [700/500] Loss: 6.7522
  284.  
  285. Epoch [16/80], Iter [800/500] Loss: 13.2532
  286.  
  287. Epoch [16/80], Iter [900/500] Loss: 6.7040
  288.  
  289. Epoch [17/80], Iter [100/500] Loss: 10.7390
  290.  
  291. Epoch [17/80], Iter [200/500] Loss: 8.1525
  292.  
  293. Epoch [17/80], Iter [300/500] Loss: 14.2229
  294.  
  295. Epoch [17/80], Iter [400/500] Loss: 7.6302
  296.  
  297. Epoch [17/80], Iter [500/500] Loss: 6.4554
  298.  
  299. Epoch [17/80], Iter [600/500] Loss: 8.2380
  300.  
  301. Epoch [17/80], Iter [700/500] Loss: 6.4445
  302.  
  303. Epoch [17/80], Iter [800/500] Loss: 8.4644
  304.  
  305. Epoch [17/80], Iter [900/500] Loss: 9.0200
  306.  
  307. Epoch [18/80], Iter [100/500] Loss: 9.5088
  308.  
  309. Epoch [18/80], Iter [200/500] Loss: 3.8648
  310.  
  311. Epoch [18/80], Iter [300/500] Loss: 8.8408
  312.  
  313. Epoch [18/80], Iter [400/500] Loss: 7.4195
  314.  
  315. Epoch [18/80], Iter [500/500] Loss: 15.0480
  316.  
  317. Epoch [18/80], Iter [600/500] Loss: 5.6232
  318.  
  319. Epoch [18/80], Iter [700/500] Loss: 5.2233
  320.  
  321. Epoch [18/80], Iter [800/500] Loss: 6.5702
  322.  
  323. Epoch [18/80], Iter [900/500] Loss: 13.7427
  324.  
  325. Epoch [19/80], Iter [100/500] Loss: 3.5658
  326.  
  327. Epoch [19/80], Iter [200/500] Loss: 4.7062
  328.  
  329. Epoch [19/80], Iter [300/500] Loss: 10.7831
  330.  
  331. Epoch [19/80], Iter [400/500] Loss: 13.1375
  332.  
  333. Epoch [19/80], Iter [500/500] Loss: 22.2764
  334.  
  335. Epoch [19/80], Iter [600/500] Loss: 10.3463
  336.  
  337. Epoch [19/80], Iter [700/500] Loss: 7.2373
  338.  
  339. Epoch [19/80], Iter [800/500] Loss: 5.5266
  340.  
  341. Epoch [19/80], Iter [900/500] Loss: 9.2434
  342.  
  343. Epoch [20/80], Iter [100/500] Loss: 7.8164
  344.  
  345. Epoch [20/80], Iter [200/500] Loss: 9.6628
  346.  
  347. Epoch [20/80], Iter [300/500] Loss: 4.1032
  348.  
  349. Epoch [20/80], Iter [400/500] Loss: 16.5922
  350.  
  351. Epoch [20/80], Iter [500/500] Loss: 6.9907
  352.  
  353. Epoch [20/80], Iter [600/500] Loss: 10.9906
  354.  
  355. Epoch [20/80], Iter [700/500] Loss: 8.5092
  356.  
  357. Epoch [20/80], Iter [800/500] Loss: 7.1332
  358.  
  359. Epoch [20/80], Iter [900/500] Loss: 6.1639
  360.  
  361. Epoch [21/80], Iter [100/500] Loss: 6.3100
  362.  
  363. Epoch [21/80], Iter [200/500] Loss: 4.5190
  364.  
  365. Epoch [21/80], Iter [300/500] Loss: 4.3493
  366.  
  367. Epoch [21/80], Iter [400/500] Loss: 7.9860
  368.  
  369. Epoch [21/80], Iter [500/500] Loss: 8.8312
  370.  
  371. Epoch [21/80], Iter [600/500] Loss: 10.7502
  372.  
  373. Epoch [21/80], Iter [700/500] Loss: 3.2116
  374.  
  375. Epoch [21/80], Iter [800/500] Loss: 4.0126
  376.  
  377. Epoch [21/80], Iter [900/500] Loss: 5.3675
  378.  
  379. Epoch [22/80], Iter [100/500] Loss: 1.4893
  380.  
  381. Epoch [22/80], Iter [200/500] Loss: 1.6984
  382.  
  383. Epoch [22/80], Iter [300/500] Loss: 2.6195
  384.  
  385. Epoch [22/80], Iter [400/500] Loss: 2.1465
  386.  
  387. Epoch [22/80], Iter [500/500] Loss: 2.9847
  388.  
  389. Epoch [22/80], Iter [600/500] Loss: 4.9699
  390.  
  391. Epoch [22/80], Iter [700/500] Loss: 1.6728
  392.  
  393. Epoch [22/80], Iter [800/500] Loss: 1.3381
  394.  
  395. Epoch [22/80], Iter [900/500] Loss: 2.0680
  396.  
  397. Epoch [23/80], Iter [100/500] Loss: 1.9145
  398.  
  399. Epoch [23/80], Iter [200/500] Loss: 0.9280
  400.  
  401. Epoch [23/80], Iter [300/500] Loss: 2.9585
  402.  
  403. Epoch [23/80], Iter [400/500] Loss: 1.0787
  404.  
  405. Epoch [23/80], Iter [500/500] Loss: 3.1779
  406.  
  407. Epoch [23/80], Iter [600/500] Loss: 2.4411
  408.  
  409. Epoch [23/80], Iter [700/500] Loss: 2.0049
  410.  
  411. Epoch [23/80], Iter [800/500] Loss: 2.2844
  412.  
  413. Epoch [23/80], Iter [900/500] Loss: 2.2328
  414.  
  415. Epoch [24/80], Iter [100/500] Loss: 1.5221
  416.  
  417. Epoch [24/80], Iter [200/500] Loss: 2.0100
  418.  
  419. Epoch [24/80], Iter [300/500] Loss: 1.8868
  420.  
  421. Epoch [24/80], Iter [400/500] Loss: 1.4898
  422.  
  423. Epoch [24/80], Iter [500/500] Loss: 1.1626
  424.  
  425. Epoch [24/80], Iter [600/500] Loss: 1.2527
  426.  
  427. Epoch [24/80], Iter [700/500] Loss: 1.3430
  428.  
  429. Epoch [24/80], Iter [800/500] Loss: 1.3355
  430.  
  431. Epoch [24/80], Iter [900/500] Loss: 1.8292
  432.  
  433. Epoch [25/80], Iter [100/500] Loss: 2.2471
  434.  
  435. Epoch [25/80], Iter [200/500] Loss: 2.8727
  436.  
  437. Epoch [25/80], Iter [300/500] Loss: 1.3531
  438.  
  439. Epoch [25/80], Iter [400/500] Loss: 1.1110
  440.  
  441. Epoch [25/80], Iter [500/500] Loss: 2.7648
  442.  
  443. Epoch [25/80], Iter [600/500] Loss: 1.8364
  444.  
  445. Epoch [25/80], Iter [700/500] Loss: 1.4299
  446.  
  447. Epoch [25/80], Iter [800/500] Loss: 1.5985
  448.  
  449. Epoch [25/80], Iter [900/500] Loss: 2.5364
  450.  
  451. Epoch [26/80], Iter [100/500] Loss: 2.6469
  452.  
  453. Epoch [26/80], Iter [200/500] Loss: 3.1215
  454.  
  455. Epoch [26/80], Iter [300/500] Loss: 1.4029
  456.  
  457. Epoch [26/80], Iter [400/500] Loss: 1.2688
  458.  
  459. Epoch [26/80], Iter [500/500] Loss: 2.4794
  460.  
  461. Epoch [26/80], Iter [600/500] Loss: 1.1937
  462.  
  463. Epoch [26/80], Iter [700/500] Loss: 1.0709
  464.  
  465. Epoch [26/80], Iter [800/500] Loss: 1.4961
  466.  
  467. Epoch [26/80], Iter [900/500] Loss: 1.4560
  468.  
  469. Epoch [27/80], Iter [100/500] Loss: 2.0633
  470.  
  471. Epoch [27/80], Iter [200/500] Loss: 2.6687
  472.  
  473. Epoch [27/80], Iter [300/500] Loss: 5.2073
  474.  
  475. Epoch [27/80], Iter [400/500] Loss: 2.2762
  476.  
  477. Epoch [27/80], Iter [500/500] Loss: 1.6105
  478.  
  479. Epoch [27/80], Iter [600/500] Loss: 1.6631
  480.  
  481. Epoch [27/80], Iter [700/500] Loss: 1.0523
  482.  
  483. Epoch [27/80], Iter [800/500] Loss: 2.8945
  484.  
  485. Epoch [27/80], Iter [900/500] Loss: 1.5388
  486.  
  487. Epoch [28/80], Iter [100/500] Loss: 1.6230
  488.  
  489. Epoch [28/80], Iter [200/500] Loss: 1.8003
  490.  
  491. Epoch [28/80], Iter [300/500] Loss: 1.4840
  492.  
  493. Epoch [28/80], Iter [400/500] Loss: 0.9465
  494.  
  495. Epoch [28/80], Iter [500/500] Loss: 1.6054
  496.  
  497. Epoch [28/80], Iter [600/500] Loss: 3.3669
  498.  
  499. Epoch [28/80], Iter [700/500] Loss: 1.4555
  500.  
  501. Epoch [28/80], Iter [800/500] Loss: 2.2903
  502.  
  503. Epoch [28/80], Iter [900/500] Loss: 1.2850
  504.  
  505. Epoch [29/80], Iter [100/500] Loss: 1.7152
  506.  
  507. Epoch [29/80], Iter [200/500] Loss: 1.2824
  508.  
  509. Epoch [29/80], Iter [300/500] Loss: 1.5778
  510.  
  511. Epoch [29/80], Iter [400/500] Loss: 3.1152
  512.  
  513. Epoch [29/80], Iter [500/500] Loss: 1.2492
  514.  
  515. Epoch [29/80], Iter [600/500] Loss: 0.9721
  516.  
  517. Epoch [29/80], Iter [700/500] Loss: 1.4465
  518.  
  519. Epoch [29/80], Iter [800/500] Loss: 0.9678
  520.  
  521. Epoch [29/80], Iter [900/500] Loss: 1.5000
  522.  
  523. Epoch [30/80], Iter [100/500] Loss: 1.5524
  524.  
  525. Epoch [30/80], Iter [200/500] Loss: 1.5233
  526.  
  527. Epoch [30/80], Iter [300/500] Loss: 1.4226
  528.  
  529. Epoch [30/80], Iter [400/500] Loss: 0.9432
  530.  
  531. Epoch [30/80], Iter [500/500] Loss: 1.4623
  532.  
  533. Epoch [30/80], Iter [600/500] Loss: 1.3845
  534.  
  535. Epoch [30/80], Iter [700/500] Loss: 1.3301
  536.  
  537. Epoch [30/80], Iter [800/500] Loss: 1.0105
  538.  
  539. Epoch [30/80], Iter [900/500] Loss: 1.8372
  540.  
  541. Epoch [31/80], Iter [100/500] Loss: 1.3019
  542.  
  543. Epoch [31/80], Iter [200/500] Loss: 1.1216
  544.  
  545. Epoch [31/80], Iter [300/500] Loss: 0.8553
  546.  
  547. Epoch [31/80], Iter [400/500] Loss: 1.6882
  548.  
  549. Epoch [31/80], Iter [500/500] Loss: 1.7691
  550.  
  551. Epoch [31/80], Iter [600/500] Loss: 1.7412
  552.  
  553. Epoch [31/80], Iter [700/500] Loss: 2.2204
  554.  
  555. Epoch [31/80], Iter [800/500] Loss: 0.6559
  556.  
  557. Epoch [31/80], Iter [900/500] Loss: 1.4613
  558.  
  559. Epoch [32/80], Iter [100/500] Loss: 1.1408
  560.  
  561. Epoch [32/80], Iter [200/500] Loss: 3.6378
  562.  
  563. Epoch [32/80], Iter [300/500] Loss: 1.5543
  564.  
  565. Epoch [32/80], Iter [400/500] Loss: 2.1538
  566.  
  567. Epoch [32/80], Iter [500/500] Loss: 1.1102
  568.  
  569. Epoch [32/80], Iter [600/500] Loss: 1.3187
  570.  
  571. Epoch [32/80], Iter [700/500] Loss: 0.7230
  572.  
  573. Epoch [32/80], Iter [800/500] Loss: 1.6149
  574.  
  575. Epoch [32/80], Iter [900/500] Loss: 1.0926
  576.  
  577. Epoch [33/80], Iter [100/500] Loss: 1.9460
  578.  
  579. Epoch [33/80], Iter [200/500] Loss: 0.9948
  580.  
  581. Epoch [33/80], Iter [300/500] Loss: 1.4460
  582.  
  583. Epoch [33/80], Iter [400/500] Loss: 1.5855
  584.  
  585. Epoch [33/80], Iter [500/500] Loss: 1.5834
  586.  
  587. Epoch [33/80], Iter [600/500] Loss: 0.8896
  588.  
  589. Epoch [33/80], Iter [700/500] Loss: 1.1927
  590.  
  591. Epoch [33/80], Iter [800/500] Loss: 1.5707
  592.  
  593. Epoch [33/80], Iter [900/500] Loss: 0.7817
  594.  
  595. Epoch [34/80], Iter [100/500] Loss: 0.9155
  596.  
  597. Epoch [34/80], Iter [200/500] Loss: 0.7930
  598.  
  599. Epoch [34/80], Iter [300/500] Loss: 1.2760
  600.  
  601. Epoch [34/80], Iter [400/500] Loss: 0.7170
  602.  
  603. Epoch [34/80], Iter [500/500] Loss: 1.9962
  604.  
  605. Epoch [34/80], Iter [600/500] Loss: 1.2418
  606.  
  607. Epoch [34/80], Iter [700/500] Loss: 1.4847
  608.  
  609. Epoch [34/80], Iter [800/500] Loss: 0.8495
  610.  
  611. Epoch [34/80], Iter [900/500] Loss: 1.3709
  612.  
  613. Epoch [35/80], Iter [100/500] Loss: 1.8495
  614.  
  615. Epoch [35/80], Iter [200/500] Loss: 0.9494
  616.  
  617. Epoch [35/80], Iter [300/500] Loss: 0.6224
  618.  
  619. Epoch [35/80], Iter [400/500] Loss: 0.5101
  620.  
  621. Epoch [35/80], Iter [500/500] Loss: 0.9373
  622.  
  623. Epoch [35/80], Iter [600/500] Loss: 1.5811
  624.  
  625. Epoch [35/80], Iter [700/500] Loss: 1.5295
  626.  
  627. Epoch [35/80], Iter [800/500] Loss: 0.7787
  628.  
  629. Epoch [35/80], Iter [900/500] Loss: 1.0337
  630.  
  631. Epoch [36/80], Iter [100/500] Loss: 0.6236
  632.  
  633. Epoch [36/80], Iter [200/500] Loss: 1.8516
  634.  
  635. Epoch [36/80], Iter [300/500] Loss: 1.5021
  636.  
  637. Epoch [36/80], Iter [400/500] Loss: 1.0459
  638.  
  639. Epoch [36/80], Iter [500/500] Loss: 1.4737
  640.  
  641. Epoch [36/80], Iter [600/500] Loss: 0.7842
  642.  
  643. Epoch [36/80], Iter [700/500] Loss: 1.6798
  644.  
  645. Epoch [36/80], Iter [800/500] Loss: 1.7413
  646.  
  647. Epoch [36/80], Iter [900/500] Loss: 0.6222
  648.  
  649. Epoch [37/80], Iter [100/500] Loss: 0.5713
  650.  
  651. Epoch [37/80], Iter [200/500] Loss: 1.3030
  652.  
  653. Epoch [37/80], Iter [300/500] Loss: 1.6937
  654.  
  655. Epoch [37/80], Iter [400/500] Loss: 0.8656
  656.  
  657. Epoch [37/80], Iter [500/500] Loss: 1.3340
  658.  
  659. Epoch [37/80], Iter [600/500] Loss: 0.6310
  660.  
  661. Epoch [37/80], Iter [700/500] Loss: 1.1445
  662.  
  663. Epoch [37/80], Iter [800/500] Loss: 0.6099
  664.  
  665. Epoch [37/80], Iter [900/500] Loss: 1.3679
  666.  
  667. Epoch [38/80], Iter [100/500] Loss: 0.9127
  668.  
  669. Epoch [38/80], Iter [200/500] Loss: 1.9450
  670.  
  671. Epoch [38/80], Iter [300/500] Loss: 1.2240
  672.  
  673. Epoch [38/80], Iter [400/500] Loss: 1.4049
  674.  
  675. Epoch [38/80], Iter [500/500] Loss: 0.9247
  676.  
  677. Epoch [38/80], Iter [600/500] Loss: 1.5308
  678.  
  679. Epoch [38/80], Iter [700/500] Loss: 1.9777
  680.  
  681. Epoch [38/80], Iter [800/500] Loss: 1.2109
  682.  
  683. Epoch [38/80], Iter [900/500] Loss: 0.8337
  684.  
  685. Epoch [39/80], Iter [100/500] Loss: 0.7904
  686.  
  687. Epoch [39/80], Iter [200/500] Loss: 0.8451
  688.  
  689. Epoch [39/80], Iter [300/500] Loss: 1.6993
  690.  
  691. Epoch [39/80], Iter [400/500] Loss: 1.2196
  692.  
  693. Epoch [39/80], Iter [500/500] Loss: 1.0665
  694.  
  695. Epoch [39/80], Iter [600/500] Loss: 0.7412
  696.  
  697. Epoch [39/80], Iter [700/500] Loss: 0.6486
  698.  
  699. Epoch [39/80], Iter [800/500] Loss: 1.5608
  700.  
  701. Epoch [39/80], Iter [900/500] Loss: 1.9978
  702.  
  703. Epoch [40/80], Iter [100/500] Loss: 1.7101
  704.  
  705. Epoch [40/80], Iter [200/500] Loss: 1.4484
  706.  
  707. Epoch [40/80], Iter [300/500] Loss: 1.5894
  708.  
  709. Epoch [40/80], Iter [400/500] Loss: 1.3371
  710.  
  711. Epoch [40/80], Iter [500/500] Loss: 0.9766
  712.  
  713. Epoch [40/80], Iter [600/500] Loss: 1.9935
  714.  
  715. Epoch [40/80], Iter [700/500] Loss: 2.0719
  716.  
  717. Epoch [40/80], Iter [800/500] Loss: 0.9455
  718.  
  719. Epoch [40/80], Iter [900/500] Loss: 0.8072
  720.  
  721. Epoch [41/80], Iter [100/500] Loss: 1.3899
  722.  
  723. Epoch [41/80], Iter [200/500] Loss: 0.9863
  724.  
  725. Epoch [41/80], Iter [300/500] Loss: 1.3738
  726.  
  727. Epoch [41/80], Iter [400/500] Loss: 0.6883
  728.  
  729. Epoch [41/80], Iter [500/500] Loss: 0.8442
  730.  
  731. Epoch [41/80], Iter [600/500] Loss: 2.0286
  732.  
  733. Epoch [41/80], Iter [700/500] Loss: 1.1960
  734.  
  735. Epoch [41/80], Iter [800/500] Loss: 1.2499
  736.  
  737. Epoch [41/80], Iter [900/500] Loss: 0.6043
  738.  
  739. Epoch [42/80], Iter [100/500] Loss: 0.3437
  740.  
  741. Epoch [42/80], Iter [200/500] Loss: 0.6596
  742.  
  743. Epoch [42/80], Iter [300/500] Loss: 0.4450
  744.  
  745. Epoch [42/80], Iter [400/500] Loss: 0.7189
  746.  
  747. Epoch [42/80], Iter [500/500] Loss: 0.5022
  748.  
  749. Epoch [42/80], Iter [600/500] Loss: 0.4597
  750.  
  751. Epoch [42/80], Iter [700/500] Loss: 0.7743
  752.  
  753. Epoch [42/80], Iter [800/500] Loss: 0.3344
  754.  
  755. Epoch [42/80], Iter [900/500] Loss: 0.7295
  756.  
  757. Epoch [43/80], Iter [100/500] Loss: 0.5074
  758.  
  759. Epoch [43/80], Iter [200/500] Loss: 0.3128
  760.  
  761. Epoch [43/80], Iter [300/500] Loss: 0.2800
  762.  
  763. Epoch [43/80], Iter [400/500] Loss: 0.3059
  764.  
  765. Epoch [43/80], Iter [500/500] Loss: 0.3486
  766.  
  767. Epoch [43/80], Iter [600/500] Loss: 0.7222
  768.  
  769. Epoch [43/80], Iter [700/500] Loss: 0.7349
  770.  
  771. Epoch [43/80], Iter [800/500] Loss: 0.8455
  772.  
  773. Epoch [43/80], Iter [900/500] Loss: 0.7261
  774.  
  775. Epoch [44/80], Iter [100/500] Loss: 0.5404
  776.  
  777. Epoch [44/80], Iter [200/500] Loss: 0.5428
  778.  
  779. Epoch [44/80], Iter [300/500] Loss: 0.5385
  780.  
  781. Epoch [44/80], Iter [400/500] Loss: 0.4106
  782.  
  783. Epoch [44/80], Iter [500/500] Loss: 0.5296
  784.  
  785. Epoch [44/80], Iter [600/500] Loss: 0.6045
  786.  
  787. Epoch [44/80], Iter [700/500] Loss: 0.3837
  788.  
  789. Epoch [44/80], Iter [800/500] Loss: 0.7552
  790.  
  791. Epoch [44/80], Iter [900/500] Loss: 0.4996
  792.  
  793. Epoch [45/80], Iter [100/500] Loss: 0.3381
  794.  
  795. Epoch [45/80], Iter [200/500] Loss: 0.3910
  796.  
  797. Epoch [45/80], Iter [300/500] Loss: 0.3790
  798.  
  799. Epoch [45/80], Iter [400/500] Loss: 0.2718
  800.  
  801. Epoch [45/80], Iter [500/500] Loss: 0.3572
  802.  
  803. Epoch [45/80], Iter [600/500] Loss: 0.2913
  804.  
  805. Epoch [45/80], Iter [700/500] Loss: 0.5244
  806.  
  807. Epoch [45/80], Iter [800/500] Loss: 0.3647
  808.  
  809. Epoch [45/80], Iter [900/500] Loss: 0.3161
  810.  
  811. Epoch [46/80], Iter [100/500] Loss: 0.4728
  812.  
  813. Epoch [46/80], Iter [200/500] Loss: 0.4386
  814.  
  815. Epoch [46/80], Iter [300/500] Loss: 0.2861
  816.  
  817. Epoch [46/80], Iter [400/500] Loss: 0.2460
  818.  
  819. Epoch [46/80], Iter [500/500] Loss: 0.3490
  820.  
  821. Epoch [46/80], Iter [600/500] Loss: 0.5804
  822.  
  823. Epoch [46/80], Iter [700/500] Loss: 0.4951
  824.  
  825. Epoch [46/80], Iter [800/500] Loss: 0.4600
  826.  
  827. Epoch [46/80], Iter [900/500] Loss: 0.5658
  828.  
  829. Epoch [47/80], Iter [100/500] Loss: 0.2479
  830.  
  831. Epoch [47/80], Iter [200/500] Loss: 0.2688
  832.  
  833. Epoch [47/80], Iter [300/500] Loss: 0.3082
  834.  
  835. Epoch [47/80], Iter [400/500] Loss: 0.3929
  836.  
  837. Epoch [47/80], Iter [500/500] Loss: 0.3126
  838.  
  839. Epoch [47/80], Iter [600/500] Loss: 0.5041
  840.  
  841. Epoch [47/80], Iter [700/500] Loss: 0.5848
  842.  
  843. Epoch [47/80], Iter [800/500] Loss: 0.4968
  844.  
  845. Epoch [47/80], Iter [900/500] Loss: 0.3496
  846.  
  847. Epoch [48/80], Iter [100/500] Loss: 0.2753
  848.  
  849. Epoch [48/80], Iter [200/500] Loss: 0.3885
  850.  
  851. Epoch [48/80], Iter [300/500] Loss: 0.3743
  852.  
  853. Epoch [48/80], Iter [400/500] Loss: 0.2425
  854.  
  855. Epoch [48/80], Iter [500/500] Loss: 0.2472
  856.  
  857. Epoch [48/80], Iter [600/500] Loss: 0.3003
  858.  
  859. Epoch [48/80], Iter [700/500] Loss: 0.4936
  860.  
  861. Epoch [48/80], Iter [800/500] Loss: 0.3169
  862.  
  863. Epoch [48/80], Iter [900/500] Loss: 0.2543
  864.  
  865. Epoch [49/80], Iter [100/500] Loss: 0.4262
  866.  
  867. Epoch [49/80], Iter [200/500] Loss: 0.3396
  868.  
  869. Epoch [49/80], Iter [300/500] Loss: 0.4670
  870.  
  871. Epoch [49/80], Iter [400/500] Loss: 0.2543
  872.  
  873. Epoch [49/80], Iter [500/500] Loss: 0.3146
  874.  
  875. Epoch [49/80], Iter [600/500] Loss: 1.3187
  876.  
  877. Epoch [49/80], Iter [700/500] Loss: 0.2993
  878.  
  879. Epoch [49/80], Iter [800/500] Loss: 0.3053
  880.  
  881. Epoch [49/80], Iter [900/500] Loss: 0.3343
  882.  
  883. Epoch [50/80], Iter [100/500] Loss: 0.2081
  884.  
  885. Epoch [50/80], Iter [200/500] Loss: 0.5631
  886.  
  887. Epoch [50/80], Iter [300/500] Loss: 0.4358
  888.  
  889. Epoch [50/80], Iter [400/500] Loss: 0.4028
  890.  
  891. Epoch [50/80], Iter [500/500] Loss: 0.2510
  892.  
  893. Epoch [50/80], Iter [600/500] Loss: 0.5876
  894.  
  895. Epoch [50/80], Iter [700/500] Loss: 0.3692
  896.  
  897. Epoch [50/80], Iter [800/500] Loss: 0.4500
  898.  
  899. Epoch [50/80], Iter [900/500] Loss: 0.1850
  900.  
  901. ('time used:', 30318.149681000003)

Pytorch使用多GPU的更多相关文章

  1. anaconda+pytorch安装(无GPU版本)

    anaconda+pytorch安装(无GPU版本) 待办 https://blog.csdn.net/nnUyi/article/details/78471326

  2. [源码解析] PyTorch 如何使用GPU

    [源码解析] PyTorch 如何使用GPU 目录 [源码解析] PyTorch 如何使用GPU 0x00 摘要 0x01 问题 0x02 移动模型到GPU 2.1 cuda 操作 2.2 Modul ...

  3. pytorch中查看gpu信息

    其他:windows使用nvidia-smi查看gpu信息 为什么将数据转移至GPU的方法叫做.cuda而不是.gpu,就像将数据转移至CPU调用的方法是.cpu?这是因为GPU的编程接口采用CUDA ...

  4. Pytorch中多GPU训练指北

    前言 在数据越来越多的时代,随着模型规模参数的增多,以及数据量的不断提升,使用多GPU去训练是不可避免的事情.Pytorch在0.4.0及以后的版本中已经提供了多GPU训练的方式,本文简单讲解下使用P ...

  5. 使用Pytorch在多GPU下保存和加载训练模型参数遇到的问题

    最近使用Pytorch在学习一个深度学习项目,在模型保存和加载过程中遇到了问题,最终通过在网卡查找资料得已解决,故以此记之,以备忘却. 首先,是在使用多GPU进行模型训练的过程中,在保存模型参数时,应 ...

  6. 从头学pytorch(十三):使用GPU做计算

    GPU计算 默认情况下,pytorch将数据保存在内存,而不是显存. 查看显卡信息 nvidia-smi 我的机器输出如下: Fri Jan 3 16:20:51 2020 +------------ ...

  7. pytorch设置多GPU运行的方法

    1.DataParallel layers (multi-GPU, distributed) 1)DataParallel CLASS torch.nn.DataParallel(module, de ...

  8. Pytorch:使用GPU训练

    1.模型转为cuda gpus = [0] #使用哪几个GPU进行训练,这里选择0号GPU cuda_gpu = torch.cuda.is_available() #判断GPU是否存在可用 net ...

  9. Pytorch多GPU并行处理

    可以参数2017coco detection 旷视冠军MegDet: MegDet 与 Synchronized BatchNorm PyTorch-Encoding官方文档对CGBN(cross g ...

随机推荐

  1. PHP闭包函数

    # 提到闭包就不得不想起匿名函数,也叫闭包函数(closures),貌似PHP闭包实现主要就是靠它.声明一个匿名函数是这样: $func = function() { }; //带结束符 # 可以看到 ...

  2. 微信小程序: rpx与px,rem相互转换

    官方上规定屏幕宽度为20rem,规定屏幕宽为750rpx,则1rem=750/20rpx. 微信官方建议视觉稿以iPhone 6为标准:在 iPhone6 上,屏幕宽度为375px,共有750个物理像 ...

  3. MySql查询最近一个月,一周,一天

    最近一个月 SELECT * FROM table WHERE DATE_SUB(CURDATE(), INTERVAL 1 MONTH) <= date(time); 本月.当前月 SELEC ...

  4. lodash 学习资料

    lodash.js 是什么不多说,工作时间长了就基本绕不过去他,工作项目中也很好的弥补angular ,jquery 的不足,由中文bootstrap 退出的中文版学习资料 http://lodash ...

  5. unity中让摄像机移动到鼠标点击的位置和鼠标控制平移视角

    private Vector3 targetVector3; private float movespeed=0.5f; private bool IsOver = true; private Gam ...

  6. Netty完成网络通信(二)

    Netty是基于NIO的框架,完善了NIO的一些缺陷,因此可以用Netty替代NIO Netty实现通信步骤: 1.创建两个NIO线程组,一个专门用于网络事件处理(接受客户端的连接),另一个则进行网络 ...

  7. JavaScript简介及作用

    JavaScript是一门脚本语言,是可以插入HTML页面的编程代码,插入HTML以后可以由所有现代浏览器运行 一.写如html输出 <body> <script> docum ...

  8. weblogic连接池过小导致TPS呈周期性跳坑现象

    利用晚上时间跑个12小时稳定性,第二天发现TPS曲线图成了这个样子. 排查步骤: 1.观察TPS图发现,几乎每两个小时TPS掉一次坑,是周期性的,而且TPS有掉到0的现象.LR上也有失败的交易,猜想是 ...

  9. 查看电脑的IP地址及配置

    自己主机的IP地址查看cmd----ipconfig/all,如下图

  10. Office 365 - For security reasons DTD is prohibited in this XML document

    博客地址:http://blog.csdn.net/FoxDave 今天在测试东西的时候发现在本机运行CSOM代码或使用Office 365 PowerShell时,出现了如下错误: Connec ...