import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms # Device configuration
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Hyper parameters
num_epochs = 5
num_classes = 10
batch_size = 100
learning_rate = 0.001

# MNIST dataset
# NOTE(review): this copy of the script had lost the `train=` / `transform=`
# arguments of the first call and the DataLoader constructor names; they are
# restored to the usual torchvision / torch.utils.data usage -- confirm
# against the upstream script.
train_dataset = torchvision.datasets.MNIST(root='../../data/',
                                           train=True,
                                           transform=transforms.ToTensor(),
                                           download=True)

# train=False selects the held-out split; without it the "test" accuracy
# would be measured on the training images.
test_dataset = torchvision.datasets.MNIST(root='../../data/',
                                          train=False,
                                          transform=transforms.ToTensor())

# Data loader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

# Convolutional neural network (two convolutional layers)
class ConvNet(nn.Module):
    """Two conv + max-pool stages followed by a single linear classifier.

    The linear layer expects 7*7*32 features, i.e. the result of halving a
    28x28 input twice with 32 output channels.

    Args:
        input_channel: number of channels in the input images.
        num_classes: size of the output layer.
    """

    def __init__(self, input_channel, num_classes):
        super(ConvNet, self).__init__()
        # NOTE(review): there is no normalisation/activation between the
        # convolution and the pooling in either stage, although the test code
        # in this file mentions batchnorm -- confirm whether those layers were
        # lost from this copy.
        self.layer1 = nn.Sequential(
            nn.Conv2d(input_channel, 16, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )  # 28*28*1 -> 14*14*16
        self.layer2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )  # 14*14*16 -> 7*7*32
        self.fc = nn.Linear(7 * 7 * 32, num_classes)

    def forward(self, input):
        """Run both stages, flatten, and return the linear layer's output."""
        out = self.layer1(input)
        out = self.layer2(out)
        # Keep the batch dimension, flatten everything else for the classifier.
        out = out.reshape(out.size(0), -1)
        return self.fc(out)


model = ConvNet(1, num_classes).to(device)

# Construct Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
# NOTE(review): the device moves, the zero_grad/backward calls, the argument
# of torch.max and the torch.save call were missing or truncated in this copy
# and have been restored to the standard PyTorch training loop -- confirm.
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()  # gradients accumulate across steps otherwise
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                  .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

# Test the model
model.eval()  # eval mode (batchnorm uses moving mean/variance instead of mini-batch mean/variance)
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # Index of the largest score along dim 1 is the predicted class.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total))

# Save the model checkpoint
torch.save(model.state_dict(), 'model.ckpt')
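# ---------------------------------------------------------------------------- #
# An implementation of "Deep Residual Learning for Image Recognition"          #
# (He et al., 2015).                                                           #
# See section 4.2 for the model architecture on CIFAR-10.                      #
# Some parts of the code were adapted from another implementation; the         #
# reference link is missing from this copy (presumably torchvision's           #
# models/resnet.py -- confirm).                                                #
# ---------------------------------------------------------------------------- #
import torch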
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms # Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyper-parameters
num_epochs = 80
learning_rate = 0.001

# Image preprocessing modules
transform = transforms.Compose([
    transforms.ToTensor()])

# CIFAR-10 dataset
# NOTE(review): the `train=` / `transform=` arguments of the first call and
# the DataLoader constructors were truncated in this copy. They are restored
# to the usual torchvision / torch.utils.data usage; the batch size of 100 is
# an assumption (this script defines no batch_size of its own) -- confirm.
train_dataset = torchvision.datasets.CIFAR10(root='../../data/',
                                             train=True,
                                             transform=transform,
                                             download=True)

# train=False selects the held-out test split.
test_dataset = torchvision.datasets.CIFAR10(root='../../data/',
                                            train=False,
                                            transform=transforms.ToTensor())

# Data loader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=100,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=100,
                                          shuffle=False)

# 3x3 convolution
def conv3x3(in_channels, out_channels, stride=1):
    """Return a bias-free 3x3 ``nn.Conv2d`` with padding 1.

    Args:
        in_channels: number of input channels.
        out_channels: number of output channels.
        stride: convolution stride (default 1).
    """
    return nn.Conv2d(in_channels, out_channels, kernel_size=3,
                     stride=stride, padding=1, bias=False)


# Residual block
class ResidualBlock(nn.Module):
    """conv-bn-relu-conv-bn with a skip connection added before the last ReLU.

    Args:
        in_channels: channels of the block input.
        out_channels: channels produced by both convolutions.
        stride: stride of the first convolution (default 1).
        downsample: optional module applied to the input before it is added
            to the convolution path; ``None`` adds the input unchanged.
    """

    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResidualBlock, self).__init__()
        self.conv1 = conv3x3(in_channels, out_channels, stride)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(out_channels, out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut(x))``."""
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        # Compare against None explicitly: nn.Sequential defines __len__, so a
        # plain truthiness test would silently skip an empty container.
        if self.downsample is not None:
            residual = self.downsample(x)
        out += residual
        return self.relu(out)


# ResNet
class ResNet(nn.Module):
    """Three-stage residual network (16, 32, 64 channels) with a linear head.

    Args:
        block: residual block class, called as
            ``block(in_channels, out_channels, stride, downsample)``.
        layers: sequence giving the number of blocks per stage.
        num_classes: size of the output layer (default 10).
    """

    def __init__(self, block, layers, num_classes=10):
        super(ResNet, self).__init__()
        self.in_channels = 16
        self.conv = conv3x3(3, 16)
        # NOTE(review): the attribute name was garbled in this copy; `bn` is
        # assumed -- confirm.
        self.bn = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        # NOTE(review): stages 1 and 2 both read layers[0] and stage 3 reads
        # layers[1], so entries from index 2 onwards are never used. This
        # looks like it was meant to be layers[0], layers[1], layers[2];
        # kept as found -- confirm.
        self.layer1 = self.make_layer(block, 16, layers[0])
        self.layer2 = self.make_layer(block, 32, layers[0], 2)
        self.layer3 = self.make_layer(block, 64, layers[1], 2)
        self.avg_pool = nn.AvgPool2d(8)
        self.fc = nn.Linear(64, num_classes)

    def make_layer(self, block, out_channels, blocks, stride=1):
        """Build one stage of ``blocks`` residual blocks.

        Only the first block receives ``stride`` and, when the stride or the
        channel count changes, a conv + batch-norm shortcut so the skip path
        matches the main path. Updates ``self.in_channels`` for the next stage.
        """
        downsample = None
        if (stride != 1) or (self.in_channels != out_channels):
            downsample = nn.Sequential(
                conv3x3(self.in_channels, out_channels, stride=stride),
                nn.BatchNorm2d(out_channels))
        layers = []
        layers.append(block(self.in_channels, out_channels, stride, downsample))
        self.in_channels = out_channels
        for _ in range(1, blocks):
            layers.append(block(out_channels, out_channels))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Apply stem, three stages, average pooling and the linear layer."""
        out = self.conv(x)
        out = self.bn(out)
        out = self.relu(out)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.avg_pool(out)
        # Flatten everything but the batch dimension for the linear layer.
        out = out.view(out.size(0), -1)
        return self.fc(out)


model = ResNet(ResidualBlock, [2, 2, 2, 2]).to(device)

# Loss and optimizer
# Loss applied to (outputs, labels) in the training loop, and an Adam
# optimizer over all model parameters starting at `learning_rate`.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# For updating learning rate
def update_lr(optimizer, lr):
    """Set the learning rate of every parameter group of ``optimizer`` to ``lr``.

    Args:
        optimizer: object exposing ``param_groups``, a list of dicts.
        lr: new learning rate written to each group's ``'lr'`` entry.
    """
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr


# Train the model
# NOTE(review): the device moves, the zero_grad/backward calls, the argument
# of torch.max and the torch.save call were missing or truncated in this copy
# and have been restored to the standard PyTorch training loop -- confirm.
total_step = len(train_loader)
curr_lr = learning_rate
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()  # gradients accumulate across steps otherwise
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            print("Epoch [{}/{}], Step [{}/{}] Loss: {:.4f}"
                  .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

    # Decay learning rate: divide by 3 every 20 epochs
    if (epoch+1) % 20 == 0:
        curr_lr /= 3
        update_lr(optimizer, curr_lr)

# Test the model
# The network contains BatchNorm2d layers, so switch to eval mode to use the
# running statistics instead of per-batch statistics.
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # Index of the largest score along dim 1 is the predicted class.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Accuracy of the model on the test images: {} %'.format(100 * correct / total))

# Save the model checkpoint
torch.save(model.state_dict(), 'resnet.ckpt')
# from torchsummary import summary
# summary(model, (3, 32, 32)) #
# from tensorboardX import SummaryWriter
# dummy_input = torch.rand(1, 3, 32, 32).to(device)
# with SummaryWriter(comment='residual') as w:
# w.add_graph(model, (dummy_input, ))
Layer (type) Output Shape Param #
Conv2d-1 [-1, 16, 32, 32] 432
BatchNorm2d-2 [-1, 16, 32, 32] 32
ReLU-3 [-1, 16, 32, 32] 0
Conv2d-4 [-1, 16, 32, 32] 2,304
BatchNorm2d-5 [-1, 16, 32, 32] 32
ReLU-6 [-1, 16, 32, 32] 0
Conv2d-7 [-1, 16, 32, 32] 2,304
BatchNorm2d-8 [-1, 16, 32, 32] 32
ReLU-9 [-1, 16, 32, 32] 0
ResidualBlock-10 [-1, 16, 32, 32] 0
Conv2d-11 [-1, 16, 32, 32] 2,304
BatchNorm2d-12 [-1, 16, 32, 32] 32
ReLU-13 [-1, 16, 32, 32] 0
Conv2d-14 [-1, 16, 32, 32] 2,304
BatchNorm2d-15 [-1, 16, 32, 32] 32
ReLU-16 [-1, 16, 32, 32] 0
ResidualBlock-17 [-1, 16, 32, 32] 0
Conv2d-18 [-1, 32, 16, 16] 4,608
BatchNorm2d-19 [-1, 32, 16, 16] 64
ReLU-20 [-1, 32, 16, 16] 0
Conv2d-21 [-1, 32, 16, 16] 9,216
BatchNorm2d-22 [-1, 32, 16, 16] 64
Conv2d-23 [-1, 32, 16, 16] 4,608
BatchNorm2d-24 [-1, 32, 16, 16] 64
ReLU-25 [-1, 32, 16, 16] 0
ResidualBlock-26 [-1, 32, 16, 16] 0
Conv2d-27 [-1, 32, 16, 16] 9,216
BatchNorm2d-28 [-1, 32, 16, 16] 64
ReLU-29 [-1, 32, 16, 16] 0
Conv2d-30 [-1, 32, 16, 16] 9,216
BatchNorm2d-31 [-1, 32, 16, 16] 64
ReLU-32 [-1, 32, 16, 16] 0
ResidualBlock-33 [-1, 32, 16, 16] 0
Conv2d-34 [-1, 64, 8, 8] 18,432
BatchNorm2d-35 [-1, 64, 8, 8] 128
ReLU-36 [-1, 64, 8, 8] 0
Conv2d-37 [-1, 64, 8, 8] 36,864
BatchNorm2d-38 [-1, 64, 8, 8] 128
Conv2d-39 [-1, 64, 8, 8] 18,432
BatchNorm2d-40 [-1, 64, 8, 8] 128
ReLU-41 [-1, 64, 8, 8] 0
ResidualBlock-42 [-1, 64, 8, 8] 0
Conv2d-43 [-1, 64, 8, 8] 36,864
BatchNorm2d-44 [-1, 64, 8, 8] 128
ReLU-45 [-1, 64, 8, 8] 0
Conv2d-46 [-1, 64, 8, 8] 36,864
BatchNorm2d-47 [-1, 64, 8, 8] 128
ReLU-48 [-1, 64, 8, 8] 0
ResidualBlock-49 [-1, 64, 8, 8] 0
AvgPool2d-50 [-1, 64, 1, 1] 0
Linear-51 [-1, 10] 650
Total params: 195,738
Trainable params: 195,738
Non-trainable params: 0
Input size (MB): 0.01
Forward/backward pass size (MB): 3.63
Params size (MB): 0.75
Estimated Total Size (MB): 4.38
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms # Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyper-parameters
# Each 28x28 image is fed to the LSTM as 28 time steps of 28 features
# (see the reshape in the training loop).
sequence_length = 28
input_size = 28
hidden_size = 128
num_layers = 2
num_classes = 10
batch_size = 100
num_epochs = 2
learning_rate = 0.01

# MNIST dataset
# NOTE(review): the `train=` / `transform=` arguments of the first call and
# the DataLoader constructor names were truncated in this copy; restored to
# the usual torchvision / torch.utils.data usage -- confirm.
train_dataset = torchvision.datasets.MNIST(root='../../data/',
                                           train=True,
                                           transform=transforms.ToTensor(),
                                           download=True)

# train=False selects the held-out test split.
test_dataset = torchvision.datasets.MNIST(root='../../data/',
                                          train=False,
                                          transform=transforms.ToTensor())

# Data loader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

# Recurrent neural network (many-to-one)
class RNN(nn.Module):
    """Many-to-one LSTM classifier: linear layer on the last time step.

    Args:
        input_size: feature size of each time step.
        hidden_size: LSTM hidden size.
        num_layers: number of stacked LSTM layers.
        num_classes: size of the output layer.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return the linear layer's output for the last time step of ``x``.

        ``x`` is batch-first (the LSTM is built with ``batch_first=True``).
        """
        # Set initial hidden and cell states on the input's own device rather
        # than a module-level global, so the module works wherever x lives.
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size, device=x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size, device=x.device)

        # Forward propagate LSTM
        out, _ = self.lstm(x, (h0, c0))  # out: tensor of shape (batch_size, seq_length, hidden_size)

        # Decode the hidden state of the last time step
        return self.fc(out[:, -1, :])


model = RNN(input_size, hidden_size, num_layers, num_classes).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
# NOTE(review): the label device moves, the zero_grad/backward calls, the
# argument of torch.max and the torch.save call were missing or truncated in
# this copy and have been restored to the standard PyTorch loop -- confirm.
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Treat each image as a sequence of rows.
        images = images.reshape(-1, sequence_length, input_size).to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()  # gradients accumulate across steps otherwise
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                  .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

# Test the model
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.reshape(-1, sequence_length, input_size).to(device)
        labels = labels.to(device)
        outputs = model(images)
        # Index of the largest score along dim 1 is the predicted class.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total))

# Save the model checkpoint
torch.save(model.state_dict(), 'model.ckpt')
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms # Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyper-parameters
# Each 28x28 image is fed to the LSTM as 28 time steps of 28 features
# (see the reshape in the training loop).
sequence_length = 28
input_size = 28
hidden_size = 128
num_layers = 2
num_classes = 10
batch_size = 100
num_epochs = 2
learning_rate = 0.003

# MNIST dataset
# NOTE(review): the `train=` / `transform=` arguments of the first call and
# the DataLoader constructor names were truncated in this copy; restored to
# the usual torchvision / torch.utils.data usage -- confirm.
train_dataset = torchvision.datasets.MNIST(root='../../data/',
                                           train=True,
                                           transform=transforms.ToTensor(),
                                           download=True)

# train=False selects the held-out test split.
test_dataset = torchvision.datasets.MNIST(root='../../data/',
                                          train=False,
                                          transform=transforms.ToTensor())

# Data loader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

# Bidirectional recurrent neural network (many-to-one)
class BiRNN(nn.Module):
    """Many-to-one bidirectional LSTM classifier.

    Args:
        input_size: feature size of each time step.
        hidden_size: LSTM hidden size per direction.
        num_layers: number of stacked LSTM layers.
        num_classes: size of the output layer.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(BiRNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, bidirectional=True)
        self.fc = nn.Linear(hidden_size*2, num_classes)  # 2 for bidirection

    def forward(self, x):
        """Return the linear layer's output for the last time step of ``x``.

        ``x`` is batch-first (the LSTM is built with ``batch_first=True``).
        """
        # Set initial states on the input's own device rather than a
        # module-level global, so the module works wherever x lives.
        h0 = torch.zeros(self.num_layers*2, x.size(0), self.hidden_size, device=x.device)  # 2 for bidirection
        c0 = torch.zeros(self.num_layers*2, x.size(0), self.hidden_size, device=x.device)

        # Forward propagate LSTM
        out, _ = self.lstm(x, (h0, c0))  # out: tensor of shape (batch_size, seq_length, hidden_size*2)

        # Decode the hidden state of the last time step
        return self.fc(out[:, -1, :])


model = BiRNN(input_size, hidden_size, num_layers, num_classes).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
# NOTE(review): the label device moves, the zero_grad/backward calls, the
# argument of torch.max and the torch.save call were missing or truncated in
# this copy and have been restored to the standard PyTorch loop -- confirm.
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Treat each image as a sequence of rows.
        images = images.reshape(-1, sequence_length, input_size).to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()  # gradients accumulate across steps otherwise
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                  .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

# Test the model
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.reshape(-1, sequence_length, input_size).to(device)
        labels = labels.to(device)
        outputs = model(images)
        # Index of the largest score along dim 1 is the predicted class.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total))

# Save the model checkpoint
torch.save(model.state_dict(), 'model.ckpt')
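# Some parts of the code were adapted from another implementation; the
# reference link is missing from this copy (presumably the PyTorch
# word_language_model example -- confirm).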
import torch
import torch.nn as nn
import numpy as np
from torch.nn.utils import clip_grad_norm
from data_utils import Dictionary, Corpus # Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyper-parameters
embed_size = 128
hidden_size = 1024
num_layers = 1
num_epochs = 5
num_samples = 1000 # number of words to be sampled
batch_size = 20
seq_length = 30
learning_rate = 0.002

# Load "Penn Treebank" dataset
corpus = Corpus()
# presumably a 2-D tensor of word ids with batch_size rows -- verify against
# data_utils.Corpus.get_data
ids = corpus.get_data('data/train.txt', batch_size)
vocab_size = len(corpus.dictionary)
# Number of seq_length-wide windows along dim 1 of ids (used for logging).
num_batches = ids.size(1) // seq_length

# RNN based language model
class RNNLM(nn.Module):
    """Embedding -> LSTM -> linear language model.

    Args:
        vocab_size: number of embeddings and size of the output layer.
        embed_size: embedding dimension fed to the LSTM.
        hidden_size: LSTM hidden size.
        num_layers: number of stacked LSTM layers.
    """

    def __init__(self, vocab_size, embed_size, hidden_size, num_layers):
        super(RNNLM, self).__init__()
        self.embed = nn.Embedding(vocab_size, embed_size)
        self.lstm = nn.LSTM(embed_size, hidden_size, num_layers, batch_first=True)
        self.linear = nn.Linear(hidden_size, vocab_size)

    def forward(self, x, h):
        """Score every position of ``x`` and return the updated LSTM state.

        Args:
            x: batch-first tensor of word ids.
            h: ``(hidden, cell)`` state pair passed to the LSTM.

        Returns:
            ``(out, (h, c))`` where ``out`` has one row per (batch, time)
            position and ``vocab_size`` columns.
        """
        # Embed word ids to vectors
        x = self.embed(x)

        # Forward propagate LSTM
        out, (h, c) = self.lstm(x, h)

        # Reshape output to (batch_size*sequence_length, hidden_size)
        out = out.reshape(out.size(0)*out.size(1), out.size(2))

        # Decode hidden states of all time steps
        out = self.linear(out)
        return out, (h, c)


model = RNNLM(vocab_size, embed_size, hidden_size, num_layers).to(device)

# Loss and optimizer
# Loss applied to (outputs, flattened targets) in the training loop, and an
# Adam optimizer over all model parameters starting at `learning_rate`.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Truncated backpropagation
def detach(states):
    """Return a list with each state tensor detached from its autograd graph.

    Used between training windows so gradients do not flow back into
    earlier windows (truncated backpropagation).
    """
    return [state.detach() for state in states]


# Train the model
# NOTE(review): the zero_grad/backward calls and the torch.save call were
# missing or truncated in this copy and have been restored -- confirm.
for epoch in range(num_epochs):
    # Set initial hidden and cell states
    states = (torch.zeros(num_layers, batch_size, hidden_size).to(device),
              torch.zeros(num_layers, batch_size, hidden_size).to(device))

    for i in range(0, ids.size(1) - seq_length, seq_length):
        # Get mini-batch inputs and targets (targets are the inputs shifted
        # one token to the right)
        inputs = ids[:, i:i+seq_length].to(device)
        targets = ids[:, (i+1):(i+1)+seq_length].to(device)

        # Forward pass
        # Detach so gradients stop at the window boundary.
        states = detach(states)
        outputs, states = model(inputs, states)
        loss = criterion(outputs, targets.reshape(-1))

        # Backward and optimize
        optimizer.zero_grad()  # gradients accumulate across steps otherwise
        loss.backward()
        # In-place variant; the un-suffixed clip_grad_norm is deprecated.
        # Reached through `nn` so no new import is needed.
        nn.utils.clip_grad_norm_(model.parameters(), 0.5)
        optimizer.step()

        step = (i+1) // seq_length
        if step % 100 == 0:
            print('Epoch [{}/{}], Step[{}/{}], Loss: {:.4f}, Perplexity: {:5.2f}'
                  .format(epoch+1, num_epochs, step, num_batches, loss.item(), np.exp(loss.item())))

# Test the model
with torch.no_grad():
    with open('sample.txt', 'w') as f:
        # Set initial hidden and cell states
        state = (torch.zeros(num_layers, 1, hidden_size).to(device),
                 torch.zeros(num_layers, 1, hidden_size).to(device))

        # Select one word id randomly (uniform weights over the vocabulary)
        prob = torch.ones(vocab_size)
        input = torch.multinomial(prob, num_samples=1).unsqueeze(1).to(device)

        for i in range(num_samples):
            # Forward propagate RNN
            output, state = model(input, state)

            # Sample a word id; exp() of the scores gives unnormalised
            # weights, which torch.multinomial accepts.
            prob = output.exp()
            word_id = torch.multinomial(prob, num_samples=1).item()

            # Fill input with sampled word id for the next time step
            input.fill_(word_id)

            # File write
            word = corpus.dictionary.idx2word[word_id]
            word = '\n' if word == '<eos>' else word + ' '
            f.write(word)

            if (i+1) % 100 == 0:
                print('Sampled [{}/{}] words and save to {}'.format(i+1, num_samples, 'sample.txt'))

# Save the model checkpoints
torch.save(model.state_dict(), 'model.ckpt')
import torch
import os


class Dictionary(object):
    """Bidirectional word <-> integer id mapping; ids are assigned in insertion order."""

    def __init__(self):
        self.word2idx = {}
        self.idx2word = {}
        self.idx = 0  # next id to hand out

    def add_word(self, word):
        """Register ``word`` with the next free id; known words are left untouched."""
        if word not in self.word2idx:
            self.word2idx[word] = self.idx
            self.idx2word[self.idx] = word
            self.idx += 1

    def __len__(self):
        return len(self.word2idx)


class Corpus(object):
    """Tokenises a text file into a batched tensor of word ids."""

    def __init__(self):
        self.dictionary = Dictionary()

    def get_data(self, path, batch_size=20):
        """Read ``path`` and return its word ids as a ``(batch_size, n)`` LongTensor.

        Each line is split on whitespace and terminated with ``'<eos>'``.
        Every word is added to ``self.dictionary``. Trailing tokens that do
        not fill a complete column of ``batch_size`` rows are dropped.

        Args:
            path: text file to tokenise.
            batch_size: number of rows of the returned tensor.

        Returns:
            ``torch.long`` tensor of shape ``(batch_size, tokens // batch_size)``.
        """
        # Single pass: register each word and record its id immediately,
        # instead of reading the file twice into an uninitialised tensor.
        token_ids = []
        with open(path, 'r') as f:
            for line in f:
                for word in line.split() + ['<eos>']:
                    self.dictionary.add_word(word)
                    token_ids.append(self.dictionary.word2idx[word])

        ids = torch.tensor(token_ids, dtype=torch.long)
        num_batches = ids.size(0) // batch_size
        ids = ids[:num_batches*batch_size]
        # Explicit column count: view(batch_size, -1) is ambiguous (and raises)
        # when there are zero tokens.
        return ids.view(batch_size, num_batches)


