This post implements the paper's RNN language model in PyTorch: given the preceding words, the network predicts the next word, so the task reduces to a classification over the vocabulary.

Paper link: 88888888

Model architecture diagram: (figure from the original post, not reproduced here)

For the underlying theory, refer to the paper itself; the code and comments below are adapted from https://github.com/graykode/nlp-tutorial:

```python
# -*- coding: utf-8 -*-
# @time : 2019/11/9 15:12
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

dtype = torch.FloatTensor

sentences = ["i like dog", "i love coffee", "i hate milk"]

word_list = " ".join(sentences).split()
word_list = list(set(word_list))
word_dict = {w: i for i, w in enumerate(word_list)}
number_dict = {i: w for i, w in enumerate(word_list)}
n_class = len(word_dict)

# TextRNN Parameters
batch_size = len(sentences)
n_step = 2    # number of cells (= number of steps)
n_hidden = 5  # number of hidden units in one cell

def make_batch(sentences):
    input_batch = []
    target_batch = []
    for sen in sentences:
        word = sen.split()
        input = [word_dict[n] for n in word[:-1]]   # all words but the last are the input
        target = word_dict[word[-1]]                # the last word is the target
        input_batch.append(np.eye(n_class)[input])  # one-hot encode the input words
        target_batch.append(target)
    return input_batch, target_batch

# to torch.Tensor
input_batch, target_batch = make_batch(sentences)
input_batch = Variable(torch.Tensor(input_batch))
target_batch = Variable(torch.LongTensor(target_batch))

class TextRNN(nn.Module):
    def __init__(self):
        super(TextRNN, self).__init__()
        self.rnn = nn.RNN(input_size=n_class, hidden_size=n_hidden, batch_first=True)
        self.W = nn.Parameter(torch.randn([n_hidden, n_class]).type(dtype))
        self.b = nn.Parameter(torch.randn([n_class]).type(dtype))

    def forward(self, hidden, X):
        if self.rnn.batch_first:
            # X : [batch_size, n_step, n_class]
            outputs, hidden = self.rnn(X, hidden)
            # outputs : [batch_size, n_step, num_directions(=1) * n_hidden]
            output = outputs[:, -1, :]  # [batch_size, num_directions(=1) * n_hidden]
            model = torch.mm(output, self.W) + self.b  # model : [batch_size, n_class]
            return model
        else:
            X = X.transpose(0, 1)  # X : [n_step, batch_size, n_class]
            outputs, hidden = self.rnn(X, hidden)
            # outputs : [n_step, batch_size, num_directions(=1) * n_hidden]
            # hidden : [num_layers(=1) * num_directions(=1), batch_size, n_hidden]
            output = outputs[-1, :, :]  # [batch_size, num_directions(=1) * n_hidden]
            model = torch.mm(output, self.W) + self.b  # model : [batch_size, n_class]
            return model

model = TextRNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training
for epoch in range(5000):
    optimizer.zero_grad()
    # hidden : [num_layers * num_directions, batch, hidden_size]
    hidden = Variable(torch.zeros(1, batch_size, n_hidden))
    # input_batch : [batch_size, n_step, n_class]
    output = model(hidden, input_batch)
    # output : [batch_size, n_class], target_batch : [batch_size] (LongTensor, not one-hot)
    loss = criterion(output, target_batch)
    if (epoch + 1) % 1000 == 0:
        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))
    loss.backward()
    optimizer.step()

# Predict
hidden_initial = Variable(torch.zeros(1, batch_size, n_hidden))
predict = model(hidden_initial, input_batch).data.max(1, keepdim=True)[1]
print([sen.split()[:2] for sen in sentences], '->', [number_dict[n.item()] for n in predict.squeeze()])
```
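
A note on idiom: `Variable` has been a no-op wrapper since PyTorch 0.4, and the manual `W`/`b` pair can be replaced by `nn.Linear`. A minimal sketch of the same batch-first model in current PyTorch (a hypothetical refactor for illustration, not code from the original repo):

```python
import torch
import torch.nn as nn

# Minimal sketch, assuming PyTorch >= 0.4 and the same n_class/n_hidden setup
# as above; TextRNNModern is a hypothetical name, not from the original repo.
class TextRNNModern(nn.Module):
    def __init__(self, n_class, n_hidden):
        super().__init__()
        self.rnn = nn.RNN(input_size=n_class, hidden_size=n_hidden, batch_first=True)
        self.fc = nn.Linear(n_hidden, n_class)

    def forward(self, X):                  # X : [batch_size, n_step, n_class]
        outputs, _ = self.rnn(X)           # initial hidden defaults to zeros when omitted
        return self.fc(outputs[:, -1, :])  # logits : [batch_size, n_class]
```

With this version the zero hidden state no longer needs to be built and threaded through the training loop by hand.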

The same model with an LSTM unit in place of the vanilla RNN:

```python
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

dtype = torch.FloatTensor

char_arr = [c for c in 'abcdefghijklmnopqrstuvwxyz']
word_dict = {n: i for i, n in enumerate(char_arr)}
number_dict = {i: w for i, w in enumerate(char_arr)}
n_class = len(word_dict)  # number of classes (= size of vocab)

seq_data = ['make', 'need', 'coal', 'word', 'love', 'hate', 'live', 'home', 'hash', 'star']

# TextLSTM Parameters
n_step = 3
n_hidden = 128

def make_batch(seq_data):
    input_batch, target_batch = [], []
    for seq in seq_data:
        input = [word_dict[n] for n in seq[:-1]]  # 'm', 'a', 'k' is input
        target = word_dict[seq[-1]]               # 'e' is target
        input_batch.append(np.eye(n_class)[input])
        target_batch.append(target)
    return Variable(torch.Tensor(input_batch)), Variable(torch.LongTensor(target_batch))

class TextLSTM(nn.Module):
    def __init__(self):
        super(TextLSTM, self).__init__()
        self.lstm = nn.LSTM(input_size=n_class, hidden_size=n_hidden)
        self.W = nn.Parameter(torch.randn([n_hidden, n_class]).type(dtype))
        self.b = nn.Parameter(torch.randn([n_class]).type(dtype))

    def forward(self, X):
        input = X.transpose(0, 1)  # input : [n_step, batch_size, n_class]
        hidden_state = Variable(torch.zeros(1, len(X), n_hidden))  # [num_layers(=1) * num_directions(=1), batch_size, n_hidden]
        cell_state = Variable(torch.zeros(1, len(X), n_hidden))    # [num_layers(=1) * num_directions(=1), batch_size, n_hidden]
        outputs, (_, _) = self.lstm(input, (hidden_state, cell_state))
        outputs = outputs[-1]  # [batch_size, n_hidden]
        model = torch.mm(outputs, self.W) + self.b  # model : [batch_size, n_class]
        return model

input_batch, target_batch = make_batch(seq_data)

model = TextLSTM()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training
for epoch in range(1000):
    output = model(input_batch)
    loss = criterion(output, target_batch)
    if (epoch + 1) % 100 == 0:
        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# Predict
inputs = [sen[:3] for sen in seq_data]
predict = model(input_batch).data.max(1, keepdim=True)[1]
print(inputs, '->', [number_dict[n.item()] for n in predict.squeeze()])
```
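
As a usage sketch for the block above (assuming PyTorch >= 0.4 for `torch.no_grad()`, and reusing the trained `model`, `n_class`, `word_dict`, and `number_dict`), predicting the final character of a single 3-character prefix looks like this; `'mak'` is just an illustrative input:

```python
import numpy as np
import torch

# Minimal sketch: one-hot encode a single prefix and run it through the model.
prefix = 'mak'
x = np.eye(n_class)[[word_dict[c] for c in prefix]]  # [n_step, n_class] one-hot
x = torch.Tensor(x).unsqueeze(0)                     # [1, n_step, n_class]
with torch.no_grad():
    logits = model(x)                                # [1, n_class]
pred = logits.argmax(dim=1).item()
print(prefix, '->', number_dict[pred])               # expected after training: 'e'
```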

A BiLSTM version of the model:

```python
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import torch.nn.functional as F

dtype = torch.FloatTensor

sentence = (
    'Lorem ipsum dolor sit amet consectetur adipisicing elit '
    'sed do eiusmod tempor incididunt ut labore et dolore magna '
    'aliqua Ut enim ad minim veniam quis nostrud exercitation'
)

word_dict = {w: i for i, w in enumerate(list(set(sentence.split())))}
number_dict = {i: w for i, w in enumerate(list(set(sentence.split())))}
n_class = len(word_dict)
max_len = len(sentence.split())
n_hidden = 5

def make_batch(sentence):
    input_batch = []
    target_batch = []
    words = sentence.split()
    for i, word in enumerate(words[:-1]):
        input = [word_dict[n] for n in words[:(i + 1)]]  # all words up to position i
        input = input + [0] * (max_len - len(input))     # pad every prefix to max_len
        target = word_dict[words[i + 1]]                 # the next word is the target
        input_batch.append(np.eye(n_class)[input])
        target_batch.append(target)
    return Variable(torch.Tensor(input_batch)), Variable(torch.LongTensor(target_batch))

class BiLSTM(nn.Module):
    def __init__(self):
        super(BiLSTM, self).__init__()
        self.lstm = nn.LSTM(input_size=n_class, hidden_size=n_hidden, bidirectional=True)
        self.W = nn.Parameter(torch.randn([n_hidden * 2, n_class]).type(dtype))
        self.b = nn.Parameter(torch.randn([n_class]).type(dtype))

    def forward(self, X):
        input = X.transpose(0, 1)  # input : [max_len, batch_size, n_class]
        hidden_state = Variable(torch.zeros(1 * 2, len(X), n_hidden))  # [num_layers(=1) * num_directions(=2), batch_size, n_hidden]
        cell_state = Variable(torch.zeros(1 * 2, len(X), n_hidden))    # [num_layers(=1) * num_directions(=2), batch_size, n_hidden]
        outputs, (_, _) = self.lstm(input, (hidden_state, cell_state))
        outputs = outputs[-1]  # [batch_size, num_directions(=2) * n_hidden]
        model = torch.mm(outputs, self.W) + self.b  # model : [batch_size, n_class]
        return model

input_batch, target_batch = make_batch(sentence)

model = BiLSTM()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training
for epoch in range(10000):
    output = model(input_batch)
    loss = criterion(output, target_batch)
    if (epoch + 1) % 1000 == 0:
        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# Predict
predict = model(input_batch).data.max(1, keepdim=True)[1]
print(sentence)
print([number_dict[n.item()] for n in predict.squeeze()])
```
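
To see why `W` needs `n_hidden * 2` rows, it helps to inspect the output layout of a bidirectional LSTM. A small self-contained sketch (toy shapes chosen purely for illustration):

```python
import torch
import torch.nn as nn

# Minimal sketch: inspect how a bidirectional LSTM concatenates directions.
seq_len, batch, n_in, n_hid = 4, 3, 7, 5
lstm = nn.LSTM(input_size=n_in, hidden_size=n_hid, bidirectional=True)
x = torch.randn(seq_len, batch, n_in)
outputs, (h_n, c_n) = lstm(x)

print(outputs.shape)  # torch.Size([4, 3, 10]) : forward and backward halves concatenated
fwd, bwd = outputs[-1, :, :n_hid], outputs[-1, :, n_hid:]
# The forward half at the last step equals the forward direction's final hidden state:
print(torch.allclose(fwd, h_n[0]))  # True
# The backward half at the last step is the backward direction's *first* processed
# step, so it has only seen the final token of the sequence.
```

Note the design implication flagged in the last comment: for strictly left-to-right next-word prediction, the backward direction contributes little at the final time step, which is worth keeping in mind when comparing this BiLSTM against the unidirectional models above.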
