深度学习之 seq2seq 进行英文到法文的翻译

import os

import torch

import random

# Locations of the two parallel corpus files; their contents are later split
# on '\n' and paired line by line (see readLangs).
source_path = "data/small_vocab_en"

target_path = "data/small_vocab_fr"

# Default row count of the encoder-output buffer and output width of the
# decoder's attention layer.
MAX_LENGTH = 100

# Reserved token ids; they match the ids Dictionary pre-registers for
# '<SOS>' and '<EOS>'.
SOS_token = 0

EOS_token = 1

def load_data(path):
    """Return the complete text of the UTF-8 encoded file at *path*.

    Args:
        path: location of the file to read.

    Returns:
        The file content as a single string.
    """
    with open(os.fspath(path), mode='r', encoding='utf-8') as handle:
        return handle.read()

# Read both corpus files fully into memory when the script is executed.
source_text = load_data(source_path)

target_text = load_data(target_path)

class Dictionary(object):
    """Bidirectional mapping between words and integer ids.

    Ids 0 and 1 are reserved for the '<SOS>' and '<EOS>' markers; every
    other word receives the next unused id the first time it is added.
    """

    def __init__(self):
        self.word2idx = {'<SOS>': 0, '<EOS>': 1}
        self.idx2word = {0: '<SOS>', 1: '<EOS>'}
        # Next id to hand out (the two reserved ids are already taken).
        self.count = 2

    def add_word(self, word):
        """Register *word* if it is new and return its id.

        Args:
            word: the token to look up or insert.

        Returns:
            The integer id associated with *word*.
        """
        if word not in self.word2idx:
            # Use the running counter for BOTH directions so the first new
            # word gets id 2 instead of overwriting '<EOS>' at id 1.
            idx = self.count
            self.idx2word[idx] = word
            self.word2idx[word] = idx
            self.count += 1
        return self.word2idx[word]

    def __len__(self):
        """Return the number of known entries, reserved markers included."""
        return len(self.idx2word)

class Lang(object):
    """A named language together with its own vocabulary."""

    def __init__(self, name):
        self.dictionary = Dictionary()
        self.name = name

    def addSentence(self, sentence):
        """Add every whitespace-separated token of *sentence*.

        Returns:
            The list of ids of the tokens, in sentence order.
        """
        return list(map(self.addWord, sentence.split()))

    def addWord(self, word):
        """Add a single word to the vocabulary and return its id."""
        word_id = self.dictionary.add_word(word)
        return word_id

    def __len__(self):
        """Return the size reported by the underlying dictionary."""
        return len(self.dictionary)

def readLangs(source_name, source_lang_text, target_name, target_lang_text):
    """Build the two vocabularies and the id-encoded sentence pairs.

    Both texts are lower-cased and split on '\n'; each target line gets an
    ' <EOS>' suffix (appended after lower-casing) before it is encoded.
    Lines are paired positionally with zip, so surplus lines on either side
    are dropped. Note that a text ending in '\n' yields an empty last line.

    Args:
        source_name: name given to the source Lang.
        source_lang_text: full source corpus as one string.
        target_name: name given to the target Lang.
        target_lang_text: full target corpus as one string.

    Returns:
        (source_lang, target_lang, pairs) where pairs is a list of
        (source_ids, target_ids) tuples.
    """
    source_lang = Lang(source_name)
    source_data = []
    for line in source_lang_text.lower().split('\n'):
        source_data.append(source_lang.addSentence(line))

    target_lang = Lang(target_name)
    target_data = []
    for line in target_lang_text.lower().split('\n'):
        target_data.append(target_lang.addSentence(line + ' <EOS>'))

    return source_lang, target_lang, list(zip(source_data, target_data))

# Build both vocabularies and the id-encoded training pairs from the raw text.
# NOTE(review): the target language is labelled 'fe'; 'fr' was probably
# intended. In the visible code the label is only stored on the Lang object.
source_lang, target_lang, pairs_data = readLangs('en', source_text, 'fe', target_text)

import torch.nn as nn

from torch.autograd import Variable

from torch import optim

import torch.nn.functional as F

class EncoderRNN(nn.Module):
    """GRU encoder that is fed one token id per call.

    Args:
        input_size: number of rows of the embedding table.
        hidden_size: width of the embeddings and of the GRU state.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)

    def forward(self, input, hidden):
        """Embed *input*, reshape it to (1, 1, -1) and run one GRU step.

        Returns:
            The (output, hidden) pair produced by the GRU.
        """
        step_input = self.embedding(input).view(1, 1, -1)
        return self.gru(step_input, hidden)

    def initHidden(self):
        """Return an all-zero hidden state of shape (1, 1, hidden_size)."""
        return Variable(torch.zeros(1, 1, self.hidden_size))

class DecoderRNN(nn.Module):
    """GRU decoder with attention over a fixed-size encoder-output buffer.

    Args:
        hidden_size: width of the embeddings and of the GRU state.
        output_size: number of target tokens (rows of the embedding table
            and width of the output layer).
        dropout_p: dropout probability applied to the embedded input.
        max_length: number of encoder positions the attention layer scores.
    """

    def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH):
        super().__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length

        self.embedding = nn.Embedding(output_size, hidden_size)
        # Scores every encoder position from [embedded input ; hidden state].
        self.attn = nn.Linear(hidden_size * 2, max_length)
        # Merges [embedded input ; attention context] back to hidden_size.
        self.attn_combine = nn.Linear(hidden_size * 2, hidden_size)
        self.dropout = nn.Dropout(dropout_p)
        self.gru = nn.GRU(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)

    def forward(self, input, hidden, encoder_outputs):
        """Run one decoding step.

        Args:
            input: id of the previous token.
            hidden: current GRU hidden state.
            encoder_outputs: buffer of encoder outputs; it is multiplied by
                the attention weights, so it needs max_length rows.

        Returns:
            (log_probs, hidden, attn_weights): log-softmax scores over the
            output vocabulary, the new hidden state and the attention
            weights used for this step.
        """
        embedded = self.dropout(self.embedding(input).view(1, 1, -1))

        scores = self.attn(torch.cat((embedded[0], hidden[0]), 1))
        attn_weights = F.softmax(scores, dim=1)
        context = torch.bmm(attn_weights.unsqueeze(0), encoder_outputs.unsqueeze(0))

        combined = self.attn_combine(torch.cat((embedded[0], context[0]), 1))
        gru_input = F.relu(combined.unsqueeze(0))
        gru_output, hidden = self.gru(gru_input, hidden)

        log_probs = F.log_softmax(self.out(gru_output[0]), dim=1)
        return log_probs, hidden, attn_weights

    def initHidden(self):
        """Return an all-zero hidden state of shape (1, 1, hidden_size)."""
        return Variable(torch.zeros(1, 1, self.hidden_size))

# Training settings.
# NOTE(review): `epochs` and `print_every` are not referenced by any code
# visible in this file; trainIters below is called with its own defaults.
epochs = 10

print_every = 2

# Width of the embeddings and GRU states in both models.
hidden_size = 256

# Probability, drawn once per training example, of feeding the ground-truth
# token to the decoder instead of its own prediction (see train).
teacher_forcing_ratio = 0.5

# Vocabulary sizes come from the Lang objects returned by readLangs.
encoder_model = EncoderRNN(len(source_lang), hidden_size)

att_decoder_model = DecoderRNN(hidden_size, len(target_lang), dropout_p=0.1)

def variablesFromIds(ids):
    """Convert a sequence of token ids into an (N, 1) LongTensor Variable."""
    column = torch.LongTensor(ids).view(-1, 1)
    return Variable(column)

def variablesFromPair(pair):
    """Convert a (source_ids, target_ids) pair into a tuple of two Variables.

    Each element is converted with variablesFromIds, source first.
    """
    return tuple(variablesFromIds(pair[position]) for position in (0, 1))

def train(input, target, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):
    """Run one optimisation step on a single sentence pair.

    The source sentence is encoded token by token, then the decoder is
    unrolled over the target. With probability ``teacher_forcing_ratio``
    the ground-truth token is fed back at every step; otherwise the
    decoder's own top prediction is fed back and decoding stops as soon
    as it predicts ``EOS_token``.

    Args:
        input: source token ids, indexed along dim 0 (one id per step).
        target: target token ids, indexed along dim 0.
        encoder: encoder module providing ``initHidden`` and ``hidden_size``.
        decoder: decoder module called as
            ``decoder(input, hidden, encoder_outputs)``.
        encoder_optimizer: optimizer stepping the encoder parameters.
        decoder_optimizer: optimizer stepping the decoder parameters.
        criterion: loss applied to (decoder_output, target[di]).
        max_length: number of rows of the encoder-output buffer; the
            source sentence must not be longer than this.

    Returns:
        The summed loss divided by the target length, as a Python float.
    """
    encoder_hidden = encoder.initHidden()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input.size()[0]
    target_length = target.size()[0]

    # Fixed-size buffer; rows beyond the sentence length stay zero.
    encoder_outputs = Variable(torch.zeros(max_length, encoder.hidden_size))
    for i in range(input_length):
        encoder_output, encoder_hidden = encoder(input[i], encoder_hidden)
        encoder_outputs[i] = encoder_output[0][0]

    decoder_input = Variable(torch.LongTensor([[SOS_token]]))
    decoder_hidden = encoder_hidden

    use_teacher_forcing = random.random() < teacher_forcing_ratio

    loss = 0
    for di in range(target_length):
        decoder_output, decoder_hidden, _ = decoder(decoder_input, decoder_hidden, encoder_outputs)
        loss += criterion(decoder_output, target[di])

        if use_teacher_forcing:
            # Feed the ground-truth token as the next input.
            decoder_input = target[di]
            continue

        # Feed the model's own best guess; convert it to a plain int so the
        # EOS comparison and the tensor construction do not depend on how
        # the installed PyTorch represents a single element.
        _, topi = decoder_output.data.topk(1)
        ni = int(topi[0][0])
        if ni == EOS_token:
            break
        decoder_input = Variable(torch.LongTensor([[ni]]))

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    # The accumulated loss is a 0-dim tensor on PyTorch >= 0.4, where
    # `loss.data[0]` raises; `.item()` is the supported scalar accessor.
    return loss.item() / target_length

def trainIters(encoder, decoder, n_iters, print_every=10, learning_rate=0.01):
    """Train the encoder/decoder on ``n_iters`` randomly drawn sentence pairs.

    Pairs are sampled (with replacement) from the module-level
    ``pairs_data``; each one is passed to ``train`` together with a fresh
    pair of SGD optimizers and an NLL loss.

    Args:
        encoder: encoder module to optimise.
        decoder: decoder module to optimise.
        n_iters: number of training examples to process.
        print_every: report progress every this many iterations.
        learning_rate: learning rate of both SGD optimizers.
    """
    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)

    training_pairs = [variablesFromPair(random.choice(pairs_data)) for _ in range(n_iters)]
    criterion = nn.NLLLoss()

    total_loss = 0   # cumulative loss since the start of training
    window_loss = 0  # loss accumulated since the last progress line

    for step, (input_variable, target_variable) in enumerate(training_pairs, start=1):
        loss = train(input_variable, target_variable, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion)
        total_loss += loss
        window_loss += loss

        if step % print_every == 0:
            # Losses are floats, so they are printed with %.4f; the last
            # figure is the mean over the iterations since the previous
            # report (the window is reset below).
            print('(%d %d%%) loss %.4f total-loss %.4f avg-loss %.4f' % (step, step / n_iters * 100, loss, total_loss, window_loss / print_every))
            window_loss = 0

# Train on 5000 randomly sampled sentence pairs (runs when the script is executed).
trainIters(encoder_model, att_decoder_model, 5000)

def evaluate(encoder, decoder, sentence, max_length = MAX_LENGTH):
    """Greedily decode a translation for one id-encoded sentence.

    Both modules are switched to evaluation mode for the duration of the
    call (so dropout is disabled) and gradient tracking is turned off;
    their previous training flags are restored before returning.

    Args:
        encoder: encoder module providing ``initHidden`` and ``hidden_size``.
        decoder: decoder module called as
            ``decoder(input, hidden, encoder_outputs)``.
        sentence: sequence of source token ids (passed to variablesFromIds).
        max_length: size of the encoder-output buffer and maximum number of
            decoding steps.

    Returns:
        (decoded_words, attentions): the list of produced words (ending
        with '<EOS>' if the decoder emitted it) and one row of attention
        weights per decoding step.
    """
    input_variable = variablesFromIds(sentence)
    input_length = input_variable.size()[0]

    decoded_words = []
    decoder_attentions = torch.zeros(max_length, max_length)
    steps = 0

    previous_modes = (encoder.training, decoder.training)
    encoder.eval()
    decoder.eval()
    try:
        with torch.no_grad():
            encoder_hidden = encoder.initHidden()
            encoder_outputs = Variable(torch.zeros(max_length, encoder.hidden_size))
            for ei in range(input_length):
                encoder_output, encoder_hidden = encoder(input_variable[ei], encoder_hidden)
                encoder_outputs[ei] = encoder_outputs[ei] + encoder_output[0][0]

            decoder_input = Variable(torch.LongTensor([[SOS_token]]))  # SOS
            decoder_hidden = encoder_hidden

            for di in range(max_length):
                decoder_output, decoder_hidden, decoder_attention = decoder(decoder_input, decoder_hidden, encoder_outputs)
                decoder_attentions[di] = decoder_attention.data
                steps = di + 1

                # Plain int: a tensor would never match the integer keys of
                # idx2word.
                _, topi = decoder_output.data.topk(1)
                ni = int(topi[0][0])

                if ni == EOS_token:
                    decoded_words.append('<EOS>')
                    break
                decoded_words.append(target_lang.dictionary.idx2word[ni])
                decoder_input = Variable(torch.LongTensor([[ni]]))
    finally:
        # Leave the modules in the mode the caller had them in.
        encoder.train(previous_modes[0])
        decoder.train(previous_modes[1])

    return decoded_words, decoder_attentions[:steps]

# NOTE(review): `evaluateRandomly` is not defined anywhere in the visible
# source; unless it is provided elsewhere this call raises NameError.
evaluateRandomly(encoder_model, att_decoder_model)

结论

训练次数较少，因此翻译的正确率较低；后续会再实现一个对话机器人。

深度学习之 seq2seq 进行英文到法文的翻译的更多相关文章

深度学习教程 | Seq2Seq序列模型和注意力机制
作者:韩信子@ShowMeAI 教程地址:http://www.showmeai.tech/tutorials/35 本文地址:http://www.showmeai.tech/article-det ...
时间序列深度学习：seq2seq 模型预测太阳黑子
目录时间序列深度学习:seq2seq 模型预测太阳黑子学习路线商业中的时间序列深度学习商业中应用时间序列深度学习深度学习时间序列预测:使用 keras 预测太阳黑子递归神经网络设置.预处 ...
深度学习的seq2seq模型——本质是LSTM，训练过程是使得所有样本的p(y1,...,yT‘|x1,...,xT)概率之和最大
from:https://baijiahao.baidu.com/s?id=1584177164196579663&wfr=spider&for=pc seq2seq模型是以编码(En ...
深度学习之seq2seq模型以及Attention机制
RNN,LSTM,seq2seq等模型广泛用于自然语言处理以及回归预测,本期详解seq2seq模型以及attention机制的原理以及在回归预测方向的运用. 1. seq2seq模型介绍 seq2se ...
机器学习(Machine Learning)&深度学习(Deep Learning)资料【转】
转自:机器学习(Machine Learning)&深度学习(Deep Learning)资料 <Brief History of Machine Learning> 介绍:这是一 ...
深度学习中的Attention机制
1.深度学习的seq2seq模型从rnn结构说起根据输出和输入序列不同数量rnn可以有多种不同的结构,不同结构自然就有不同的引用场合.如下图, one to one 结构,仅仅只是简单的给一个输入 ...
机器学习(Machine Learning)与深度学习(Deep Learning)资料汇总
<Brief History of Machine Learning> 介绍:这是一篇介绍机器学习历史的文章,介绍很全面,从感知机.神经网络.决策树.SVM.Adaboost到随机森林.D ...
深度学习(Deep Learning)算法简介
http://www.cnblogs.com/ysjxw/archive/2011/10/08/2201782.html Comments from Xinwei: 最近的一个课题发展到与深度学习有联 ...
时间序列深度学习：状态 LSTM 模型预测太阳黑子
目录时间序列深度学习:状态 LSTM 模型预测太阳黑子教程概览商业应用长短期记忆(LSTM)模型太阳黑子数据集构建 LSTM 模型预测太阳黑子 1 若干相关包 2 数据 3 探索性数据分析 ...

随机推荐

handsontable 合并单元格
<!DOCTYPE html> <html> <head> <title>handsontable demo</title> <met ...
http进阶
前言: 上一篇博文已经说到了,apache2.4简单的配置,端口,持久连接,MPM,DSO,路径下基于来源控制,页面特性,日志设置安全域,虚拟主机等等. 一:URL URL是互联中获取标记资源的方式 ...
Chrome游览器使用时，修改文件和网页刷新后，不能显示效果
一:因为游览器缓存问题有时候在写完代码后,刷新游览器,发现自己写的目标是让某一个东西隐藏,但是结果是依旧显示着,打开调试工具在Sources中发现,文件依旧是上次的旧的文件,新文件没有加载进去,无论 ...
python DNS域名轮询业务监控
应用场景: 目前DNS支持一个域名对应多个IP的解析,优势是可以起到负载均衡的作用,最大的问题是目标主机不可用时无法自动剔除,因此必须在自己的业务端写好监控与发现,怎么样来做这样的监控,以python ...
python3.5连接oracle数据及数据查询
今天心血来潮研究下用python连接oracle数据库,看了一下demo,本以为很简单,从操作到成功还是有点坎坷,这里分享给大家,希望为后面学习的童鞋铺路. 一.首先按照cx_Oracle 二:在py ...
设计模式——备忘录模式（C++实现）
#include <iostream> #include <string> #include <vector> using namespace std; class ...
http状态码是什么，有什么用，在哪里查看，分别代表什么意思？
写在前面: 当浏览者访问一个网页时,浏览者的浏览器会向网页所在服务器发出请求.当浏览器接收并显示网页前,此网页所在的服务器会返回一个包含HTTP状态码的信息头用以响应浏览器的请求.本文主要是:关于ht ...
jni 类初始化失败（nested exception is java.lang.NoClassDefFoundError）
nested exception is java.lang.NoClassDefFoundError: Could not initialize class com.netease.facedetec ...
Centos虚拟机克隆模板
Centos6模板 IPTABLES/SELINUX # iptalbes -F # service iptables save 或 # /etc/init.d/iptables stop # chk ...
JS常用函数用途小记
concat() 方法用于连接两个或多个数组. 该方法不会改变现有的数组,而仅仅会返回被连接数组的一个副本. var a = [1,2,3]; document.write(a.concat(4,5) ...

深度学习之 seq2seq 进行 英文到法文的翻译

深度学习之 seq2seq 进行 英文到法文的翻译

结论

深度学习之 seq2seq 进行 英文到法文的翻译的更多相关文章

随机推荐

热门专题

深度学习之 seq2seq 进行英文到法文的翻译

深度学习之 seq2seq 进行英文到法文的翻译

深度学习之 seq2seq 进行英文到法文的翻译的更多相关文章