RNN与情感分类问题实战-加载IMDB数据集
Sentiment Analysis
Two approaches
SimpleRNNCell
single layer
multi-layers
RNNCell
Single layer
import os
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
# Seed TensorFlow and NumPy so repeated runs draw the same random numbers.
tf.random.set_seed(22)
np.random.seed(22)
# Reduce TensorFlow's C++ log verbosity.
# NOTE(review): presumably this only silences import-time messages when it is
# set before `import tensorflow` -- confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
assert tf.__version__.startswith('2.')

# Hyper-parameters shared by the data pipeline and the model.
batchsz = 128          # examples per batch
total_words = 10000    # keep only the most frequent words
max_review_len = 80    # every review is padded/truncated to this length
embedding_len = 100    # size of each word embedding vector

(x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(
    num_words=total_words)

# Pad/truncate both splits: x_train and x_test become [b, 80].
x_train, x_test = (
    keras.preprocessing.sequence.pad_sequences(split, maxlen=max_review_len)
    for split in (x_train, x_test)
)

# Training pipeline: shuffle, then cut into fixed-size batches.
db_train = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(1000)
    .batch(batchsz, drop_remainder=True)
)
# Test pipeline: fixed-size batches, no shuffling.
db_test = (
    tf.data.Dataset.from_tensor_slices((x_test, y_test))
    .batch(batchsz, drop_remainder=True)
)

print('x_train shape:', x_train.shape, tf.reduce_max(y_train),
      tf.reduce_min(y_train))
print('x_test shape:', x_test.shape)
class MyRNN(keras.Model):
    """Two stacked SimpleRNNCell layers, unrolled by hand, for binary sentiment.

    Pipeline: token ids [b, 80] -> embedding [b, 80, 100] -> two recurrent
    cells stepped over the time axis -> last hidden state [b, units]
    -> Dense(1) -> sigmoid probability [b, 1].

    The initial hidden states are created inside ``call`` from the batch size
    of the incoming tensor, so the model is not tied to the global ``batchsz``
    and accepts batches of any size.
    """

    def __init__(self, units):
        """Build the layers.

        :param units: hidden-state size shared by both recurrent cells.
        """
        super().__init__()
        # Remembered so call() can size the zero initial states.
        self.units = units
        # Transform token ids to their embedding representation:
        # [b, 80] => [b, 80, 100]
        self.embedding = layers.Embedding(total_words,
                                          embedding_len,
                                          input_length=max_review_len)
        # Each cell maps its input vector (100-dim embedding for the first
        # cell, `units`-dim output for the second) to a `units`-dim state.
        self.rnn_cell0 = layers.SimpleRNNCell(units, dropout=0.5)
        self.rnn_cell1 = layers.SimpleRNNCell(units, dropout=0.5)
        # Classification head: [b, units] => [b, 1]
        self.outlayer = layers.Dense(1)

    def call(self, inputs, training=None):
        """Run the forward pass.

        net(x) / net(x, training=True): train mode
        net(x, training=False): test mode

        :param inputs: token ids, [b, 80]
        :param training: forwarded to both cells (they are built with dropout).
        :return: probability that each review is positive, [b, 1]
        """
        # embedding: [b, 80] => [b, 80, 100]
        x = self.embedding(inputs)

        # Zero initial states sized from the actual batch, [b, units].
        batch = tf.shape(x)[0]
        state0 = [tf.zeros([batch, self.units], dtype=x.dtype)]
        state1 = [tf.zeros([batch, self.units], dtype=x.dtype)]

        # Step both cells through time; only the final out1 is used below.
        for word in tf.unstack(x, axis=1):  # word: [b, 100]
            # out0: [b, units]
            out0, state0 = self.rnn_cell0(word, state0, training=training)
            # out1: [b, units]
            out1, state1 = self.rnn_cell1(out0, state1, training=training)

        # [b, units] => [b, 1]
        x = self.outlayer(out1)
        # p(y is pos | x)
        prob = tf.sigmoid(x)
        return prob
def main():
    """Train the cell-based MyRNN on db_train, then evaluate it on db_test."""
    hidden_units, num_epochs = 64, 4

    net = MyRNN(hidden_units)
    adam = keras.optimizers.Adam(0.001)
    bce = tf.losses.BinaryCrossentropy()
    net.compile(optimizer=adam, loss=bce, metrics=['accuracy'])

    # db_test doubles as the per-epoch validation set and the final test set.
    net.fit(db_train, epochs=num_epochs, validation_data=db_test)
    net.evaluate(db_test)


if __name__ == '__main__':
    main()
Multi-layers
import os
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
# Seed TensorFlow and NumPy so repeated runs draw the same random numbers.
tf.random.set_seed(22)
np.random.seed(22)
# Reduce TensorFlow's C++ log verbosity.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
assert tf.__version__.startswith('2.')

batchsz = 128
total_words = 10000    # vocabulary: encode only the most frequent words
max_review_len = 80    # every review is padded/truncated to 80 tokens
embedding_len = 100    # each word is represented by a 100-dim vector

(x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(
    num_words=total_words)

# x_train: [b, 80]
# x_test: [b, 80]
x_train = keras.preprocessing.sequence.pad_sequences(
    x_train, maxlen=max_review_len)
x_test = keras.preprocessing.sequence.pad_sequences(
    x_test, maxlen=max_review_len)

db_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
# drop_remainder discards the final batch when it is smaller than batchsz.
db_train = db_train.shuffle(1000).batch(batchsz, drop_remainder=True)

db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
# Keep the final partial batch: the SimpleRNN-layer model accepts any batch
# size, and dropping it would leave test reviews out of the reported metrics.
db_test = db_test.batch(batchsz)

print('x_train shape:', x_train.shape, tf.reduce_max(y_train),
      tf.reduce_min(y_train))
print('x_test shape:', x_test.shape)
class MyRNN(keras.Model):
    """Two stacked SimpleRNN layers for binary sentiment classification.

    Pipeline: token ids [b, 80] -> embedding [b, 80, 100] -> two SimpleRNN
    layers (the first returns the full sequence, the second only its last
    output) -> Dense(1) -> sigmoid probability [b, 1].
    """

    def __init__(self, units):
        """Build the layers.

        :param units: hidden-state size of both SimpleRNN layers.
        """
        super().__init__()
        # Transform token ids to their embedding representation:
        # [b, 80] => [b, 80, 100]; embedding_len=100 means each word is
        # represented by a 100-dim vector.
        self.embedding = layers.Embedding(total_words,
                                          embedding_len,
                                          input_length=max_review_len)
        # [b, 80, 100] => [b, units]
        self.rnn = keras.Sequential([
            layers.SimpleRNN(units,
                             dropout=0.5,
                             return_sequences=True,
                             unroll=True),
            layers.SimpleRNN(units, dropout=0.5, unroll=True),
        ])
        # Classification head: [b, units] => [b, 1]
        self.outlayer = layers.Dense(1)

    def call(self, inputs, training=None):
        """Run the forward pass.

        net(x) / net(x, training=True): train mode
        net(x, training=False): test mode

        :param inputs: token ids, [b, 80]
        :param training: whether this pass is training or testing; forwarded
            to the recurrent stack (its layers are built with dropout).
        :return: probability that each review is positive, [b, 1]
        """
        # embedding: [b, 80] => [b, 80, 100]
        x = self.embedding(inputs)
        # recurrent stack: [b, 80, 100] => [b, units]
        # Pass `training` explicitly rather than relying on implicit
        # propagation from the enclosing call.
        x = self.rnn(x, training=training)
        # [b, units] => [b, 1]
        x = self.outlayer(x)
        # p(y is pos | x)
        prob = tf.sigmoid(x)
        return prob
def main():
    """Train the layer-based MyRNN on db_train, then evaluate it on db_test."""
    hidden_units = 64
    num_epochs = 4

    net = MyRNN(hidden_units)
    compile_kwargs = dict(
        optimizer=keras.optimizers.Adam(0.001),
        loss=tf.losses.BinaryCrossentropy(),
        metrics=['accuracy'],
    )
    net.compile(**compile_kwargs)

    # db_test doubles as the per-epoch validation set and the final test set.
    net.fit(db_train, epochs=num_epochs, validation_data=db_test)
    net.evaluate(db_test)


if __name__ == '__main__':
    main()
RNN与情感分类问题实战-加载IMDB数据集的更多相关文章
- pytorch 加载mnist数据集报错not gzip file
利用pytorch加载mnist数据集的代码如下 import torchvision import torchvision.transforms as transforms from torch.u ...
- torchvision的理解和学习 加载常用数据集,对主流模型的调用.md
torchvision的理解和学习 加载常用数据集,对主流模型的调用 https://blog.csdn.net/tsq292978891/article/details/79403617 加载常用数 ...
- 科学计算三维可视化---TVTK管线与数据加载(数据集)
一:数据集 三维可视化的第一步是选用合适的数据结构来表示数据,TVTK提供了多种表示不同种类数据的数据集 (一)数据集--ImageData >>> from tvtk.api im ...
- Tensorflow之快速加载MNIST数据集
from tensorflow.examples.tutorials.mnist import input_data import tensorflow as tf def myprint(v): p ...
- Pytorch文本分类(imdb数据集),含DataLoader数据加载,最优模型保存
用pytorch进行文本分类,数据集为keras内置的imdb影评数据(二分类),代码包含六个部分(详见代码) 使用环境: pytorch:1.1.0 cuda:10.0 gpu:RTX2070 (1 ...
- [DeeplearningAI笔记]序列模型2.9情感分类
5.2自然语言处理 觉得有用的话,欢迎一起讨论相互学习~Follow Me 2.9 Sentiment classification 情感分类 情感分类任务简单来说是看一段文本,然后分辨这个人是否喜欢 ...
- JVM学习二:JVM之类加载器之加载分析
前面一遍,我们对类的加载有了一个整体的认识,而这一节我们细节分析一下类加载器的第一步,即:加载. 一.概念 类的加载指的是将类的.class文件中的二进制数据读入到内存中,将其放在运行时数据区的方法区 ...
- UIButton 加载网络图片
以后就可以 用这个分类 UIButton轻松加载网络图片了, UIButton+WebCache.h #import <UIKit/UIKit.h> @interface UIButt ...
- Pytorch加载并可视化FashionMNIST指定层(Udacity)
加载并可视化FashionMNIST 在这个notebook中,我们要加载并查看 Fashion-MNIST 数据库中的图像. 任何分类问题的第一步,都是查看你正在使用的数据集.这样你可以了解有关图像 ...
随机推荐
- bzoj 3527: [Zjoi2014]力【FFT】
大力推公式,目标是转成卷积形式:\( C_i=\sum_{j=1}^{i}a_jb_{i-j} \) 首先下标从0开始存,n-- \[ F_i=\frac{\sum_{j<i}\frac{q_j ...
- bzoj 2242: [SDOI2011]计算器【扩展欧几里得+快速幂+BSGS】
第一问快速幂板子 第二问把式子转化为\( xy\equiv Z(mod P)\rightarrow xy+bP=z \),然后扩展欧几里得 第三问BSGS板子 #include<iostream ...
- 浅谈KMP算法——Chemist
很久以前就学过KMP,不过一直没有深入理解只是背代码,今天总结一下KMP算法来加深印象. 一.KMP算法介绍 KMP解决的问题:给你两个字符串A和B(|A|=n,|B|=m,n>m),询问一个字 ...
- Service官方教程(10)Bound Service的生命周期函数
Managing the Lifecycle of a Bound Service When a service is unbound from all clients, the Android sy ...
- VS2010环境下.NET4.0中Tuple<T>的一个小BUG问题
启动一个桌面程序后,发现一个窗体cfdata=null, 执行时发生错误, 但是在初始化的时候,我明明是cfdata=new Cfdata();为什么会出现这个错误呢. 我开始跟踪,发现当执行cfda ...
- CoreText的绘制流程-转
来自:http://blog.sina.com.cn/s/blog_7c8dc2d50101lbb1.html 使用coreText进行文本绘制,需要在工程中添加CoreText.framework, ...
- Java socket2
通过socket对象可以获取通信对方的socket信息 客户端: import java.net.*; import java.io.*; public class TestServer { publ ...
- solr之~模糊查询【转】
solr之~模糊查询 有的时候,我们一开始不可能准确地知道搜索的关键字在 Solr 中查询出的结果是什么,因此,Solr 还提供了几种类型的模糊查询.模糊匹配会在索引中对关键字进行非精确匹配.例如,有 ...
- Hibernate配置(通过注解配置)
本文主要讲通过注解配置来替换Hibernate的映射文件 1.多对一配置 package com.jazz7.entity; import java.util.Date; import javax.p ...
- jQuery select年月日(生日)选择器
实际项目中,在用户的个人中心,编辑用户资料时经常会遇到选择生日选项的问题. 因为我项目工程中没有使用如jQuery UI的插件性下拉列表,所以选择select + option的原生方式,实现选择器. ...