A deep convolutional neural network based on Theano

The network uses two convolutional-pooling layers, one fully connected layer, and a softmax classifier, and reaches 99.22% accuracy on the MNIST test set. The code is adapted from neural-networks-and-deep-learning.
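Before the listing, a quick sanity check on the feature-map sizes, which explains the n_in=40*4*4 fed to the fully connected layer below. This is a minimal sketch for illustration only; the helper name is ours, not part of the network code. A valid 5x5 convolution shrinks each side by 4, and 2x2 max-pooling halves it:

# hypothetical helper, illustration only: side length of a feature map after
# one valid kxk convolution followed by pxp max-pooling
def conv_pool_size(n, k=5, p=2):
    return (n - k + 1) // p

print(conv_pool_size(28))                  # 28 -> 24 -> 12 after layer 1
print(conv_pool_size(conv_pool_size(28)))  # 12 -> 8 -> 4 after layer 2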
#coding:utf8
import cPickle
import numpy as np
import theano
import theano.tensor as T
from theano.tensor.nnet import conv
from theano.tensor.nnet import softmax
from theano.tensor.nnet import sigmoid
from theano.tensor import shared_randomstreams
from theano.tensor.signal import downsample

def ReLU(z): return T.maximum(0.0, z)

def load_data_shared():
    f = open('mnist.pkl', 'rb')
    training_data, validation_data, test_data = cPickle.load(f)
    f.close()
    def shared(data):
        shared_x = theano.shared(
            np.asarray(data[0], dtype=theano.config.floatX), borrow=True)
        shared_y = theano.shared(
            np.asarray(data[1], dtype=theano.config.floatX), borrow=True)
        return shared_x, T.cast(shared_y, "int32")
    return [shared(training_data), shared(validation_data), shared(test_data)]
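# Keeping the datasets in theano.shared variables lets Theano move them to the
# GPU in a single transfer; the `givens` substitutions in SGD below then slice
# each mini-batch on the device instead of copying batches one by one.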
class Network(object):
    def __init__(self, layers, mini_batch_size):
        self.layers = layers
        self.mini_batch_size = mini_batch_size
        # gather every layer's parameters (w, b) into one flat list
        self.params = [param for layer in self.layers for param in layer.params]
        self.x = T.matrix("x")
        self.y = T.ivector("y")  # 1-dimensional vector of integer labels
        # chain the layers: layer j-1's output becomes layer j's input
        init_layer = self.layers[0]
        init_layer.set_inpt(self.x, self.x, self.mini_batch_size)
        for j in xrange(1, len(self.layers)):
            prev_layer, layer = self.layers[j-1], self.layers[j]
            layer.set_inpt(
                prev_layer.output, prev_layer.output_dropout, self.mini_batch_size)
        self.output = self.layers[-1].output
        self.output_dropout = self.layers[-1].output_dropout
    def SGD(self, training_data, epochs, mini_batch_size, eta,
            validation_data, test_data, lmbda=0.0):
        training_x, training_y = training_data
        validation_x, validation_y = validation_data
        test_x, test_y = test_data
        num_training_batches = size(training_data)/mini_batch_size
        num_validation_batches = size(validation_data)/mini_batch_size
        num_test_batches = size(test_data)/mini_batch_size
        # L2-regularized cost; T.grad derives the gradients symbolically from
        # the cost, so no hand-written derivative (prime) functions are needed
        l2_norm_squared = sum([(layer.w**2).sum() for layer in self.layers])
        cost = self.layers[-1].cost(self)+\
               0.5*lmbda*l2_norm_squared/num_training_batches
        grads = T.grad(cost, self.params)
        updates = [(param, param-eta*grad)
                   for param, grad in zip(self.params, grads)]

        i = T.lscalar()  # mini-batch index
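        # For intuition: T.grad works on any symbolic expression -- for
        # y = x**2 it returns the graph 2*x -- and here it differentiates the
        # whole conv/pool/softmax graph with respect to every w and b at once.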
        train_mb = theano.function(
            [i], cost, updates=updates,
            givens={
                self.x:
                training_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
                self.y:
                training_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })
        validate_mb_accuracy = theano.function(
            [i], self.layers[-1].accuracy(self.y),
            givens={
                self.x:
                validation_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
                self.y:
                validation_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })
        test_mb_accuracy = theano.function(
            [i], self.layers[-1].accuracy(self.y),
            givens={
                self.x:
                test_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
                self.y:
                test_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })
        self.test_mb_predictions = theano.function(
            [i], self.layers[-1].y_out,
            givens={
                self.x:
                test_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })
        best_validation_accuracy = 0.0
        for epoch in xrange(epochs):
            for minibatch_index in xrange(num_training_batches):
                iteration = num_training_batches*epoch+minibatch_index
                if iteration % 1000 == 0:
                    print("Training mini-batch number {0}".format(iteration))
                cost_ij = train_mb(minibatch_index)
                # evaluate once per epoch, after the last mini-batch
                if (iteration+1) % num_training_batches == 0:
                    validation_accuracy = np.mean(
                        [validate_mb_accuracy(j) for j in xrange(num_validation_batches)])
                    print("Epoch {0}: validation accuracy {1:.2%},cost={2}".format(
                        epoch, validation_accuracy, cost_ij))
                    if validation_accuracy >= best_validation_accuracy:
                        print("This is the best validation accuracy to date.")
                        best_validation_accuracy = validation_accuracy
                        best_iteration = iteration
                        if test_data:
                            test_accuracy = np.mean(
                                [test_mb_accuracy(j) for j in xrange(num_test_batches)])
                            print('The corresponding test accuracy is {0:.2%}'.format(
                                test_accuracy))
        print("Finished training network.")
        print("Best validation accuracy of {0:.2%} obtained at iteration {1}".format(
            best_validation_accuracy, best_iteration))
        print("Corresponding test accuracy of {0:.2%}".format(test_accuracy))
class ConvPoolLayer(object):
    def __init__(self, filter_shape, image_shape, poolsize=(2, 2),
                 activation_fn=ReLU):
        # filter_shape: e.g. (20, 1, 5, 5) -- 20 filters, 1 input channel, 5x5 kernels
        self.filter_shape = filter_shape
        # image_shape: e.g. (10, 1, 28, 28) -- mini-batch size, channels, height,
        # width; the channel count must match filter_shape[1]
        self.image_shape = image_shape
        self.poolsize = poolsize  # e.g. (2, 2)
        self.activation_fn = activation_fn  # ReLU here; sigmoid also works
        # fan-out per filter after pooling, e.g. 20*5*5/(2*2) = 125
        n_out = (filter_shape[0]*np.prod(filter_shape[2:])/np.prod(poolsize))
        self.w = theano.shared(  # shape (20, 1, 5, 5)
            np.asarray(
                np.random.normal(loc=0, scale=np.sqrt(1.0/n_out), size=filter_shape),
                dtype=theano.config.floatX),
            borrow=True)
        self.b = theano.shared(  # one bias per output feature map
            np.asarray(
                np.random.normal(loc=0, scale=1.0, size=(filter_shape[0],)),
                dtype=theano.config.floatX),
            borrow=True)
        self.params = [self.w, self.b]
    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        self.inpt = inpt.reshape(self.image_shape)  # (10, 1, 28, 28)
        conv_out = conv.conv2d(  # valid conv: 28-5+1=24 -> (10, 20, 24, 24)
            input=self.inpt, filters=self.w, filter_shape=self.filter_shape,
            image_shape=self.image_shape)
        pooled_out = downsample.max_pool_2d(  # 24/2=12 -> (10, 20, 12, 12)
            input=conv_out, ds=self.poolsize, ignore_border=True)
        # broadcast the per-map bias, shape (1, 20, 1, 1), over the pooled output
        self.output = self.activation_fn(
            pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))
        self.output_dropout = self.output  # no dropout in the convolutional layers
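# Dropout is applied only to the fully connected and softmax layers below.
# Following the book's code, the conv layers pass their output through as
# output_dropout unchanged; the usual argument is that weight sharing already
# constrains a convolutional layer, so it overfits less to begin with.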
class FullyConnectedLayer(object):
    def __init__(self, n_in, n_out, activation_fn=sigmoid, p_dropout=0.0):
        self.n_in = n_in
        self.n_out = n_out
        self.activation_fn = activation_fn
        self.p_dropout = p_dropout
        self.w = theano.shared(
            np.asarray(
                np.random.normal(
                    loc=0.0, scale=np.sqrt(1.0/n_out), size=(n_in, n_out)),
                dtype=theano.config.floatX),
            name='w', borrow=True)
        self.b = theano.shared(
            np.asarray(np.random.normal(loc=0.0, scale=1.0, size=(n_out,)),
                       dtype=theano.config.floatX),
            name='b', borrow=True)
        self.params = [self.w, self.b]
    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        self.inpt = inpt.reshape((mini_batch_size, self.n_in))
        # at test time the weights' effect is scaled by (1-p_dropout) to
        # compensate for the units dropped during training
        self.output = self.activation_fn(
            (1-self.p_dropout)*T.dot(self.inpt, self.w) + self.b)
        self.y_out = T.argmax(self.output, axis=1)
        self.inpt_dropout = dropout_layer(
            inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout)
        self.output_dropout = self.activation_fn(
            T.dot(self.inpt_dropout, self.w) + self.b)

    def accuracy(self, y):
        return T.mean(T.eq(y, self.y_out))
class SoftmaxLayer(object):
    def __init__(self, n_in, n_out, p_dropout=0.0):
        self.n_in = n_in
        self.n_out = n_out
        self.p_dropout = p_dropout
        # the softmax weights start at zero: the layer is log-linear, so there
        # is no symmetry-breaking problem as there would be in a hidden layer
        self.w = theano.shared(
            np.zeros((n_in, n_out), dtype=theano.config.floatX),
            name='w', borrow=True)
        self.b = theano.shared(
            np.zeros((n_out,), dtype=theano.config.floatX),
            name='b', borrow=True)
        self.params = [self.w, self.b]
    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        self.inpt = inpt.reshape((mini_batch_size, self.n_in))
        self.output = softmax((1-self.p_dropout)*T.dot(self.inpt, self.w) + self.b)
        self.y_out = T.argmax(self.output, axis=1)
        self.inpt_dropout = dropout_layer(
            inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout)
        self.output_dropout = softmax(T.dot(self.inpt_dropout, self.w) + self.b)

    def cost(self, net):
        # negative log-likelihood of the correct labels
        return -T.mean(T.log(self.output_dropout)[T.arange(net.y.shape[0]), net.y])

    def accuracy(self, y):
        return T.mean(T.eq(y, self.y_out))

def size(data):
    # number of examples in a shared dataset
    return len(data[0].get_value())
def dropout_layer(layer, p_dropout):
    # randomly zero out each unit with probability p_dropout
    srng = shared_randomstreams.RandomStreams(
        np.random.RandomState(0).randint(999999))
    mask = srng.binomial(n=1, p=1-p_dropout, size=layer.shape)
    return layer*T.cast(mask, theano.config.floatX)
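# Note on the dropout convention used above: during training the surviving
# activations are fed through at full strength, and at test time the layer
# output is scaled by (1-p_dropout) instead (see FullyConnectedLayer.set_inpt).
# In expectation this matches the "inverted dropout" scheme used by many other
# implementations, which divides by (1-p_dropout) at training time instead.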
if __name__ == '__main__':
    training_data, validation_data, test_data = load_data_shared()
    mini_batch_size = 10
    net = Network([
        ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28),
                      filter_shape=(20, 1, 5, 5),
                      poolsize=(2, 2)),
        ConvPoolLayer(image_shape=(mini_batch_size, 20, 12, 12),
                      filter_shape=(40, 20, 5, 5),
                      poolsize=(2, 2)),
        FullyConnectedLayer(n_in=40*4*4, n_out=100),
        SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size)
    net.SGD(training_data, 30, mini_batch_size, 0.1,
            validation_data, test_data)

# Results with sigmoid as the ConvPoolLayer activation_fn:
# Epoch 29: validation accuracy 98.96%,cost=9.70275432337e-05
# This is the best validation accuracy to date.
# The corresponding test accuracy is 98.86%

# Results with ReLU as the ConvPoolLayer activation_fn (the default above):
# Epoch 29: validation accuracy 99.06%,cost=4.11269593315e-06
# This is the best validation accuracy to date.
# The corresponding test accuracy is 99.22%
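One variant worth trying, sketched below. This is our suggestion, not something the post reports results for, and the p_dropout values are arbitrary choices: the fully connected and softmax layers already accept a p_dropout argument, so dropout can be switched on without touching any other code.

    # hypothetical variant: enable dropout on the dense layers
    net = Network([
        ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28),
                      filter_shape=(20, 1, 5, 5), poolsize=(2, 2)),
        ConvPoolLayer(image_shape=(mini_batch_size, 20, 12, 12),
                      filter_shape=(40, 20, 5, 5), poolsize=(2, 2)),
        FullyConnectedLayer(n_in=40*4*4, n_out=100, p_dropout=0.5),
        SoftmaxLayer(n_in=100, n_out=10, p_dropout=0.5)], mini_batch_size)

With dropout active, output_dropout drives the training cost while the (1-p_dropout)-scaled output is used for validation and test accuracy, exactly as wired in Network.__init__ and the layer classes above.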