A deep convolutional neural network based on Theano

The network uses two convolutional-pooling layers, one fully connected layer, and a softmax classifier, and reaches 99.22% accuracy on the MNIST test set. The code is adapted from neural-networks-and-deep-learning.
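Before the listing, a quick sanity check on the feature-map sizes, which explains the n_in=40*4*4 fed to the fully connected layer below. This is a minimal sketch for illustration only; the helper name is ours, not part of the network code. A valid 5x5 convolution shrinks each side by 4, and 2x2 max-pooling halves it:

# hypothetical helper, illustration only: side length of a feature map after
# one valid kxk convolution followed by pxp max-pooling
def conv_pool_size(n, k=5, p=2):
    return (n - k + 1) // p

print(conv_pool_size(28))                  # 28 -> 24 -> 12 after layer 1
print(conv_pool_size(conv_pool_size(28)))  # 12 -> 8 -> 4 after layer 2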
#coding:utf8
import cPickle
import numpy as np
import theano
import theano.tensor as T
from theano.tensor.nnet import conv
from theano.tensor.nnet import softmax
from theano.tensor.nnet import sigmoid
from theano.tensor import shared_randomstreams
from theano.tensor.signal import downsample

def ReLU(z): return T.maximum(0.0, z)

def load_data_shared():
    f = open('mnist.pkl', 'rb')
    training_data, validation_data, test_data = cPickle.load(f)
    f.close()
    def shared(data):
        shared_x = theano.shared(
            np.asarray(data[0], dtype=theano.config.floatX), borrow=True)
        shared_y = theano.shared(
            np.asarray(data[1], dtype=theano.config.floatX), borrow=True)
        return shared_x, T.cast(shared_y, "int32")
    return [shared(training_data), shared(validation_data), shared(test_data)]
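# Keeping the datasets in theano.shared variables lets Theano move them to the
# GPU in a single transfer; the `givens` substitutions in SGD below then slice
# each mini-batch on the device instead of copying batches one by one.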
class Network(object):
    def __init__(self, layers, mini_batch_size):
        self.layers = layers
        self.mini_batch_size = mini_batch_size
        # gather every layer's parameters (w, b) into one flat list
        self.params = [param for layer in self.layers for param in layer.params]
        self.x = T.matrix("x")
        self.y = T.ivector("y")  # 1-dimensional vector of integer labels
        # chain the layers: layer j-1's output becomes layer j's input
        init_layer = self.layers[0]
        init_layer.set_inpt(self.x, self.x, self.mini_batch_size)
        for j in xrange(1, len(self.layers)):
            prev_layer, layer = self.layers[j-1], self.layers[j]
            layer.set_inpt(
                prev_layer.output, prev_layer.output_dropout, self.mini_batch_size)
        self.output = self.layers[-1].output
        self.output_dropout = self.layers[-1].output_dropout
    def SGD(self, training_data, epochs, mini_batch_size, eta,
            validation_data, test_data, lmbda=0.0):
        training_x, training_y = training_data
        validation_x, validation_y = validation_data
        test_x, test_y = test_data
        num_training_batches = size(training_data)/mini_batch_size
        num_validation_batches = size(validation_data)/mini_batch_size
        num_test_batches = size(test_data)/mini_batch_size
        # L2-regularized cost; T.grad derives the gradients symbolically from
        # the cost, so no hand-written derivative (prime) functions are needed
        l2_norm_squared = sum([(layer.w**2).sum() for layer in self.layers])
        cost = self.layers[-1].cost(self)+\
               0.5*lmbda*l2_norm_squared/num_training_batches
        grads = T.grad(cost, self.params)
        updates = [(param, param-eta*grad)
                   for param, grad in zip(self.params, grads)]

        i = T.lscalar()  # mini-batch index
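        # For intuition: T.grad works on any symbolic expression -- for
        # y = x**2 it returns the graph 2*x -- and here it differentiates the
        # whole conv/pool/softmax graph with respect to every w and b at once.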
        train_mb = theano.function(
            [i], cost, updates=updates,
            givens={
                self.x:
                training_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
                self.y:
                training_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })
        validate_mb_accuracy = theano.function(
            [i], self.layers[-1].accuracy(self.y),
            givens={
                self.x:
                validation_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
                self.y:
                validation_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })
        test_mb_accuracy = theano.function(
            [i], self.layers[-1].accuracy(self.y),
            givens={
                self.x:
                test_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
                self.y:
                test_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })
        self.test_mb_predictions = theano.function(
            [i], self.layers[-1].y_out,
            givens={
                self.x:
                test_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })
        best_validation_accuracy = 0.0
        for epoch in xrange(epochs):
            for minibatch_index in xrange(num_training_batches):
                iteration = num_training_batches*epoch+minibatch_index
                if iteration % 1000 == 0:
                    print("Training mini-batch number {0}".format(iteration))
                cost_ij = train_mb(minibatch_index)
                # evaluate once per epoch, after the last mini-batch
                if (iteration+1) % num_training_batches == 0:
                    validation_accuracy = np.mean(
                        [validate_mb_accuracy(j) for j in xrange(num_validation_batches)])
                    print("Epoch {0}: validation accuracy {1:.2%},cost={2}".format(
                        epoch, validation_accuracy, cost_ij))
                    if validation_accuracy >= best_validation_accuracy:
                        print("This is the best validation accuracy to date.")
                        best_validation_accuracy = validation_accuracy
                        best_iteration = iteration
                        if test_data:
                            test_accuracy = np.mean(
                                [test_mb_accuracy(j) for j in xrange(num_test_batches)])
                            print('The corresponding test accuracy is {0:.2%}'.format(
                                test_accuracy))
        print("Finished training network.")
        print("Best validation accuracy of {0:.2%} obtained at iteration {1}".format(
            best_validation_accuracy, best_iteration))
        print("Corresponding test accuracy of {0:.2%}".format(test_accuracy))
class ConvPoolLayer(object):
    def __init__(self, filter_shape, image_shape, poolsize=(2, 2),
                 activation_fn=ReLU):
        # filter_shape: e.g. (20, 1, 5, 5) -- 20 filters, 1 input channel, 5x5 kernels
        self.filter_shape = filter_shape
        # image_shape: e.g. (10, 1, 28, 28) -- mini-batch size, channels, height,
        # width; the channel count must match filter_shape[1]
        self.image_shape = image_shape
        self.poolsize = poolsize  # e.g. (2, 2)
        self.activation_fn = activation_fn  # ReLU here; sigmoid also works
        # fan-out per filter after pooling, e.g. 20*5*5/(2*2) = 125
        n_out = (filter_shape[0]*np.prod(filter_shape[2:])/np.prod(poolsize))
        self.w = theano.shared(  # shape (20, 1, 5, 5)
            np.asarray(
                np.random.normal(loc=0, scale=np.sqrt(1.0/n_out), size=filter_shape),
                dtype=theano.config.floatX),
            borrow=True)
        self.b = theano.shared(  # one bias per output feature map
            np.asarray(
                np.random.normal(loc=0, scale=1.0, size=(filter_shape[0],)),
                dtype=theano.config.floatX),
            borrow=True)
        self.params = [self.w, self.b]
    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        self.inpt = inpt.reshape(self.image_shape)  # (10, 1, 28, 28)
        conv_out = conv.conv2d(  # valid conv: 28-5+1=24 -> (10, 20, 24, 24)
            input=self.inpt, filters=self.w, filter_shape=self.filter_shape,
            image_shape=self.image_shape)
        pooled_out = downsample.max_pool_2d(  # 24/2=12 -> (10, 20, 12, 12)
            input=conv_out, ds=self.poolsize, ignore_border=True)
        # broadcast the per-map bias, shape (1, 20, 1, 1), over the pooled output
        self.output = self.activation_fn(
            pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))
        self.output_dropout = self.output  # no dropout in the convolutional layers
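# Dropout is applied only to the fully connected and softmax layers below.
# Following the book's code, the conv layers pass their output through as
# output_dropout unchanged; the usual argument is that weight sharing already
# constrains a convolutional layer, so it overfits less to begin with.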
class FullyConnectedLayer(object):
    def __init__(self, n_in, n_out, activation_fn=sigmoid, p_dropout=0.0):
        self.n_in = n_in
        self.n_out = n_out
        self.activation_fn = activation_fn
        self.p_dropout = p_dropout
        self.w = theano.shared(
            np.asarray(
                np.random.normal(
                    loc=0.0, scale=np.sqrt(1.0/n_out), size=(n_in, n_out)),
                dtype=theano.config.floatX),
            name='w', borrow=True)
        self.b = theano.shared(
            np.asarray(np.random.normal(loc=0.0, scale=1.0, size=(n_out,)),
                       dtype=theano.config.floatX),
            name='b', borrow=True)
        self.params = [self.w, self.b]
    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        self.inpt = inpt.reshape((mini_batch_size, self.n_in))
        # at test time the weights' effect is scaled by (1-p_dropout) to
        # compensate for the units dropped during training
        self.output = self.activation_fn(
            (1-self.p_dropout)*T.dot(self.inpt, self.w) + self.b)
        self.y_out = T.argmax(self.output, axis=1)
        self.inpt_dropout = dropout_layer(
            inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout)
        self.output_dropout = self.activation_fn(
            T.dot(self.inpt_dropout, self.w) + self.b)

    def accuracy(self, y):
        return T.mean(T.eq(y, self.y_out))
class SoftmaxLayer(object):
    def __init__(self, n_in, n_out, p_dropout=0.0):
        self.n_in = n_in
        self.n_out = n_out
        self.p_dropout = p_dropout
        # the softmax weights start at zero: the layer is log-linear, so there
        # is no symmetry-breaking problem as there would be in a hidden layer
        self.w = theano.shared(
            np.zeros((n_in, n_out), dtype=theano.config.floatX),
            name='w', borrow=True)
        self.b = theano.shared(
            np.zeros((n_out,), dtype=theano.config.floatX),
            name='b', borrow=True)
        self.params = [self.w, self.b]
    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        self.inpt = inpt.reshape((mini_batch_size, self.n_in))
        self.output = softmax((1-self.p_dropout)*T.dot(self.inpt, self.w) + self.b)
        self.y_out = T.argmax(self.output, axis=1)
        self.inpt_dropout = dropout_layer(
            inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout)
        self.output_dropout = softmax(T.dot(self.inpt_dropout, self.w) + self.b)

    def cost(self, net):
        # negative log-likelihood of the correct labels
        return -T.mean(T.log(self.output_dropout)[T.arange(net.y.shape[0]), net.y])

    def accuracy(self, y):
        return T.mean(T.eq(y, self.y_out))

def size(data):
    # number of examples in a shared dataset
    return len(data[0].get_value())
def dropout_layer(layer, p_dropout):
    # randomly zero out each unit with probability p_dropout
    srng = shared_randomstreams.RandomStreams(
        np.random.RandomState(0).randint(999999))
    mask = srng.binomial(n=1, p=1-p_dropout, size=layer.shape)
    return layer*T.cast(mask, theano.config.floatX)
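# Note on the dropout convention used above: during training the surviving
# activations are fed through at full strength, and at test time the layer
# output is scaled by (1-p_dropout) instead (see FullyConnectedLayer.set_inpt).
# In expectation this matches the "inverted dropout" scheme used by many other
# implementations, which divides by (1-p_dropout) at training time instead.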
if __name__ == '__main__':
    training_data, validation_data, test_data = load_data_shared()
    mini_batch_size = 10
    net = Network([
        ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28),
                      filter_shape=(20, 1, 5, 5),
                      poolsize=(2, 2)),
        ConvPoolLayer(image_shape=(mini_batch_size, 20, 12, 12),
                      filter_shape=(40, 20, 5, 5),
                      poolsize=(2, 2)),
        FullyConnectedLayer(n_in=40*4*4, n_out=100),
        SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size)
    net.SGD(training_data, 30, mini_batch_size, 0.1,
            validation_data, test_data)

# Results with sigmoid as the ConvPoolLayer activation_fn:
# Epoch 29: validation accuracy 98.96%,cost=9.70275432337e-05
# This is the best validation accuracy to date.
# The corresponding test accuracy is 98.86%

# Results with ReLU as the ConvPoolLayer activation_fn (the default above):
# Epoch 29: validation accuracy 99.06%,cost=4.11269593315e-06
# This is the best validation accuracy to date.
# The corresponding test accuracy is 99.22%
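One variant worth trying, sketched below. This is our suggestion, not something the post reports results for, and the p_dropout values are arbitrary choices: the fully connected and softmax layers already accept a p_dropout argument, so dropout can be switched on without touching any other code.

    # hypothetical variant: enable dropout on the dense layers
    net = Network([
        ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28),
                      filter_shape=(20, 1, 5, 5), poolsize=(2, 2)),
        ConvPoolLayer(image_shape=(mini_batch_size, 20, 12, 12),
                      filter_shape=(40, 20, 5, 5), poolsize=(2, 2)),
        FullyConnectedLayer(n_in=40*4*4, n_out=100, p_dropout=0.5),
        SoftmaxLayer(n_in=100, n_out=10, p_dropout=0.5)], mini_batch_size)

With dropout active, output_dropout drives the training cost while the (1-p_dropout)-scaled output is used for validation and test accuracy, exactly as wired in Network.__init__ and the layer classes above.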