fc_net.py cs231n
如果有错误,欢迎指出,不胜感激。
import numpy as np

from cs231n.layers import *
from cs231n.layer_utils import *


class TwoLayerNet(object):
    """
    A two-layer fully-connected neural network with ReLU nonlinearity and
    softmax loss that uses a modular layer design. We assume an input dimension
    of D, a hidden dimension of H, and perform classification over C classes.

    The architecture is affine - relu - affine - softmax.

    Note that this class does not implement gradient descent; instead, it
    will interact with a separate Solver object that is responsible for running
    optimization.

    The learnable parameters of the model are stored in the dictionary
    self.params that maps parameter names to numpy arrays.
    """

    def __init__(self, input_dim=3*32*32, hidden_dim=100, num_classes=10,
                 weight_scale=1e-3, reg=0.0):
        """
        Initialize a new network.

        Inputs:
        - input_dim: An integer giving the size of the input
        - hidden_dim: An integer giving the size of the hidden layer
        - num_classes: An integer giving the number of classes to classify
        - weight_scale: Scalar giving the standard deviation for random
          initialization of the weights.
        - reg: Scalar giving L2 regularization strength.
        """
        self.params = {}
        self.reg = reg

        # Weights are drawn from a zero-mean Gaussian scaled by weight_scale;
        # biases start at zero.
        self.params['W1'] = np.random.randn(input_dim, hidden_dim) * weight_scale
        self.params['b1'] = np.zeros((hidden_dim,))
        self.params['W2'] = np.random.randn(hidden_dim, num_classes) * weight_scale
        self.params['b2'] = np.zeros((num_classes,))

    def loss(self, X, y=None):
        """
        Compute loss and gradient for a minibatch of data.

        Inputs:
        - X: Array of input data of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].

        Returns:
        If y is None, then run a test-time forward pass of the model and return:
        - scores: Array of shape (N, C) giving classification scores, where
          scores[i, c] is the classification score for X[i] and class c.

        If y is not None, then run a training-time forward and backward pass
        and return a tuple of:
        - loss: Scalar value giving the loss
        - grads: Dictionary with the same keys as self.params, mapping parameter
          names to gradients of the loss with respect to those parameters.
        """
        W1, b1 = self.params['W1'], self.params['b1']
        W2, b2 = self.params['W2'], self.params['b2']

        # Forward pass: affine - relu - affine.
        hidden, hidden_cache = affine_relu_forward(X, W1, b1)
        scores, scores_cache = affine_forward(hidden, W2, b2)

        # No labels means test mode: only the scores are needed.
        if y is None:
            return scores

        # Data loss plus L2 penalty on the weights (biases are not regularized).
        loss, dscores = softmax_loss(scores, y)
        loss += 0.5 * self.reg * (np.sum(W2 * W2) + np.sum(W1 * W1))

        # Backward pass, in reverse order of the forward pass.
        grads = {}
        dhidden, grads['W2'], grads['b2'] = affine_backward(dscores, scores_cache)
        _, grads['W1'], grads['b1'] = affine_relu_backward(dhidden, hidden_cache)

        # Gradient of the 0.5 * reg * sum(W * W) term.
        grads['W2'] += W2 * self.reg
        grads['W1'] += W1 * self.reg

        return loss, grads


class FullyConnectedNet(object):
    """
    A fully-connected neural network with an arbitrary number of hidden layers,
    ReLU nonlinearities, and a softmax loss function. This also implements
    dropout and batch normalization as options. For a network with L layers,
    the architecture is

    {affine - [batch norm] - relu - [dropout]} x (L - 1) - affine - softmax

    where batch normalization and dropout are optional, and the {...} block is
    repeated L - 1 times.

    Similar to the TwoLayerNet above, learnable parameters are stored in the
    self.params dictionary and will be learned using the Solver class.
    """

    def __init__(self, hidden_dims=[100], input_dim=3*32*32, num_classes=10,
                 dropout=0, use_batchnorm=False, reg=0.0,
                 weight_scale=1e-2, dtype=np.float32, seed=None):
        """
        Initialize a new FullyConnectedNet.

        Inputs:
        - hidden_dims: A list of integers giving the size of each hidden layer.
          The default list is only read, never mutated.
        - input_dim: An integer giving the size of the input.
        - num_classes: An integer giving the number of classes to classify.
        - dropout: Scalar between 0 and 1 giving dropout strength. If dropout=0
          then the network does not use dropout at all.
        - use_batchnorm: Whether or not the network should use batch
          normalization.
        - reg: Scalar giving L2 regularization strength.
        - weight_scale: Scalar giving the standard deviation for random
          initialization of the weights.
        - dtype: A numpy datatype object; all parameters are cast to this
          datatype. float32 is faster but less accurate, so you should use
          float64 for numeric gradient checking.
        - seed: If not None, then pass this random seed to the dropout layers.
          This will make the dropout layers deterministic so we can gradient
          check the model.
        """
        self.use_batchnorm = use_batchnorm
        self.use_dropout = dropout > 0
        self.reg = reg
        # NOTE: num_layers counts the hidden layers only; the final affine
        # layer is parameter index num_layers + 1.
        self.num_layers = len(hidden_dims)
        self.dtype = dtype
        self.params = {}

        # Layer i (1-based) maps layer_dims[i - 1] -> layer_dims[i]; the last
        # entry is the output affine layer, which has no batchnorm parameters.
        layer_dims = [input_dim] + list(hidden_dims) + [num_classes]
        for idx in range(1, len(layer_dims)):
            fan_in, fan_out = layer_dims[idx - 1], layer_dims[idx]
            self.params['W%d' % idx] = (
                np.random.randn(fan_in, fan_out) * weight_scale)
            self.params['b%d' % idx] = np.zeros(fan_out)
            if self.use_batchnorm and idx <= self.num_layers:
                self.params['beta%d' % idx] = np.zeros(fan_out)
                self.params['gamma%d' % idx] = np.ones(fan_out)

        # When using dropout we need to pass a dropout_param dictionary to each
        # dropout layer so that the layer knows the dropout probability and the
        # mode (train / test). The same dropout_param is passed to every
        # dropout layer.
        self.dropout_param = {}
        if self.use_dropout:
            self.dropout_param = {'mode': 'train', 'p': dropout}
            if seed is not None:
                self.dropout_param['seed'] = seed

        # With batch normalization each layer gets its own bn_param dict:
        # self.bn_params[0] goes to the first batch normalization layer,
        # self.bn_params[1] to the second, and so on.
        self.bn_params = []
        if self.use_batchnorm:
            self.bn_params = [{'mode': 'train'}
                              for _ in range(self.num_layers)]

        # Cast all parameters to the requested datatype.
        self.params = {name: value.astype(dtype)
                       for name, value in self.params.items()}

    def loss(self, X, y=None):
        """
        Compute loss and gradient for the fully-connected net.

        Input / output: Same as TwoLayerNet above.
        """
        X = X.astype(self.dtype)
        mode = 'test' if y is None else 'train'

        # Set train/test mode for the dropout and batchnorm params. The key
        # must be the literal string 'mode' (the entry created in __init__);
        # indexing with the mode value itself would add a new 'train'/'test'
        # key and leave the 'mode' entry untouched.
        if self.use_dropout:
            self.dropout_param['mode'] = mode
        if self.use_batchnorm:
            for bn_param in self.bn_params:
                bn_param['mode'] = mode

        num_hidden = self.num_layers
        last = num_hidden + 1  # parameter index of the output affine layer

        # Forward pass through the hidden blocks; layer_caches[idx - 1] holds
        # whatever the backward pass needs for hidden layer idx.
        layer_caches = []
        out = X
        for idx in range(1, last):
            cache = {}
            out, cache['affine'] = affine_forward(
                out, self.params['W%d' % idx], self.params['b%d' % idx])
            if self.use_batchnorm:
                out, cache['bn'] = batchnorm_forward(
                    out, self.params['gamma%d' % idx],
                    self.params['beta%d' % idx], self.bn_params[idx - 1])
            out, cache['relu'] = relu_forward(out)
            if self.use_dropout:
                out, cache['dropout'] = dropout_forward(out, self.dropout_param)
            layer_caches.append(cache)

        scores, out_cache = affine_forward(
            out, self.params['W%d' % last], self.params['b%d' % last])

        # If test mode return early.
        if mode == 'test':
            return scores

        # Data loss plus L2 penalty on every weight matrix (biases, gamma and
        # beta are not regularized).
        loss, dout = softmax_loss(scores, y)
        for idx in range(1, last + 1):
            loss += np.sum(self.params['W%d' % idx] ** 2) * 0.5 * self.reg

        # Backward pass: output affine layer first, then the hidden blocks in
        # reverse, undoing each block as dropout -> relu -> batchnorm -> affine.
        grads = {}
        dout, grads['W%d' % last], grads['b%d' % last] = affine_backward(
            dout, out_cache)
        grads['W%d' % last] += self.params['W%d' % last] * self.reg

        for idx in range(num_hidden, 0, -1):
            cache = layer_caches[idx - 1]
            if self.use_dropout:
                dout = dropout_backward(dout, cache['dropout'])
            dout = relu_backward(dout, cache['relu'])
            if self.use_batchnorm:
                dout, grads['gamma%d' % idx], grads['beta%d' % idx] = (
                    batchnorm_backward_alt(dout, cache['bn']))
            dout, grads['W%d' % idx], grads['b%d' % idx] = affine_backward(
                dout, cache['affine'])
            grads['W%d' % idx] += self.params['W%d' % idx] * self.reg

        return loss, grads
fc_net.py cs231n 的更多相关文章
- cnn.py cs231n
import numpy as np from cs231n.layers import * from cs231n.fast_layers import * from cs231n.layer_ ...
- layers.py cs231n
如果有错误,欢迎指出,不胜感激. import numpy as np def affine_forward(x, w, b): 第一个最简单的 affine_forward简单的前向传递,返回 ou ...
- optim.py cs231n
如果有错误,欢迎指出,不胜感激 import numpy as np """ This file implements various first-order upda ...
- 深度学习原理与框架-神经网络-cifar10分类(代码) 1.np.concatenate(进行数据串接) 2.np.hstack(将数据横着排列) 3.hasattr(判断.py文件的函数是否存在) 4.reshape(维度重构) 5.transpose(维度位置变化) 6.pickle.load(f文件读入) 7.np.argmax(获得最大值索引) 8.np.maximum(阈值比较)
横1. np.concatenate(list, axis=0) 将数据进行串接,这里主要是可以将列表进行x轴获得y轴的串接 参数说明:list表示需要串接的列表,axis=0,表示从上到下进行串接 ...
- 『cs231n』通过代码理解风格迁移
『cs231n』卷积神经网络的可视化应用 文件目录 vgg16.py import os import numpy as np import tensorflow as tf from downloa ...
- 转:深度学习斯坦福cs231n 课程笔记
http://blog.csdn.net/dinosoft/article/details/51813615 前言 对于深度学习,新手我推荐先看UFLDL,不做assignment的话,一两个晚上就可 ...
- 笔记:CS231n+assignment2(作业二)(一)
第二个作业难度很高,但做(抄)完之后收获还是很大的.... 一.Fully-Connected Neural Nets 首先是对之前的神经网络的程序进行重构,目的是可以构建任意大小的全连接的neura ...
- CS231n 2016 通关 第五、六章 Fully-Connected Neural Nets 作业
要求:实现任意层数的NN. 每一层结构包含: 1.前向传播和反向传播函数:2.每一层计算的相关数值 cell 1 依旧是显示的初始设置 # As usual, a bit of setup impor ...
- CS231n 2016 通关 第四章-NN 作业
cell 1 显示设置初始化 # A bit of setup import numpy as np import matplotlib.pyplot as plt from cs231n.class ...
随机推荐
- Install- Linux必学的60个命令
1.作用 install命令的作用是安装或升级软件或备份数据,它的使用权限是所有用户. 2.格式 (1)install [选项]... 来源 目的地 (2)install [选项]... 来源... ...
- 如何在终端编译C++代码
C++语言从编写-->执行整个过程.一般来讲,开发一个C++程序需要经过以下几步1. 编写代码,2. 编译器进行编译,compile 生成.o的可执行的二进制目标文件3. 连接器进行连接. ...
- 玩转xargs
xargs命令可以把标准输入数据转换成命令行参数.也可以将单行或多行文本输入转换成其他格式,是构建单行命令的重要组件之一. xargs命令应该紧跟在管道操作符之后,以stdin作为主要的源数据流,使用 ...
- [JZOJ5229]【GDOI2018模拟7.14】小奇的糖果
题目 题目大意 在一个二维的平面上,有一堆有颜色的点,你需要找出一条水平线段,使得这个线段上面(或者是下面)的点的颜色不包含所有的颜色.问点数最大是多少. 思考历程 在一开始,我看错了题目大意. 题目 ...
- 廖雪峰Java10加密与安全-4加密算法-4密钥交换算法
1DH算法 1.1.原根公式:g^i mod P 条件:1<g<P,0<i<P 原根:介于[1, p-1]之间的任意2个数i,j(p为素数,i≠j)的结果不相等,即 g^i m ...
- 方法的重写(override)两同两小一大原则:
方法名相同,参数类型相同 子类返回类型小于等于父类方法返回类型, 子类抛出异常小于等于父类方法抛出异常, 子类访问权限大于等于父类方法访问权限.
- SQLServer-SQLServer2017:SQLServer2017
ylbtech-SQLServer-SQLServer2017:SQLServer2017 微软推出了首个公共预览版本,并持续带来更新和改进.而今天,微软同时向 Windows.Linux.macOS ...
- Some vulnerabilities in JEECMSV9
转载:https://blog.csdn.net/weixin_44063566/article/details/88897406 之前遇到了一个JEECMS大概看了一下, 测试版本JEECMSV9. ...
- 关于Cesium 官方教程
最近一直在准备第一次QQ群的Cesium基础培训公开课,虽说使用Cesium也有段日子了,但是要说对Cesium了解有多深,还真不一定.原因是一直以来我都是用哪里学哪里.基于多年开发三维数字地球的底层 ...
- Python学习之高阶函数--嵌套函数、函数装饰器、含参函数装饰器
玩了一晚上王者,突然觉得该学习,然后大晚上的搞出来这道练习题,凌晨一点写博客(之所以这么晚就赶忙写是因为怕第二天看自己程序都忘了咋写的了),我太难了o(╥﹏╥)o 言归正传,练习题要求:构造类似京东的 ...