n如果有错误,欢迎指出,不胜感激

import numpy as np

from cs231n.layers import *
from cs231n.layer_utils import * class TwoLayerNet(object):
"""
A two-layer fully-connected neural network with ReLU nonlinearity and
softmax loss that uses a modular layer design. We assume an input dimension
of D, a hidden dimension of H, and perform classification over C classes. The architecure should be affine - relu - affine - softmax. Note that this class does not implement gradient descent; instead, it
will interact with a separate Solver object that is responsible for running
optimization. The learnable parameters of the model are stored in the dictionary
self.params that maps parameter names to numpy arrays.
""" def __init__(self, input_dim=3*32*32, hidden_dim=100, num_classes=10,
weight_scale=1e-3, reg=0.0):
"""
Initialize a new network. Inputs:
- input_dim: An integer giving the size of the input
- hidden_dim: An integer giving the size of the hidden layer
- num_classes: An integer giving the number of classes to classify
- dropout: Scalar between 0 and 1 giving dropout strength.
- weight_scale: Scalar giving the standard deviation for random
initialization of the weights.
- reg: Scalar giving L2 regularization strength.
"""
self.params = {}
self.reg = reg
self.params['W1']=np.random.randn(input_dim,hidden_dim)*weight_scale
self.params['b1']=np.zeros((hidden_dim,))
self.params['W2']=np.random.randn(hidden_dim,num_classes)*weight_scale
self.params['b2']=np.zeros((num_classes,)) def loss(self, X, y=None):
"""
Compute loss and gradient for a minibatch of data. Inputs:
- X: Array of input data of shape (N, d_1, ..., d_k)
- y: Array of labels, of shape (N,). y[i] gives the label for X[i]. Returns:
If y is None, then run a test-time forward pass of the model and return:
- scores: Array of shape (N, C) giving classification scores, where
scores[i, c] is the classification score for X[i] and class c. If y is not None, then run a training-time forward and backward pass and
return a tuple of:
- loss: Scalar value giving the loss
- grads: Dictionary with the same keys as self.params, mapping parameter
names to gradients of the loss with respect to those parameters.
"""
scores = None
out1,cache1=affine_relu_forward(X,self.params['W1'],self.params['b1'])
out2,cache2=affine_forward(out1,self.params['W2'],self.params['b2'])
if y is None :
return out2 loss,dx=softmax_loss(out2,y)
loss+=0.5*self.reg*( np.sum(self.params['W2']*self.params['W2'])+np.sum(self.params['W1']*self.params['W1']) )
grads={}
dout1,grads['W2'],grads['b2']=affine_backward(dx,cache2)
true_dx,grads['W1'],grads['b1']=affine_relu_backward(dout1,cache1) grads['W2']+=self.params['W2']*self.reg
grads['W1']+=self.params['W1']*self.reg
#grads['b2']+=self.params['b2']*self.reg
#grads['b1']+=self.params['b1']*self.reg # If y is None then we are in test mode so just return scores
# if y is None:
# return scores # loss, grads = 0, {} return loss, grads class FullyConnectedNet(object):
"""
A fully-connected neural network with an arbitrary number of hidden layers,
ReLU nonlinearities, and a softmax loss function. This will also implement
dropout and batch normalization as options. For a network with L layers,
the architecture will be {affine - [batch norm] - relu - [dropout]} x (L - 1) - affine - softmax where batch normalization and dropout are optional, and the {...} block is
repeated L - 1 times. Similar to the TwoLayerNet above, learnable parameters are stored in the
self.params dictionary and will be learned using the Solver class.
""" def __init__(self, hidden_dims=[100], input_dim=3*32*32, num_classes=10,
dropout=0, use_batchnorm=False, reg=0.0,
weight_scale=1e-2, dtype=np.float32, seed=None):
"""
Initialize a new FullyConnectedNet. Inputs:
- hidden_dims: A list of integers giving the size of each hidden layer.
- input_dim: An integer giving the size of the input.
- num_classes: An integer giving the number of classes to classify.
- dropout: Scalar between 0 and 1 giving dropout strength. If dropout=0 then
the network should not use dropout at all.
- use_batchnorm: Whether or not the network should use batch normalization.
- reg: Scalar giving L2 regularization strength.
- weight_scale: Scalar giving the standard deviation for random
initialization of the weights.
- dtype: A numpy datatype object; all computations will be performed using
this datatype. float32 is faster but less accurate, so you should use
float64 for numeric gradient checking.
- seed: If not None, then pass this random seed to the dropout layers. This
will make the dropout layers deteriminstic so we can gradient check the
model.
"""
self.use_batchnorm = use_batchnorm
self.use_dropout = dropout > 0
self.reg = reg
self.num_layers = 1 + len(hidden_dims)
self.dtype = dtype
self.params = {} self.num_layers=len(hidden_dims) num_layers=self.num_layers last_dims=input_dim
for i in xrange(num_layers):
self.params['W%d'%(i+1)]=np.random.randn(last_dims,hidden_dims[i])*weight_scale
self.params['b%d'%(i+1)]=np.zeros(hidden_dims[i],)
if self.use_batchnorm:
self.params['beta%d'%(i+1)]=np.zeros(hidden_dims[i],)
self.params['gamma%d'%(i+1)]=np.ones(hidden_dims[i],)
last_dims=hidden_dims[i] self.params['W%d'%(num_layers+1)]=np.random.randn(last_dims,num_classes)*weight_scale
self.params['b%d'%(num_layers+1)]=np.zeros(num_classes,) # When using dropout we need to pass a dropout_param dictionary to each
# dropout layer so that the layer knows the dropout probability and the mode
# (train / test). You can pass the same dropout_param to each dropout layer.
self.dropout_param = {}
if self.use_dropout:
self.dropout_param = {'mode': 'train', 'p': dropout}
if seed is not None:
self.dropout_param['seed'] = seed # With batch normalization we need to keep track of running means and
# variances, so we need to pass a special bn_param object to each batch
# normalization layer. You should pass self.bn_params[0] to the forward pass
# of the first batch normalization layer, self.bn_params[1] to the forward
# pass of the second batch normalization layer, etc.
self.bn_params = []
if self.use_batchnorm:
self.bn_params = [{'mode': 'train'} for i in xrange(self.num_layers)]
#print len(self.bn_params) # Cast all parameters to the correct datatype
for k, v in self.params.iteritems():
self.params[k] = v.astype(dtype) def loss(self, X, y=None):
"""
Compute loss and gradient for the fully-connected net. Input / output: Same as TwoLayerNet above.
"""
X = X.astype(self.dtype)
mode = 'test' if y is None else 'train' # Set train/test mode for batchnorm params and dropout param since they
# behave differently during training and testing.
if self.dropout_param is not None:
self.dropout_param['mode'] = mode if self.use_batchnorm:
for bn_param in self.bn_params:
bn_param[mode] = mode scores = None
cache={}
num_layers=self.num_layers
next=X
for i in xrange(num_layers): next,cache['cache%d'%(i+1)]=affine_forward(next,self.params['W%d'%(i+1)],self.params['b%d'%(i+1)]) if self.use_batchnorm: next,cache['cachebn%d'%(i+1)]=batchnorm_forward(next,self.params['gamma%d'%(i+1)],self.params['beta%d'%(i+1)],self.bn_params[i]) next,cache['cacher%d'%(i+1)]=relu_forward(next)
if self.use_dropout:
next,cache['cached%d'%(i+1)]=dropout_forward(next,self.dropout_param) scores,cache['cache%d'%(num_layers+1)]=affine_forward(next,self.params['W%d'%(num_layers+1)],self.params['b%d'%(num_layers+1)]) # If test mode return early if mode == 'test':
return scores loss, grads = 0.0, {}
loss,dscores=softmax_loss(scores,y)
for i in xrange(num_layers+1):
loss+=np.sum(self.params['W%d'%(i+1)]**2)*0.5*self.reg
dout=dscores dout,grads['W%d'%(num_layers+1)],grads['b%d'%(num_layers+1)]=affine_backward(dout,cache['cache%d'%(num_layers+1)]) grads['W%d'%(num_layers+1)]+=self.params['W%d'%(num_layers+1)]*self.reg for i in xrange(num_layers):
i=num_layers-i
if self.use_dropout:
dout=dropout_backward(dout,cache['cached%d'%i])
dout=relu_backward(dout,cache['cacher%d'%i])
if self.use_batchnorm:
#print i
dout,grads['gamma%d'%i],grads['beta%d'%i]=batchnorm_backward_alt(dout,cache['cachebn%d'%i]) dout,grads['W%d'%i],grads['b%d'%i]=affine_backward(dout,cache['cache%d'%i])
# print "W%d s is "%(i)+str(grads['W%d'%i].shape) grads['W%d'%(i)]+=self.params['W%d'%(i)]*self.reg return loss, grads

  

n

fc_net.py cs231n的更多相关文章

  1. cnn.py cs231n

    n import numpy as np from cs231n.layers import * from cs231n.fast_layers import * from cs231n.layer_ ...

  2. layers.py cs231n

    如果有错误,欢迎指出,不胜感激. import numpy as np def affine_forward(x, w, b): 第一个最简单的 affine_forward简单的前向传递,返回 ou ...

  3. optim.py cs231n

    n如果有错误,欢迎指出,不胜感激 import numpy as np """ This file implements various first-order upda ...

  4. 深度学习原理与框架-神经网络-cifar10分类(代码) 1.np.concatenate(进行数据串接) 2.np.hstack(将数据横着排列) 3.hasattr(判断.py文件的函数是否存在) 4.reshape(维度重构) 5.tanspose(维度位置变化) 6.pickle.load(f文件读入) 7.np.argmax(获得最大值索引) 8.np.maximum(阈值比较)

    横1. np.concatenate(list, axis=0) 将数据进行串接,这里主要是可以将列表进行x轴获得y轴的串接 参数说明:list表示需要串接的列表,axis=0,表示从上到下进行串接 ...

  5. 『cs231n』通过代码理解风格迁移

    『cs231n』卷积神经网络的可视化应用 文件目录 vgg16.py import os import numpy as np import tensorflow as tf from downloa ...

  6. 转:深度学习斯坦福cs231n 课程笔记

    http://blog.csdn.net/dinosoft/article/details/51813615 前言 对于深度学习,新手我推荐先看UFLDL,不做assignment的话,一两个晚上就可 ...

  7. 笔记:CS231n+assignment2(作业二)(一)

    第二个作业难度很高,但做(抄)完之后收获还是很大的.... 一.Fully-Connected Neural Nets 首先是对之前的神经网络的程序进行重构,目的是可以构建任意大小的全连接的neura ...

  8. CS231n 2016 通关 第五、六章 Fully-Connected Neural Nets 作业

    要求:实现任意层数的NN. 每一层结构包含: 1.前向传播和反向传播函数:2.每一层计算的相关数值 cell 1 依旧是显示的初始设置 # As usual, a bit of setup impor ...

  9. CS231n 2016 通关 第四章-NN 作业

    cell 1 显示设置初始化 # A bit of setup import numpy as np import matplotlib.pyplot as plt from cs231n.class ...

随机推荐

  1. elasticsearch 中文API 删除(四)

    删除API 删除api允许你通过id,从特定的索引中删除类型化的JSON文档.如下例: DeleteResponse response = client.prepareDelete("twi ...

  2. for update行级锁的作用

    1.for update叫排它锁,是一种行级锁,一旦用户对某个行施加了行级加锁,则该用户可以查询也可以更新被加锁的数据行,其它用户只能查询但不能更新被加锁的数据行.如果其它用户想更新该表中的数据行,则 ...

  3. 2018-10-19-C#-序列类为-xml-可以使用的特性大全

    title author date CreateTime categories C# 序列类为 xml 可以使用的特性大全 lindexi 2018-10-19 9:9:47 +0800 2018-6 ...

  4. Python ——tempfile

    主要有以下几个函数: tempfile.TemporaryFile 如何你的应用程序需要一个临时文件来存储数据,但不需要同其他程序共享,那么用TemporaryFile函数创建临时文件是最好的选择.其 ...

  5. PAT甲级——A1046 Shortest Distance

    The task is really simple: given N exits on a highway which forms a simple cycle, you are supposed t ...

  6. 稀疏表示step by step(转)

    原文地址:稀疏表示step by step(转)作者:野火春风 稀疏表示step by step(1)     声明:本人属于绝对的新手,刚刚接触“稀疏表示”这个领域.之所以写下以下的若干个连载,是鼓 ...

  7. 数据库连接池 - (druid、c3p0、dbcp)

    概述: 在这里所谓的数据库连接是指通过网络协议与数据库服务之间建立的TCP连接.通常,与数据库服务进行通信的网络协议无需由应用程序本身实现. 原因有三: 实现复杂度大,需要充分理解和掌握相应的通信协议 ...

  8. 855. Exam Room

    维护一个数据结构要满足:一个教室线性排列的座位 0 ... N-1 调用seat 入座一个距离最近学生最远的座位 调用leave x 离座一个位置为x的学生 由于N最多是 10e9 所以选择维护 学生 ...

  9. 只要三步!阿里云DLA帮你处理海量JSON数据

    概述 您可能有大量应用程序产生的JSON数据,您可能需要对这些JSON数据进行整理,去除不想要的字段,或者只保留想要的字段,或者仅仅是进行数据查询. 那么,利用阿里云Data Lake Analyti ...

  10. bootstrap--栅格系统布局

    栅格布局使用例子: <!DOCTYPE html> <html lang="zh-CN"> <head> <meta charset=&q ...