# 吴恩达--神经网络-week1-hw4
# Ref: https://blog.csdn.net/u013733326/article/details/79767169
import numpy as np
import testCases
import h5py
import matplotlib.pyplot as plt
from dnn_utils import sigmoid, sigmoid_backward, relu, relu_backward
import lr_utils
import traceback
np.random.seed(1)
# 2 layers
def initial_parameters(n_x=4, n_h=2, n_y=1):
    """
    Initialize the parameters of a 2-layer network (L = 2, one hidden layer).

    Both weight matrices are drawn from a standard normal distribution and
    scaled by 0.01; both bias vectors start at zero.

    :param n_x: dims of input layer
    :param n_h: dims of hidden layer
    :param n_y: dims of output layer
    :return: dict of params with keys 'W1', 'b1', 'W2', 'b2'
    """
    W1 = np.random.randn(n_h, n_x) * 0.01
    b1 = np.zeros((n_h, 1))
    # W2 gets the same 0.01 scaling as W1; leaving it unscaled made the
    # output layer start with weights 100x larger than the hidden layer.
    W2 = np.random.randn(n_y, n_h) * 0.01
    b2 = np.zeros((n_y, 1))

    # Sanity-check every parameter shape, not just a subset.
    assert W1.shape == (n_h, n_x)
    assert b1.shape == (n_h, 1)
    assert W2.shape == (n_y, n_h)
    assert b2.shape == (n_y, 1)

    return {
        'W1': W1,
        'b1': b1,
        'W2': W2,
        'b2': b2,
    }
# initialize the params of deep nn
def initial_params_deep(layers_dims):
    """
    Build the initial weights and biases of a deep network.

    The global NumPy random generator is re-seeded with 3 on every call, so
    repeated calls with the same dims return the same weights. Each weight
    matrix is standard-normal noise divided by sqrt(fan_in); biases are zero.

    :param layers_dims: sizes of the input layer, hidden layers, output layer
    :return: dict with keys 'W1', 'b1', ..., one pair per non-input layer
    """
    np.random.seed(3)
    weights_and_biases = {}
    # Walk consecutive (previous layer size, current layer size) pairs.
    size_pairs = zip(layers_dims[:-1], layers_dims[1:])
    for layer_no, (fan_in, fan_out) in enumerate(size_pairs, start=1):
        weight = np.random.randn(fan_out, fan_in) / np.sqrt(fan_in)
        assert weight.shape == (fan_out, fan_in)
        weights_and_biases['W' + str(layer_no)] = weight
        weights_and_biases['b' + str(layer_no)] = np.zeros((fan_out, 1))
    return weights_and_biases
def linear_forward(A, W, b):
    """
    Linear part of one layer's forward pass: Z = W . A + b.

    :param A: activations of the previous layer, (l-1)-th
    :param W: weight matrix of the current layer, W[l]
    :param b: bias of the current layer, b[l]
    :return: (Z, cache) where cache is the tuple (A, W, b) kept for the
             backward pass
    """
    pre_activation = np.dot(W, A) + b
    expected_shape = (W.shape[0], A.shape[1])
    assert pre_activation.shape == expected_shape
    return pre_activation, (A, W, b)
def linear_activation_forward(A_pre, W, b, activation):
    """
    Forward pass of one layer: linear step followed by the activation.

    :param A_pre: the activation of the previous layer
    :param W: W[l]
    :param b: b[l]
    :param activation: which activation to apply: 'Sigmoid' or 'Relu'
                       (case-sensitive)
    :return: (A, cache) where A is the activation of the l-th layer and
             cache is (linear_cache, activation_cache)
    :raises ValueError: if ``activation`` is not 'Sigmoid' or 'Relu'
    """
    # Validate first: previously an unknown name skipped both branches and
    # failed later with an opaque UnboundLocalError on ``A``.
    if activation not in ('Sigmoid', 'Relu'):
        raise ValueError(
            "activation must be 'Sigmoid' or 'Relu', got %r" % (activation,))

    Z, linear_cache = linear_forward(A_pre, W, b)
    # sigmoid / relu (imported from dnn_utils) are unpacked as
    # (activation, cache) pairs.
    if activation == 'Sigmoid':
        A, activ_cache = sigmoid(Z)
    else:
        A, activ_cache = relu(Z)

    # linear_forward already checks Z; here make sure the activation kept
    # the (units, samples) shape.
    assert A.shape == (W.shape[0], A_pre.shape[1])
    return A, (linear_cache, activ_cache)
# Multi layers
def L_layers_forward(X, params):
    """
    Forward propagation through all L layers.

    Layers 1 .. L-1 use the 'Relu' activation, the last layer uses 'Sigmoid'.

    :param X: a[0], the input features
    :param params: dict holding 'W1', 'b1', ..., 'WL', 'bL'
    :return: (A_L, caches): the activation of the last layer and the list of
             per-layer caches in layer order
    """
    num_layers = len(params) // 2  # each layer contributes one W and one b
    layer_caches = []
    current = X

    # Hidden layers.
    for layer in range(1, num_layers):
        current, layer_cache = linear_activation_forward(
            current,
            params['W' + str(layer)],
            params['b' + str(layer)],
            activation='Relu',
        )
        layer_caches.append(layer_cache)

    # Output layer.
    A_L, layer_cache = linear_activation_forward(
        current,
        params['W' + str(num_layers)],
        params['b' + str(num_layers)],
        activation='Sigmoid',
    )
    layer_caches.append(layer_cache)

    # The network is expected to produce a single output row per sample.
    assert A_L.shape == (1, X.shape[1])
    return A_L, layer_caches
def computer_cost(A_L, Y):
    """
    Cross-entropy cost averaged over the samples.

    :param A_L: the activation of the L-th layer (predicted probabilities)
    :param Y: labels, 0: not cat; 1: cat
    :return: the cost as a value with shape ()
    """
    num_samples = Y.shape[1]
    positive_term = np.multiply(np.log(A_L), Y)
    negative_term = np.multiply(np.log(1 - A_L), 1 - Y)
    total = np.sum(positive_term + negative_term)
    cost = np.squeeze(-total / num_samples)
    assert cost.shape == ()
    return cost
def linear_backward(dZ, cache):
    """
    Backward pass of the linear part of one layer.

    :param dZ: gradient of the cost w.r.t. the linear output of layer l
    :param cache: (A_prev, W, b) stored by the forward pass of layer l;
                  A_prev is the activation of layer l-1, W and b are the
                  weights and bias of layer l (b itself is not used here)
    :return: (dA_prev, dW, db)
    """
    prev_activation, weights, _bias = cache
    sample_count = prev_activation.shape[1]

    grad_w = np.dot(dZ, prev_activation.T) / sample_count
    grad_b = np.sum(dZ, axis=1, keepdims=True) / sample_count
    grad_prev = np.dot(weights.T, dZ)

    assert grad_prev.shape == prev_activation.shape
    return grad_prev, grad_w, grad_b
def linear_activation_backward(dA, cache, activation):
    """
    Backward pass of one layer: activation gradient, then the linear step.

    :param dA: gradient w.r.t. the activation of the l-th layer
    :param cache: (linear_cache, activation_cache) stored for the l-th layer
    :param activation: which activation the layer used: 'Relu' or 'Sigmoid'
                       (case-sensitive)
    :return: (dA_prev, dW, db), where dA_prev belongs to the (l-1)-th layer
    :raises ValueError: if ``activation`` is not 'Relu' or 'Sigmoid'
    """
    # Validate first: previously an unknown name skipped both branches and
    # failed with an opaque UnboundLocalError at the return statement.
    if activation not in ('Relu', 'Sigmoid'):
        raise ValueError(
            "activation must be 'Relu' or 'Sigmoid', got %r" % (activation,))

    linear_cache, activation_cache = cache
    if activation == 'Relu':
        dZ = relu_backward(dA, activation_cache)
    else:
        dZ = sigmoid_backward(dA, activation_cache)

    # The linear step is the same for both activations.
    dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return dA_prev, dW, db
# L layers backward propagation
def L_layers_backward(A_L, Y, caches):
    """
    Backward propagation through all L layers.

    The last layer is differentiated as a 'Sigmoid' layer and every earlier
    layer as a 'Relu' layer, mirroring the forward pass.

    :param A_L: the activation of the L-th layer (output of the forward pass)
    :param Y: the labels; reshaped to the shape of A_L before use
    :param caches: list of per-layer caches produced by the forward pass,
                   caches[l-1] belonging to layer l
    :return: dict of gradients with keys 'dA0'..'dA{L-1}', 'dW1'..'dWL'
             and 'db1'..'dbL'
    """
    grads = {}
    L = len(caches)
    Y = Y.reshape(A_L.shape)

    # Derivative of the cross-entropy cost w.r.t. the output activation.
    dAL = -(np.divide(Y, A_L) - np.divide(1 - Y, 1 - A_L))

    # Output layer (Sigmoid).
    output_cache = caches[L - 1]
    (grads['dA' + str(L - 1)],
     grads['dW' + str(L)],
     grads['db' + str(L)]) = linear_activation_backward(
        dAL, output_cache, activation='Sigmoid')

    # Hidden layers (Relu), walked from layer L-1 down to layer 1.
    for l in reversed(range(L - 1)):
        dA_prev, dW, db = linear_activation_backward(
            grads['dA' + str(l + 1)], caches[l], activation='Relu')
        grads['dA' + str(l)] = dA_prev
        grads['dW' + str(l + 1)] = dW
        grads['db' + str(l + 1)] = db

    return grads
def update_params(params, grads, alpha=0.05):
    """
    Apply one gradient-descent step to every W and b.

    The ``params`` dict is updated in place (its entries are rebound to new
    arrays) and also returned.

    :param params: dict holding W[l] and b[l] for every layer
    :param grads: dict holding dW[l] and db[l] for every layer
    :param alpha: learning rate, default 0.05
    :return: the updated params
    """
    layer_count = len(params) // 2
    for layer in range(1, layer_count + 1):
        for prefix in ('W', 'b'):
            key = prefix + str(layer)
            params[key] = params[key] - alpha * grads['d' + key]
    return params
def two_layers_nn(X, Y, layer_dims, alpha=0.075, num_iterations=10000, print_cost=False, is_plot=True):
    """
    Train a two-layer network: the first layer uses Relu, the second layer
    uses Sigmoid.

    The cost is recorded every 100 iterations. Any exception raised during
    training or plotting is reported via its traceback and the function then
    returns None (best-effort behaviour kept from the original).

    :param X: input features
    :param Y: labels, 0 represents not cat, 1 represents cat
    :param layer_dims: (n_x, n_h, n_y): input, hidden and output layer sizes
    :param alpha: learning rate
    :param num_iterations: the number of iterations
    :param print_cost: print the recorded cost every 100 iterations;
                       default is False
    :param is_plot: plot the recorded costs after training; default is True
    :return: the updated params, or None if an exception occurred
    """
    np.random.seed(1)
    costs = []
    grads = {}
    n_x, n_h, n_y = layer_dims

    # Initialize the parameters.
    params = initial_parameters(n_x, n_h, n_y)

    try:
        for i in range(num_iterations):
            # Forward propagation.
            A1, cache1 = linear_activation_forward(
                X, params['W1'], params['b1'], activation='Relu')
            A2, cache2 = linear_activation_forward(
                A1, params['W2'], params['b2'], activation='Sigmoid')

            # Cost.
            cost = computer_cost(A2, Y)

            # Backward propagation, starting from dCost/dA2.
            dA2 = -(np.divide(Y, A2) - np.divide(1 - Y, 1 - A2))
            dA1, dW2, db2 = linear_activation_backward(dA2, cache2, 'Sigmoid')
            _dA0, dW1, db1 = linear_activation_backward(dA1, cache1, 'Relu')

            grads["dW1"] = dW1
            grads["db1"] = db1
            grads["dW2"] = dW2
            grads["db2"] = db2

            # Gradient-descent step.
            params = update_params(params, grads, alpha)

            if i % 100 == 0:
                costs.append(cost)
                if print_cost:
                    print(f"第{i}次迭代的成本是: ", np.squeeze(cost))

        if is_plot:
            # Only open a figure when something is actually drawn.
            plt.figure()
            plt.plot(np.squeeze(costs))
            plt.title(f'Learning rate is {alpha}')
            # Costs are sampled every 100 iterations (see the loop above).
            plt.xlabel('iterations per 100')
            plt.ylabel('cost')
            plt.show()
        return params
    except Exception:
        # ``except Exception`` lets KeyboardInterrupt / SystemExit through.
        # print_exc() writes the traceback itself and returns None, so it
        # must not be wrapped in print().
        traceback.print_exc()
        return None
# Load the dataset through the project helper.
(train_set_x_orig, train_set_y,
 test_set_x_orig, test_set_y, classes) = lr_utils.load_dataset()

# Flatten every sample into one column: result is (features, samples).
train_x_flatten = train_set_x_orig.reshape(train_set_x_orig.shape[0], -1).T
test_x_flatten = test_set_x_orig.reshape(test_set_x_orig.shape[0], -1).T

# Scale the inputs (presumably 8-bit pixel values -> [0, 1]; confirm against
# lr_utils.load_dataset).
train_x = train_x_flatten / 255
test_x = test_x_flatten / 255
train_y = train_set_y
test_y = test_set_y

# Two-layer architecture: input size, hidden units, output units.
n_x, n_h, n_y = 12288, 7, 1
layers_dims = (n_x, n_h, n_y)

print('==================two layers======================')
parameters = two_layers_nn(
    train_x,
    train_y,
    layer_dims=layers_dims,
    alpha=0.0075,
    num_iterations=1000,
    print_cost=True,
    is_plot=True,
)
def L_layers_nn(X, Y, layers_dims, alpha = 0.075, num_iterations = 1000, print_cost=False, is_plot=True):
    """
    Train an L-layer network: the first L-1 layers use Relu and the L-th
    layer uses Sigmoid. ``layers_dims`` gives the size of every layer.

    The cost is recorded every 100 iterations.

    :param X: input features
    :param Y: the labels: 0 (not cat); 1 (cat)
    :param layers_dims: sizes of the input layer, hidden layers, output layer
    :param alpha: learning rate
    :param num_iterations: number of iterations, default is 1000
    :param print_cost: print the recorded costs; default is False
    :param is_plot: plot the recorded costs after training; default is True
    :return: the parameters after the last iteration
    """
    np.random.seed(1)
    cost_history = []
    params = initial_params_deep(layers_dims)
    plt.figure()

    for i in range(num_iterations):
        # One full training step: forward, cost, backward, update.
        A_L, caches = L_layers_forward(X, params)
        cost = computer_cost(A_L, Y)
        params = update_params(params, L_layers_backward(A_L, Y, caches), alpha)

        if i % 100 != 0:
            continue
        cost_history.append(cost)
        if print_cost:
            print(f'第{i}次迭代成本是: %.3f' % cost)

    if not is_plot:
        return params

    plt.plot(np.squeeze(cost_history))
    plt.title(f'Learning rate is {alpha}')
    plt.ylabel('cost')
    plt.xlabel('Iterations per 100')
    plt.show()
    return params
print('=====================多层测试============================')
# Layer sizes: input layer, three hidden layers, output layer.
layers_dims = [12288, 20, 7, 5, 1]
parameters = L_layers_nn(
    train_x,
    train_y,
    layers_dims,
    alpha=0.0075,
    num_iterations=500,
    print_cost=True,
    is_plot=True,
)
def predict_y(X, y, params):
    """
    Predict labels with the trained parameters and print the accuracy.

    A sample is predicted as 1 when the network output is strictly greater
    than 0.5, otherwise 0.

    :param X: input features, one column per sample
    :param y: true labels, 0: not cat; 1: cat
    :param params: W[l] and b[l] for every layer
    :return: the predicted labels as a float array of 0s and 1s with the
             same shape as the network output
    """
    m = X.shape[1]
    probies, _caches = L_layers_forward(X, params)

    # Threshold every sample at once. The previous loop iterated over
    # probies.shape[0] (the single output row), so only the first sample
    # was ever classified and all others stayed 0.
    predict = (probies > 0.5).astype(float)

    accur = float(np.sum(predict == y) / m) * 100
    print("准确率:%.3f " % accur + str('%'))
    return predict
# Accuracy on the training set.
predictions_train = predict_y(X=train_x, y=train_y, params=parameters)
# Accuracy on the test set.
predictions_test = predict_y(X=test_x, y=test_y, params=parameters)
def print_mislabeled_image(classes, X, y, predict_y):
    """
    Plot every sample whose predicted label differs from its true label and
    save the figure to 'mislabeled_image.png'.

    With 0/1 labels, ``y + predict_y == 1`` holds exactly when the two
    disagree, so both kinds of mistakes (true 1 predicted 0, and true 0
    predicted 1) are shown.

    :param classes: array of class names stored as bytes, indexed by label
    :param X: input features, one column per sample; each column is reshaped
              to a 64x64x3 image
    :param y: true labels: 0 (not cat) or 1 (cat)
    :param predict_y: predicted labels
    :return: mislabeled_indices, the (row, column) indices of the mislabeled
             samples as returned by np.where
    """
    mislabeled_indices = np.asarray(np.where(y + predict_y == 1))
    # NOTE: this changes the global default figure size for later plots too.
    plt.rcParams['figure.figsize'] = (40, 60)
    num_images = len(mislabeled_indices[0])
    plt.figure()
    for i, index in enumerate(mislabeled_indices[1]):
        plt.subplot(2, num_images, i + 1)
        plt.imshow(X[:, index].reshape(64, 64, 3), interpolation='nearest')
        plt.axis('off')
        # Labels are cast to int before indexing ``classes`` (the prediction
        # array holds floats); separators added so the title no longer reads
        # e.g. "Predictioncat".
        predicted_name = classes[int(predict_y[0, index])].decode('utf-8')
        true_name = classes[int(y[0, index])].decode('utf-8')
        plt.title('Prediction: ' + predicted_name + '\n Class: ' + true_name)
    plt.savefig('mislabeled_image.png', dpi=500)
    return mislabeled_indices
# Plot and save the test-set samples the network got wrong.
mislabeled_indices = print_mislabeled_image(
    classes=classes,
    X=test_x,
    y=test_y,
    predict_y=predictions_test,
)

if __name__ == '__main__':
    # Nothing extra to run here: the whole script executes at import time.
    pass
吴恩达--神经网络-week1-hw4的更多相关文章
- coursera-斯坦福-机器学习-吴恩达-笔记week1
1 Introduction 1.1 概念:一个程序被认为能从经验E中学习,解决任务 T,达到性能度量值P,当且仅当, 有了经验E后,经过P评判, 程序在处理 T 时的性能有所提升. 1.2 机器学习 ...
- 吴恩达-神经网络-week1-hw3
Ref:https://blog.csdn.net/u013733326/article/details/79702148 点击查看代码 from testCases import * from pl ...
- 吴恩达深度学习第1课第4周-任意层人工神经网络(Artificial Neural Network,即ANN)(向量化)手写推导过程(我觉得已经很详细了)
学习了吴恩达老师深度学习工程师第一门课,受益匪浅,尤其是吴老师所用的符号系统,准确且易区分. 遵循吴老师的符号系统,我对任意层神经网络模型进行了详细的推导,形成笔记. 有人说推导任意层MLP很容易,我 ...
- 【吴恩达课后测验】Course 1 - 神经网络和深度学习 - 第二周测验【中英】
[中英][吴恩达课后测验]Course 1 - 神经网络和深度学习 - 第二周测验 第2周测验 - 神经网络基础 神经元节点计算什么? [ ]神经元节点先计算激活函数,再计算线性函数(z = Wx + ...
- 【吴恩达课后测验】Course 1 - 神经网络和深度学习 - 第一周测验【中英】
[吴恩达课后测验]Course 1 - 神经网络和深度学习 - 第一周测验[中英] 第一周测验 - 深度学习简介 和“AI是新电力”相类似的说法是什么? [ ]AI为我们的家庭和办公室的个人设备供电 ...
- Python3 反向传播神经网络-Min-Batch(根据吴恩达课程讲解编写)
# -*- coding: utf-8 -*- """ Created on Sat Jan 20 13:47:54 2018 @author: markli " ...
- 吴恩达deepLearning.ai循环神经网络RNN学习笔记_看图就懂了!!!(理论篇)
前言 目录: RNN提出的背景 - 一个问题 - 为什么不用标准神经网络 - RNN模型怎么解决这个问题 - RNN模型适用的数据特征 - RNN几种类型 RNN模型结构 - RNN block - ...
- 吴恩达deepLearning.ai循环神经网络RNN学习笔记_没有复杂数学公式,看图就懂了!!!(理论篇)
本篇文章被Google中国社区组织人转发,评价: 条理清晰,写的很详细! 被阿里算法工程师点在看! 所以很值得一看! 前言 目录: RNN提出的背景 - 一个问题 - 为什么不用标准神经网络 - RN ...
- 用纯Python实现循环神经网络RNN向前传播过程(吴恩达DeepLearning.ai作业)
Google TensorFlow程序员点赞的文章! 前言 目录: - 向量表示以及它的维度 - rnn cell - rnn 向前传播 重点关注: - 如何把数据向量化的,它们的维度是怎么来的 ...
随机推荐
- 深入浅出Mybatis系列(九)---缓存
MyBatis系统中默认定义了两级缓存:一级缓存和二级缓存. 1.默认情况下,只有一级缓存(SqlSession级别的缓存,也称为本地缓存)开启. 2.二级缓存需要手动开启和配置,他是基于namesp ...
- linux(3)--------SSH工具的安装使用
0.一般安装服务端的Linux ssh是默认安装的可以运行ssh localhost测试一下是否可以链接 1.SSH是什么 1)ssh:Secure Shell 安全外壳协议 2)建立在应用层基础上 ...
- JDBC中级篇(MYSQL)——在JDBC中如何获得表中的,自增长的字段值
注意:其中的JdbcUtil是我自定义的连接工具类:代码例子链接: package c_increment; import java.sql.Connection; import java.sql.P ...
- 统计学习:线性可分支持向量机(SVM)
模型 超平面 我们称下面形式的集合为超平面 \[\begin{aligned} \{ \bm{x} | \bm{a}^{T} \bm{x} - b = 0 \} \end{aligned} \tag{ ...
- ES读写数据的工作原理
es写入数据的工作原理是什么啊?es查询数据的工作原理是什么?底层的lucence介绍一下呗?倒排索引了解吗? 一.es写数据过程 1.客户端选择一个node发送请求过去,这个node就是coordi ...
- 12-SpringCloud GateWay
GateWay和Zuul说明 Zuul开发人员窝里斗,实属明日黄花 重点关注Gate Way GateWay是什么 上一代zuul 1.x官网 Gateway官网 概述 Cloud全家桶中有个很重要的 ...
- Playwright-python 教程
安装 pip install playwright -i https://mirrors.aliyun.com/pypi/simple/ 使用阿里源,下载速度快一点. python -m playwr ...
- JavaScript高级程序设计读书笔记之JSON
JSON(JavaScript Object Notation)JavaScript对象表示法.JSON是JavaScript的一个严格的子集,利用了JavaScript中的一些模式来表示结构化数据. ...
- 代码保留格式(高亮)复制到Word(转载)
将代码保持高亮复制粘贴到word上,一些方法如下: 方法一:借助网站http://www.planetb.ca/syntax-highlight-word/(代码有编号,整体排版精美令人舒适,但语言有 ...
- SQL语句之基本使用
1.sql语法 一些重要的SQL命令: SELECT - 从数据库中提取数据 UPDATE - 更新数据库中的数据 DELETE - 从数据库中删除数据 INSERT INTO - 向数据库中插入新数 ...