吴恩达--神经网络-week1-hw4

# Ref: https://blog.csdn.net/u013733326/article/details/79767169

import numpy as np

import testCases

import h5py

import matplotlib.pyplot as plt

from dnn_utils import sigmoid, sigmoid_backward, relu, relu_backward

import lr_utils

import traceback

# Seed NumPy's global random generator once at import time.
np.random.seed(1)

# 2 layers

def initial_parameters(n_x=4, n_h=2, n_y=1):
    """
    Build the initial parameters of a 2-layer network (one hidden layer).

    Both weight matrices are drawn from a standard normal distribution and
    scaled by 0.01; both bias vectors start at zero.

    :param n_x: dims of input layer
    :param n_h: dims of hidden layer
    :param n_y: dims of output layer
    :return: dict with 'W1' (n_h, n_x), 'b1' (n_h, 1), 'W2' (n_y, n_h), 'b2' (n_y, 1)
    """
    weight_scale = 0.01

    W1 = np.random.randn(n_h, n_x) * weight_scale
    b1 = np.zeros((n_h, 1))
    # Scale W2 exactly like W1 so both layers start with equally small weights.
    W2 = np.random.randn(n_y, n_h) * weight_scale
    b2 = np.zeros((n_y, 1))

    # Sanity checks on the shapes that were just built.
    assert W1.shape == (n_h, n_x)
    assert b1.shape == (n_h, 1)
    assert W2.shape == (n_y, n_h)
    assert b2.shape == (n_y, 1)

    return {
        'W1': W1,
        'b1': b1,
        'W2': W2,
        'b2': b2,
    }

# initialize the params of deep nn

def initial_params_deep(layers_dims):
    """
    Create the weights and biases for every layer of a deep network.

    The global NumPy generator is re-seeded with 3 on every call. Each weight
    matrix is a standard-normal draw divided by the square root of the size of
    the previous layer; each bias is a zero column vector.

    :param layers_dims: layer sizes, input layer first and output layer last
    :return: dict holding 'W<l>' and 'b<l>' for l = 1 .. len(layers_dims) - 1
    """
    np.random.seed(3)
    params = {}

    for layer in range(1, len(layers_dims)):
        fan_out = layers_dims[layer]
        fan_in = layers_dims[layer - 1]

        weights = np.random.randn(fan_out, fan_in) / np.sqrt(fan_in)
        params[f'W{layer}'] = weights
        params[f'b{layer}'] = np.zeros((fan_out, 1))

        assert weights.shape == (fan_out, fan_in)

    return params

def linear_forward(A, W, b):
    """
    Compute the linear part of one layer: Z = W . A + b.

    :param A: activations of the previous layer, i.e. the (l-1)-th layer
    :param W: weight matrix of the current layer, W[l]
    :param b: bias of the current layer, b[l]
    :return: (Z, cache) where cache is the tuple (A, W, b) kept for the backward pass
    """
    pre_activation = np.dot(W, A) + b

    expected_shape = (W.shape[0], A.shape[1])
    assert pre_activation.shape == expected_shape

    return pre_activation, (A, W, b)

def linear_activation_forward(A_pre, W, b, activation):
    """
    Run one full layer forward: the linear step followed by its activation.

    :param A_pre: activations of the previous layer
    :param W: W[l]
    :param b: b[l]
    :param activation: name of the activation to apply, 'Sigmoid' or 'Relu'
    :return: (A, cache) where A is the activation of the l-th layer and cache
        is the tuple (linear_cache, activ_cache)
    :raises ValueError: if ``activation`` is neither 'Sigmoid' nor 'Relu'
    """
    # Reject unknown names up front; otherwise neither branch below would run
    # and the function would die later with an UnboundLocalError.
    if activation not in ('Sigmoid', 'Relu'):
        raise ValueError(
            f"activation must be 'Sigmoid' or 'Relu', got {activation!r}")

    Z, linear_cache = linear_forward(A_pre, W, b)

    # Both helpers are expected to return (A, activation_cache).
    if activation == 'Sigmoid':
        A, activ_cache = sigmoid(Z)
    else:
        A, activ_cache = relu(Z)

    assert (Z.shape == (W.shape[0], A_pre.shape[1]))

    cache = (linear_cache, activ_cache)
    return A, cache

# Multi layers

def L_layers_forward(X, params):
    """
    Forward propagation through the whole network.

    Layers 1 .. L-1 use the 'Relu' activation and layer L uses 'Sigmoid',
    where L is half the number of entries in ``params``.

    :param X: a[0], the input features
    :param params: dict holding 'W<l>' and 'b<l>' for every layer
    :return: (A_L, caches) - the activation of the last layer and the list of
        per-layer caches, in layer order
    """
    num_layers = len(params) // 2
    caches = []
    current = X

    # Hidden layers.
    for layer in range(1, num_layers):
        current, hidden_cache = linear_activation_forward(
            current, params[f'W{layer}'], params[f'b{layer}'], activation='Relu')
        caches.append(hidden_cache)

    # Output layer.
    A_L, output_cache = linear_activation_forward(
        current, params[f'W{num_layers}'], params[f'b{num_layers}'], activation='Sigmoid')
    caches.append(output_cache)

    assert A_L.shape == (1, X.shape[1])

    return A_L, caches

def computer_cost(A_L, Y):
    """
    Compute the cross-entropy cost averaged over the m samples.

    Activations are clipped to [1e-15, 1 - 1e-15] before taking the logarithm.
    Without this, an activation of exactly 0 or 1 produces ``0 * -inf = nan``
    (or ``inf``), even when the prediction is perfectly correct. Values
    strictly inside that interval are unaffected.

    :param A_L: the activation of the L-th layer
    :param Y: labels, 0 or 1 (0: not cat; 1: cat); m is taken from Y.shape[1]
    :return: the total cost as a zero-dimensional value
    """
    m = Y.shape[1]

    # NOTE(review): the bound is chosen for float64 inputs - confirm if A_L can be float32.
    eps = 1e-15
    probs = np.clip(A_L, eps, 1 - eps)

    cost = -np.sum(np.multiply(np.log(probs), Y) + np.multiply(np.log(1 - probs), 1 - Y)) / m
    cost = np.squeeze(cost)

    assert (cost.shape == ())

    return cost

def linear_backward(dZ, cache):
    """
    Backward step for the linear part of one layer.

    :param dZ: gradient of the cost with respect to Z of the l-th layer
    :param cache: tuple (A_prev, W, b) stored by the forward pass of that layer;
        A_prev is the activation of the (l-1)-th layer, W and b belong to layer l
    :return: (dA_prev, dW, db)
    """
    prev_activation, weights, _bias = cache
    sample_count = prev_activation.shape[1]

    grad_w = np.dot(dZ, prev_activation.T) / sample_count
    grad_b = np.sum(dZ, axis=1, keepdims=True) / sample_count
    grad_prev = np.dot(weights.T, dZ)

    assert grad_prev.shape == prev_activation.shape

    return grad_prev, grad_w, grad_b

def linear_activation_backward(dA, cache, activation):
    """
    Backward step for one full layer: activation first, then the linear part.

    :param dA: gradient of the cost with respect to the activation of the l-th layer
    :param cache: tuple (linear_cache, activation_cache) stored by the forward pass
    :param activation: name of the layer's activation, "Relu" or "Sigmoid"
    :return: (dA_prev, dW, db), where dA_prev belongs to the (l-1)-th layer
    :raises ValueError: if ``activation`` is neither "Relu" nor "Sigmoid"
    """
    # Reject unknown names up front; otherwise neither branch below would run
    # and the return statement would fail with an UnboundLocalError.
    if activation not in ("Relu", "Sigmoid"):
        raise ValueError(
            f"activation must be 'Relu' or 'Sigmoid', got {activation!r}")

    linear_cache, activation_cache = cache

    if activation == "Relu":
        dZ = relu_backward(dA, activation_cache)
    else:
        dZ = sigmoid_backward(dA, activation_cache)

    # The linear step is identical for both activations.
    dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return dA_prev, dW, db

# L layers backward propagation

def L_layers_backward(A_L, Y, caches):
    """
    Backward propagation through the whole network.

    The last layer is differentiated with the 'Sigmoid' activation, every
    earlier layer with 'Relu'.

    :param A_L: activation of the last layer, as produced by the forward pass
    :param Y: true labels; reshaped to the shape of A_L before use
    :param caches: list of per-layer caches from the forward pass; caches[l]
        belongs to layer l + 1
    :return: dict holding 'dA<l-1>', 'dW<l>' and 'db<l>' for l = 1 .. len(caches)
    """
    grads = {}
    num_layers = len(caches)
    Y = Y.reshape(A_L.shape)

    output_cache = caches[num_layers - 1]

    # Gradient of -(Y * log(A_L) + (1 - Y) * log(1 - A_L)) with respect to A_L.
    dAL = -(np.divide(Y, A_L) - np.divide(1 - Y, 1 - A_L))

    (grads['dA' + str(num_layers - 1)],
     grads['dW' + str(num_layers)],
     grads['db' + str(num_layers)]) = linear_activation_backward(
        dAL, output_cache, activation='Sigmoid')

    # Walk the remaining layers from the last hidden layer back to the first.
    for l in reversed(range(num_layers - 1)):
        dA_prev_temp, dW_temp, db_temp = linear_activation_backward(
            grads['dA' + str(l + 1)], caches[l], activation='Relu')

        grads['dA' + str(l)] = dA_prev_temp
        grads['dW' + str(l + 1)] = dW_temp
        grads['db' + str(l + 1)] = db_temp

    return grads

def update_params(params, grads, alpha=0.05):
    """
    Apply one gradient-descent step to every 'W<l>' and 'b<l>'.

    The entries of ``params`` are rebound to new arrays (the old arrays are
    not modified in place) and the same dict object is returned.

    :param params: dict holding 'W<l>' and 'b<l>' for each layer
    :param grads: dict holding at least 'dW<l>' and 'db<l>' for each layer
    :param alpha: learning rate, 0.05 by default
    :return: the updated params
    """
    layer_count = len(params) // 2

    for layer in range(1, layer_count + 1):
        for prefix in ('W', 'b'):
            key = f'{prefix}{layer}'
            params[key] = params[key] - alpha * grads['d' + key]

    return params

def two_layers_nn(X, Y, layer_dims, alpha=0.075, num_iterations=10000, print_cost=False, is_plot=True):
    """
    Train a two-layer network: 'Relu' on the first layer, 'Sigmoid' on the second.

    The cost is recorded every 100 iterations. When ``is_plot`` is true the
    recorded costs are plotted once, after training has finished.

    :param X: input features
    :param Y: labels; 0 represents not cat, 1 represents cat
    :param layer_dims: (n_x, n_h, n_y): sizes of the input, hidden and output layer
    :param alpha: learning rate
    :param num_iterations: the number of iterations
    :param print_cost: print the cost every 100 iterations; default is False
    :param is_plot: plot the cost curve after training; default is True
    :return: the updated params, or None if an exception occurred during
        training or plotting (its traceback is printed)
    """
    np.random.seed(1)
    costs = []
    (n_x, n_h, n_y) = layer_dims

    # initialize the parameters
    params = initial_parameters(n_x, n_h, n_y)
    W1, b1 = params['W1'], params['b1']
    W2, b2 = params['W2'], params['b2']

    try:
        for i in range(num_iterations):
            # forward propagation
            A1, cache1 = linear_activation_forward(X, W1, b1, activation='Relu')
            A2, cache2 = linear_activation_forward(A1, W2, b2, activation='Sigmoid')

            # calculate the cost
            cost = computer_cost(A2, Y)

            # backward propagation
            dA2 = -(np.divide(Y, A2) - np.divide(1 - Y, 1 - A2))
            dA1, dW2, db2 = linear_activation_backward(dA2, cache2, 'Sigmoid')
            dA0, dW1, db1 = linear_activation_backward(dA1, cache1, 'Relu')

            grads = {"dW1": dW1, "db1": db1, "dW2": dW2, "db2": db2}

            # update
            params = update_params(params, grads, alpha)
            W1, b1 = params['W1'], params['b1']
            W2, b2 = params['W2'], params['b2']

            if i % 100 == 0:
                costs.append(cost)
                if print_cost:
                    print(f"第{i}次迭代的成本是: ", np.squeeze(cost))

        # Plot once, after the loop: showing the figure inside the loop would
        # open (and possibly block on) a window at every single iteration.
        if is_plot:
            plt.figure()
            plt.plot(np.squeeze(costs))
            plt.title(f'Learning rate is {alpha}')
            # costs holds one sample per 100 iterations
            plt.xlabel('Iterations per 100')
            plt.ylabel('cost')
            plt.show()

        return params
    except Exception:
        # Best effort, as before: report the failure and hand back None.
        # traceback.print_exc() prints by itself and returns None, so it must
        # not be wrapped in print().
        traceback.print_exc()
        return None

# Load the training/test images, their labels and the class names through the project helper.
train_set_x_orig , train_set_y , test_set_x_orig , test_set_y , classes = lr_utils.load_dataset()

# Keep the first axis, flatten everything else, then transpose:
# every sample becomes one column of the resulting matrix.
train_x_flatten = train_set_x_orig.reshape(train_set_x_orig.shape[0], -1).T

test_x_flatten = test_set_x_orig.reshape(test_set_x_orig.shape[0], -1).T

# Scale the features by 1/255 (presumably 8-bit pixel values mapped to [0, 1] - TODO confirm).
train_x = train_x_flatten / 255

train_y = train_set_y

test_x = test_x_flatten / 255

test_y = test_set_y

# Layer sizes of the two-layer model: input, hidden, output.
# NOTE(review): 12288 presumably equals 64 * 64 * 3 flattened pixels - confirm against the dataset.
n_x = 12288

n_h = 7

n_y = 1

# Not passed to the call below, which builds the same tuple inline.
layers_dims = (n_x,n_h,n_y)

print('==================two layers======================')

# NOTE: this trains the two-layer model at import time (module-level statement).
parameters = two_layers_nn(train_x, train_set_y, layer_dims=(n_x, n_h, n_y), alpha=0.0075, num_iterations=1000, print_cost=True, is_plot=True)

def L_layers_nn(X, Y, layers_dims, alpha=0.075, num_iterations=1000, print_cost=False, is_plot=True):
    """
    Train an L-layer network by plain gradient descent.

    Each iteration runs the forward pass, computes the cost, runs the backward
    pass and updates the parameters. The cost is recorded every 100 iterations
    and, when ``is_plot`` is true, plotted once training has finished.

    :param X: input features
    :param Y: the labels: 0 (not cat); 1 (cat)
    :param layers_dims: layer sizes: input layer, hidden layers, output layer
    :param alpha: learning rate
    :param num_iterations: number of iterations, default is 1000
    :param print_cost: print the cost every 100 iterations; default is False
    :param is_plot: plot the recorded costs; default is True
    :return: the parameters after the last update
    """
    np.random.seed(1)
    cost_history = []
    params = initial_params_deep(layers_dims)

    plt.figure()

    for i in range(num_iterations):
        A_L, caches = L_layers_forward(X, params)
        cost = computer_cost(A_L, Y)
        params = update_params(params, L_layers_backward(A_L, Y, caches), alpha)

        # Only every 100th iteration is recorded / reported.
        if i % 100 != 0:
            continue
        cost_history.append(cost)
        if print_cost:
            print(f'第{i}次迭代成本是: %.3f' % cost)

    if not is_plot:
        return params

    plt.plot(np.squeeze(cost_history))
    plt.title(f'Learning rate is {alpha}')
    plt.ylabel('cost')
    plt.xlabel('Iterations per 100')
    plt.show()

    return params

print('=====================多层测试============================')

# Five sizes (input, three hidden, output), i.e. four layers with weights.
layers_dims = [12288, 20, 7, 5, 1]  # 4-layer model

# NOTE: this trains the deep model at import time and rebinds `parameters`.
parameters = L_layers_nn(train_x, train_y, layers_dims, alpha=0.0075,

                         num_iterations=500, print_cost=True, is_plot=True)

def predict_y(X, y, params):
    """
    Predict a 0/1 label for every sample and print the accuracy against ``y``.

    A sample is labelled 1 when the network output is strictly greater than
    0.5, otherwise 0.

    :param X: input features, one sample per column
    :param y: true labels, 0: not cat; 1: cat
    :param params: W[l] b[l]
    :return: the predicted labels, as floats, in the same shape as the network output
    """
    m = X.shape[1]

    probies, _caches = L_layers_forward(X, params)

    # Threshold all samples at once. Samples are laid out along axis 1, so
    # looping over range(probies.shape[0]) would only ever visit the first one.
    predict = (probies > 0.5).astype(float)

    accur = float(np.sum((predict == y)) / m) * 100
    print("准确率：%.3f  " % accur + str('%'))

    return predict

# Evaluate the most recently trained `parameters` on both data sets (runs at import time).
predictions_train = predict_y(train_x, train_y, parameters) # training set

predictions_test = predict_y(test_x, test_y, parameters) # test set

def print_mislabeled_image(classes, X, y, predict_y):
    """
    Plot every sample whose prediction disagrees with its label and save the figure.

    A sample is selected when ``y + predict_y`` equals 1, i.e. (for 0/1 values)
    when exactly one of the true label and the prediction is 1. Each selected
    column of X is reshaped to (64, 64, 3) and drawn with a title built from
    the predicted and the true class name. The figure is written to
    'mislabeled_image.png'. The global default figure size is changed as a
    side effect.

    :param classes: array of class names stored as bytes (decoded as UTF-8)
    :param X: input features, one sample per column
    :param y: true labels: 0 (not cat) or 1 (cat)
    :param predict_y: predicted labels
    :return: mislabeled_indices, the stacked index arrays of the selected samples
    """
    label_sum = y + predict_y
    mislabeled_indices = np.asarray(np.where(label_sum == 1))

    plt.rcParams['figure.figsize'] = (40, 60)
    num_images = len(mislabeled_indices[0])

    plt.figure()
    for slot in range(num_images):
        # Row 1 holds the column (sample) index of each mismatch.
        index = mislabeled_indices[1][slot]

        plt.subplot(2, num_images, slot + 1)
        picture = X[:, index].reshape(64, 64, 3)
        plt.imshow(picture, interpolation='nearest')
        plt.axis('off')

        predicted_name = classes[int(predict_y[0, index])].decode('utf-8')
        true_name = classes[(y[0, index])].decode('utf-8')
        plt.title('Prediction' + predicted_name +
                  '\n Classes' + true_name)

    plt.savefig('mislabeled_image.png', dpi=500)

    return mislabeled_indices

# Plot and save the test-set samples whose prediction differs from the label (runs at import time).
mislabeled_indices = print_mislabeled_image(classes, test_x, test_y, predictions_test)

# The entry-point guard does nothing: all of the work above runs at module level.
if __name__ == '__main__':

    pass

吴恩达--神经网络-week1-hw4的更多相关文章

coursera-斯坦福-机器学习-吴恩达-笔记week1
1 Introduction 1.1 概念:一个程序被认为能从经验E中学习,解决任务 T,达到性能度量值P,当且仅当, 有了经验E后,经过P评判, 程序在处理 T 时的性能有所提升. 1.2 机器学习 ...
吴恩达-神经网络-week1-hw3
Ref:https://blog.csdn.net/u013733326/article/details/79702148 点击查看代码 from testCases import * from pl ...
吴恩达深度学习第1课第4周-任意层人工神经网络(Artificial Neural Network，即ANN)（向量化）手写推导过程（我觉得已经很详细了）
学习了吴恩达老师深度学习工程师第一门课,受益匪浅,尤其是吴老师所用的符号系统,准确且易区分．遵循吴老师的符号系统,我对任意层神经网络模型进行了详细的推导,形成笔记．有人说推导任意层MLP很容易,我 ...
【吴恩达课后测验】Course 1 - 神经网络和深度学习 - 第二周测验【中英】
[中英][吴恩达课后测验]Course 1 - 神经网络和深度学习 - 第二周测验第2周测验 - 神经网络基础神经元节点计算什么? [ ]神经元节点先计算激活函数,再计算线性函数(z = Wx + ...
【吴恩达课后测验】Course 1 - 神经网络和深度学习 - 第一周测验【中英】
[吴恩达课后测验]Course 1 - 神经网络和深度学习 - 第一周测验[中英] 第一周测验 - 深度学习简介和“AI是新电力”相类似的说法是什么? [ ]AI为我们的家庭和办公室的个人设备供电 ...
Python3 反向传播神经网络-Min-Batch(根据吴恩达课程讲解编写)
# -*- coding: utf-8 -*- """ Created on Sat Jan 20 13:47:54 2018 @author: markli " ...
吴恩达deepLearning.ai循环神经网络RNN学习笔记_看图就懂了！！！(理论篇)
前言目录: RNN提出的背景 - 一个问题 - 为什么不用标准神经网络 - RNN模型怎么解决这个问题 - RNN模型适用的数据特征 - RNN几种类型 RNN模型结构 - RNN block - ...
吴恩达deepLearning.ai循环神经网络RNN学习笔记_没有复杂数学公式，看图就懂了！！！(理论篇)
本篇文章被Google中国社区组织人转发,评价: 条理清晰,写的很详细! 被阿里算法工程师点在看! 所以很值得一看! 前言目录: RNN提出的背景 - 一个问题 - 为什么不用标准神经网络 - RN ...
用纯Python实现循环神经网络RNN向前传播过程(吴恩达DeepLearning.ai作业)
Google TensorFlow程序员点赞的文章! 前言目录: - 向量表示以及它的维度 - rnn cell - rnn 向前传播重点关注: - 如何把数据向量化的,它们的维度是怎么来的 ...

随机推荐

Git撤销&回滚操作(git reset 和 get revert)
转自:https://blog.csdn.net/asoar/article/details/84111841 git的工作流工作区:即自己当前分支所修改的代码,git add xx 之前的!不包括 ...
jQuery中的基本过滤选择器（四、三）：:first、:last、:not() ... ...
<!DOCTYPE html> <html> <head> <title>基本过滤选择器</title> <meta http-equ ...
工具库用久了，你还会原生操作 Cookie 吗？
用得好了,工具库和框架确实是一大助力,但就怕我们会因此习惯了走捷径,而忘了自己的根本依靠是什么. 前言前端技术的飞速发展,给从业人员不可避免地带来了"疲劳"感,我们常常会感叹学不 ...
痞子衡嵌入式：简析i.MXRT1170 XECC功能特点及其保护串行NOR Flash和SDRAM之道
大家好,我是痞子衡,是正经搞技术的痞子.今天痞子衡给大家分享的是i.MXRT1170 XECC功能特点及其保护串行NOR Flash和SDRAM之道. ECC 是 "Error Correc ...
Linux命令集锦之·字符截取命令
时间:2018-11-15 记录:byzqy 字符截取命令: cut.printf.awk.sed cut $ cut [选项] 文件名选项: -f 列号:提取第几列: -d 分隔符:按照指定分隔符 ...
Learning ROS: Packaging your ROS project as a snap
Snaps are containerised software packages that are simple to create and install. They auto-update an ...
linux centos7 重启后网络出现问题
2021-08-04 重启虚拟机后发现网络出了问题,输入 ip a 查看网络,出现以下情况查看配置文件 cat /etc/sysconfig/network-scripts/ifcfg-ens33 ...
在同一台计算机中运行多个MySQL服务
目录一.问题的来源二.配置 1. 修改原来MySQL系统的my.ini文件 2. 修改注册表 3. 重新启动服务 4. 最终效果一.问题的来源这个学期里我需要修读<数据库系统>的课 ...
痞子衡嵌入式：MCUXpresso IDE下工程链接文件配置管理与自动生成机制
大家好,我是痞子衡,是正经搞技术的痞子.今天痞子衡给大家介绍的是MCUXpresso IDE下工程链接文件配置管理与自动生成机制. 痞子衡在 2018 年初写过一个专题 <嵌入式开发文件系列&g ...
spring整合jdbc方法一
用了一段时间的spring这,闲来没事做一下spring整合jdbc 目录文件导入jar包由于spring的jar包是在myeclipse中自动导入的有些暂时用不到的也没有处理. Emp类 pac ...

吴恩达--神经网络-week1-hw4

吴恩达--神经网络-week1-hw4的更多相关文章

随机推荐

热门专题