Scikit-Learn 机器学习笔记 -- 线性回归、逻辑回归、softmax 回归

import numpy as np

from matplotlib import pyplot as plt

# 创建线性回归数据集

def create_dataset():
    """Generate a noisy linear toy dataset.

    Returns:
        A ``(X, y)`` tuple of ``(100, 1)`` arrays. ``X`` is ``2 * rand`` and
        ``y`` is ``4 + 3 * X`` plus standard-normal noise.
    """
    n_samples = 100
    features = np.random.rand(n_samples, 1) * 2
    # Noise is drawn after the features, so the random stream is consumed
    # in the same order as before.
    noise = np.random.randn(n_samples, 1)
    targets = 4 + 3 * features + noise
    return features, targets

# 线性回归解析法：使用正态方程求解，直接得到全局最优解

def linear_regression_analysis(X, y):
    """Fit a linear regression in closed form and plot the fitted line.

    Solves the normal equation on ``X`` augmented with a bias column, prints
    the solution, then draws the fitted line over the training points.

    Args:
        X: Feature array of shape ``(m, 1)``; the prediction samples and the
            plot below assume a single feature.
        y: Target array of shape ``(m, 1)``.

    Returns:
        ``X_b``: ``X`` with a leading column of ones, for reuse by the
        gradient-descent functions.
    """
    # Size the bias column from the data instead of assuming 100 samples.
    X_b = np.c_[np.ones((len(X), 1)), X]

    # Normal equation: theta = (X_b^T X_b)^-1 X_b^T y.
    theta_best = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y)
    print("线性回归解析解为：", theta_best)

    # Predict at both ends of the plotted x-range to draw the fitted line.
    sample = np.array([[0], [2]])
    sample_b = np.c_[np.ones((len(sample), 1)), sample]
    predict = sample_b.dot(theta_best)

    plt.plot(sample, predict, 'r-')
    plt.plot(X, y, 'b.')
    plt.axis([0, 2, 0, 15])
    plt.show()

    return X_b

# 使用sk-learn的线性回归模型，默认使用解析法

def linear_regression_sk(X, y):
    """Fit scikit-learn's ``LinearRegression`` and print its parameters.

    Args:
        X: Feature array passed to ``fit``.
        y: Target array passed to ``fit``.
    """
    from sklearn.linear_model import LinearRegression

    model = LinearRegression()
    model.fit(X, y)
    bias, weights = model.intercept_, model.coef_
    print('sk-learn线性回归解析解：', 'b：', bias, 'w：', weights)

# 线性回归批量梯度下降法（batch gradient descent）

def linear_regression_batch_gd(X_b, y):
    """Fit linear regression with batch gradient descent and print the result.

    Args:
        X_b: Design matrix of shape ``(m, n)`` that already contains the
            bias column of ones.
        y: Target array of shape ``(m, 1)``.

    Returns:
        The learned parameter vector ``theta`` of shape ``(n, 1)``.
    """
    # Fixed step size and iteration budget.
    learning_rate = 0.1
    max_iterations = 1000

    # Take the sample and parameter counts from the data; assuming 100 rows
    # and 2 columns mis-scales the gradient or fails on any other input.
    m, n_params = X_b.shape

    # Random starting point.
    theta = np.random.randn(n_params, 1)

    for _ in range(max_iterations):
        # Gradient of the mean squared error over the whole training set.
        gradients = 2 / m * X_b.T.dot(X_b.dot(theta) - y)
        theta = theta - learning_rate * gradients

    print('线性回归批量梯度下降法解：', theta)
    return theta

# 线性回归随机梯度下降法（stochastic gradient descent）

def linear_regression_stochastic_gd(X_b, y):
    """Fit linear regression with stochastic gradient descent and print it.

    Each step uses one randomly drawn sample and a step size that decays as
    ``1 / (step + 10)``.

    Args:
        X_b: Design matrix of shape ``(m, n)`` that already contains the
            bias column of ones.
        y: Target array with one row per sample.

    Returns:
        The learned parameter vector ``theta`` of shape ``(n, 1)``.
    """
    n_epochs = 50

    # Take the sample and parameter counts from the data. With a hard-coded
    # m=100, indices past a shorter dataset slice to empty arrays and the
    # step silently does nothing.
    m, n_params = X_b.shape

    theta = np.random.randn(n_params, 1)

    for epoch in range(n_epochs):
        for i in range(m):
            random_index = np.random.randint(m)
            # One-row slices keep xi and yi two-dimensional.
            xi = X_b[random_index:random_index + 1]
            yi = y[random_index:random_index + 1]
            gradients = 2 * xi.T.dot(xi.dot(theta) - yi)
            # Shrink the step as training progresses.
            learning_rate = 1.0 / (epoch * m + i + 10)
            theta = theta - learning_rate * gradients

    print('线性回归随机梯度下降法解：', theta)
    return theta

# sk-learn 线性回归随机梯度下降

def linear_regression_stochastic_gd_sk(X, y):
    """Fit scikit-learn's ``SGDRegressor`` without regularization and print it.

    Args:
        X: Feature array passed to ``fit``.
        y: Target array; flattened with ``ravel()`` before fitting.
    """
    from sklearn.linear_model import SGDRegressor

    # `n_iter` was removed from SGDRegressor; `max_iter` is its replacement
    # and is what the rest of this file uses. tol=None disables the
    # convergence check so all 50 epochs run, as `n_iter=50` did.
    sgd_reg = SGDRegressor(max_iter=50, tol=None, penalty=None, eta0=0.1)
    sgd_reg.fit(X, y.ravel())

    print('sk-learn线性回归随机梯度下降法解：', 'b：', sgd_reg.intercept_, 'w：', sgd_reg.coef_)

# 创建多项式回归数据集

def create_dataset_poly():
    """Generate a noisy quadratic toy dataset.

    Returns:
        A ``(X1, y1)`` tuple of ``(100, 1)`` arrays. ``X1`` is
        ``6 * rand - 3`` and ``y1`` is ``0.5 * X1**2 + X1 + 2`` plus
        standard-normal noise.
    """
    n_samples = 100
    inputs = np.random.rand(n_samples, 1) * 6 - 3
    # Noise is drawn after the inputs, so the random stream is consumed in
    # the same order as before.
    noise = np.random.randn(n_samples, 1)
    quadratic_term = 0.5 * inputs ** 2
    outputs = quadratic_term + inputs + 2 + noise
    return inputs, outputs

# 多项式回归

def polynomial_regression(X, y):
    """Fit a degree-2 polynomial regression and print its parameters.

    Args:
        X: Feature array; expanded with ``PolynomialFeatures`` before fitting.
        y: Target array.

    Returns:
        The ``LinearRegression`` instance fitted on the expanded features.
    """
    from sklearn.linear_model import LinearRegression
    from sklearn.preprocessing import PolynomialFeatures

    # Expand to degree-2 terms; include_bias=False leaves the intercept to
    # the linear model.
    expander = PolynomialFeatures(degree=2, include_bias=False)
    expanded = expander.fit_transform(X)

    model = LinearRegression()
    model.fit(expanded, y)

    print('多项式回归解：', 'b：', model.intercept_, 'w：', model.coef_)
    return model

# 绘制关于训练集规模的学习曲线

def plot_learning_curves(model, X, y):
    """Plot training and validation RMSE against training-set size.

    Holds out 20% of the data for validation, refits ``model`` on the first
    ``m`` training samples for every ``m`` from 1 to the full training set,
    and plots both error curves. Point ``k`` on the x-axis is size ``k + 1``.

    Args:
        model: Estimator exposing ``fit`` and ``predict``; it is refitted in
            place on every iteration.
        X: Feature array.
        y: Target array.
    """
    from sklearn.metrics import mean_squared_error
    from sklearn.model_selection import train_test_split

    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2)
    train_errors, val_errors = [], []

    # The "+ 1" makes the last point use the whole training set.
    for m in range(1, len(X_train) + 1):
        model.fit(X_train[:m], y_train[:m])
        y_train_predict = model.predict(X_train[:m])
        y_val_predict = model.predict(X_val)
        # mean_squared_error takes (y_true, y_pred).
        train_errors.append(mean_squared_error(y_train[:m], y_train_predict))
        val_errors.append(mean_squared_error(y_val, y_val_predict))

    # Square roots turn the MSE values into RMSE.
    plt.plot(np.sqrt(train_errors), "r-+", linewidth=2, label="train")
    plt.plot(np.sqrt(val_errors), "b-", linewidth=3, label="val")
    # The label= values are only displayed once legend() is called.
    plt.legend()
    plt.show()

# 岭回归，l2正则化，封闭方程求解

def ridge_regression_analysis(X, y):
    """Fit a ridge (L2-regularized) regression and print its parameters.

    Args:
        X: Feature array passed to ``fit``.
        y: Target array passed to ``fit``.
    """
    from sklearn.linear_model import Ridge

    # The "cholesky" solver is the closed-form route.
    model = Ridge(alpha=1, solver="cholesky")
    model.fit(X, y)
    bias, weights = model.intercept_, model.coef_
    print('岭回归解：', 'b：', bias, 'w：', weights)

# Lasso regression: L1 regularization (no closed-form solution; scikit-learn fits it by coordinate descent)

def lasso_regression_analysis(X, y):
    """Fit a Lasso regression with ``alpha=0.1`` and print its parameters.

    Args:
        X: Feature array passed to ``fit``.
        y: Target array passed to ``fit``.
    """
    from sklearn.linear_model import Lasso

    model = Lasso(alpha=0.1)
    model.fit(X, y)
    bias, weights = model.intercept_, model.coef_
    print('Lasso 回归解：', 'b：', bias, 'w：', weights)

# l2,l1正则化，梯度下降求解

def regularization_regression_gd(X, y):
    """Fit an L2-penalized ``SGDRegressor`` and print its parameters.

    Args:
        X: Feature array passed to ``fit``.
        y: Target array; flattened with ``ravel()`` before fitting.
    """
    from sklearn.linear_model import SGDRegressor

    # Swap penalty="l2" for penalty="l1" to get L1 regularization instead.
    model = SGDRegressor(penalty="l2")
    flat_targets = y.ravel()
    model.fit(X, flat_targets)
    print('l2梯度下降法解：', 'b：', model.intercept_, 'w：', model.coef_)

# Elastic Net regularization: a mix of the L1 and L2 penalties

def elasticnet_regression_gd(X, y):
    """Fit an ``ElasticNet`` regression and print its parameters.

    Args:
        X: Feature array passed to ``fit``.
        y: Target array passed to ``fit``.
    """
    from sklearn.linear_model import ElasticNet

    # l1_ratio is the mix ratio, i.e. the share taken by the L1 penalty.
    model = ElasticNet(alpha=0.1, l1_ratio=0.5)
    model.fit(X, y)
    bias, weights = model.intercept_, model.coef_
    print('弹性网络解：', 'b：', bias, 'w：', weights)

# 早期停止法（Early Stopping）

def early_stoping(X, y):
    """Train an SGD regressor epoch by epoch and keep the best-validating one.

    Holds out 20% of the data, runs 1000 single-epoch training steps, tracks
    the epoch with the lowest validation MSE and prints its index.

    Args:
        X: Feature array.
        y: Target array; flattened with ``ravel()`` before fitting.

    Returns:
        A fitted copy of the regressor as it was at the best epoch.
    """
    from copy import deepcopy

    from sklearn.linear_model import SGDRegressor
    from sklearn.metrics import mean_squared_error
    from sklearn.model_selection import train_test_split

    # With warm_start=True each fit() call continues from the previous
    # weights, so max_iter=1 makes every call one more epoch. tol=None turns
    # off the convergence check, which is meaningless for one-epoch fits.
    sgd_reg = SGDRegressor(max_iter=1, tol=None, warm_start=True, penalty=None,
                           learning_rate="constant", eta0=0.0005)

    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2)
    y_train_flat = y_train.ravel()

    minimum_val_error = float("inf")
    best_epoch = None
    best_model = None

    for epoch in range(1000):
        sgd_reg.fit(X_train, y_train_flat)
        y_val_predict = sgd_reg.predict(X_val)
        # mean_squared_error takes (y_true, y_pred).
        val_error = mean_squared_error(y_val, y_val_predict)
        if val_error < minimum_val_error:
            minimum_val_error = val_error
            best_epoch = epoch
            # sklearn.base.clone copies only the hyper-parameters and would
            # return an unfitted estimator; deepcopy keeps the learned weights.
            best_model = deepcopy(sgd_reg)

    print('stopping in:', best_epoch)
    return best_model

# 加载鸢尾花数据集

def load_dataset_flower():
    """Load scikit-learn's bundled iris dataset.

    Returns:
        The object returned by ``sklearn.datasets.load_iris()``; callers in
        this file index it with ``"data"`` and ``"target"``.
    """
    from sklearn.datasets import load_iris

    return load_iris()

# logistic 回归

def logistic_classify(iris):
    """Fit a binary logistic regression on petal width and plot its curves.

    The positive class is target label 2 (labelled "Iris-Virginica" in the
    plot). The plot shows the predicted probability of each class for petal
    widths from 0 to 3.

    Args:
        iris: Iris dataset object indexable by ``"data"`` and ``"target"``.
    """
    from sklearn.linear_model import LogisticRegression

    X = iris["data"][:, 3:]  # petal width
    # `np.int` was removed from NumPy; the builtin `int` is the same dtype.
    y = (iris["target"] == 2).astype(int)

    log_reg = LogisticRegression()
    log_reg.fit(X, y)

    # Evaluate the model on a dense grid of petal widths for plotting.
    X_new = np.linspace(0, 3, 1000).reshape(-1, 1)
    y_proba = log_reg.predict_proba(X_new)

    plt.plot(X_new, y_proba[:, 1], "g-", label="Iris-Virginica")
    plt.plot(X_new, y_proba[:, 0], "b--", label="Not Iris-Virginica")
    # The label= values are only displayed once legend() is called.
    plt.legend()
    plt.show()

# softmax 回归多分类

def softmax_classify(iris):
    """Fit a softmax (multinomial logistic) classifier and print a prediction.

    Trains on two columns of the iris data against all target classes, then
    prints the predicted class and class probabilities for ``[5, 2]``.

    Args:
        iris: Iris dataset object indexable by ``"data"`` and ``"target"``.
    """
    from sklearn.linear_model import LogisticRegression

    petal_features = iris["data"][:, (2, 3)]  # petal length, petal width
    labels = iris["target"]

    classifier = LogisticRegression(multi_class="multinomial", solver="lbfgs", C=10)
    classifier.fit(petal_features, labels)

    # One query sample, reused for both the class and the probabilities.
    query = [[5, 2]]
    predicted_class = classifier.predict(query)
    class_probabilities = classifier.predict_proba(query)
    print('softmax回归预测为：', predicted_class, '各类概率为', class_probabilities)

# Script entry point. Only the iris classification demos are active; the
# regression demos are commented out. Some of them depend on values produced
# by earlier commented-out lines (X_b, X1/y1, lin_reg_poly), so re-enable
# those lines together.
if __name__ == '__main__':

    # Build the linear-regression dataset

    X, y = create_dataset()

    # Linear regression, closed-form solution (also produces X_b)

    # X_b = linear_regression_analysis(X, y)

    # Linear regression with scikit-learn

    # linear_regression_sk(X, y)

    # Linear regression, batch gradient descent (needs X_b)

    # linear_regression_batch_gd(X_b, y)

    # Linear regression, stochastic gradient descent (needs X_b)

    # linear_regression_stochastic_gd(X_b, y)

    # Linear regression, stochastic gradient descent with scikit-learn

    # linear_regression_stochastic_gd_sk(X, y)

    # Build the polynomial-regression dataset

    # X1, y1 = create_dataset_poly()

    # Polynomial regression (needs X1, y1)

    # lin_reg_poly = polynomial_regression(X1, y1)

    # Learning curves against training-set size (needs lin_reg_poly, X1, y1)

    # plot_learning_curves(lin_reg_poly, X1, y1)

    # Ridge regression, L2 regularization

    # ridge_regression_analysis(X, y)

    # Lasso regression, L1 regularization

    # lasso_regression_analysis(X, y)

    # Regularization with gradient descent

    # regularization_regression_gd(X, y)

    # Elastic Net

    # elasticnet_regression_gd(X, y)

    # Early stopping (needs X1, y1)

    # early_stoping(X1, y1)

    # Load the iris dataset

    iris = load_dataset_flower()

    # Logistic regression, binary classification

    logistic_classify(iris)

    # Softmax regression, multi-class classification

    softmax_classify(iris)

本文链接：https://blog.csdn.net/Wang_Jiankun/article/details/81114716

Scikit-Learn 机器学习笔记 -- 线性回归、逻辑回归、softma的更多相关文章

线性回归,逻辑回归,神经网络,SVM的总结
目录线性回归,逻辑回归,神经网络,SVM的总结线性回归,逻辑回归,神经网络,SVM的总结详细的学习笔记. markdown的公式编辑手册. 回归的含义: 回归就是指根据之前的数据预测一个准确的输 ...
机器学习总结之逻辑回归Logistic Regression
机器学习总结之逻辑回归Logistic Regression 逻辑回归logistic regression,虽然名字是回归,但是实际上它是处理分类问题的算法.简单的说回归问题和分类问题如下: 回归问 ...
机器学习（1）- 概述&线性回归&逻辑回归&正则化
根据Andrew Ng在斯坦福的<机器学习>视频做笔记,已经通过李航<统计学习方法>获得的知识不赘述,仅列出提纲. 1 初识机器学习 1.1 监督学习(x,y) 分类(输出y是 ...
斯坦福机器学习视频笔记 Week3 逻辑回归与正则化 Logistic Regression and Regularization
我们将讨论逻辑回归. 逻辑回归是一种将数据分类为离散结果的方法. 例如,我们可以使用逻辑回归将电子邮件分类为垃圾邮件或非垃圾邮件. 在本模块中,我们介绍分类的概念,逻辑回归的损失函数(cost fun ...
Stanford机器学习---第三讲. 逻辑回归和过拟合问题的解决 logistic Regression & Regularization
原文:http://blog.csdn.net/abcjennifer/article/details/7716281 本栏目(Machine learning)包括单参数的线性回归.多参数的线性回归 ...
机器学习入门11 - 逻辑回归 (Logistic Regression)
原文链接:https://developers.google.com/machine-learning/crash-course/logistic-regression/ 逻辑回归会生成一个介于 0 ...
吴恩达机器学习笔记 —— 7 Logistic回归
http://www.cnblogs.com/xing901022/p/9332529.html 本章主要讲解了逻辑回归相关的问题,比如什么是分类?逻辑回归如何定义损失函数?逻辑回归如何求最优解?如何 ...
Spark机器学习(2)：逻辑回归算法
逻辑回归本质上也是一种线性回归,和普通线性回归不同的是,普通线性回归特征到结果输出的是连续值,而逻辑回归增加了一个函数g(z),能够把连续值映射到0或者1. MLLib的逻辑回归类有两个:Logist ...
【笔记】逻辑回归中使用多项式（sklearn）
在逻辑回归中使用多项式特征以及在sklearn中使用逻辑回归并添加多项式在逻辑回归中使用多项式特征在上面提到的直线划分中,很明显有个问题,当样本并没有很好地遵循直线划分(非线性分布)的时候,其预测 ...
【机器学习基础】逻辑回归——LogisticRegression
LR算法作为一种比较经典的分类算法,在实际应用和面试中经常受到青睐,虽然在理论方面不是特别复杂,但LR所牵涉的知识点还是比较多的,同时与概率生成模型.神经网络都有着一定的联系,本节就针对这一算法及其所 ...

随机推荐

UCOSII消息队列
主结构体 typedef struct os_q { /* QUEUE CONTROL BLOCK */ struct os_q *OSQPtr; /* Link to next queue cont ...
c# 使用泛型集合List<T>
Jquery简单闭包
<html> <body> <script src="Js/Index.js"></script> <script type= ...
jqGrid行编辑配置，方法，事件
行编辑可以在行修改后更新数据,如下图所示用户用鼠标点击选择一行,jqGrid将可编辑的字段转换为数据输入单元,如上面图所示.不可编辑的列,如id,不会转为可输入单元,而是保持不变.可以通过配置col ...
vmware联网解决方案：host-only共享上网
一.需求说明 vmware通过桥接的方式可以上外网,但是虚拟机的IP地址必须和物理网卡在同一网段,上网环境不同虚拟机必须跟着换ip地址很麻烦,所以最好是采用host-only方式上网. 特别是做架构实 ...
学会github管理项目与VS单元测试——第二次作业
GIT地址 https://github.com/Bing-Fairy/AchaoCalculator GIT用户名 Bing—Fairy 学号后五位 62502 博客地址 https://home. ...
《hello-world》第九次团队作业：Beta冲刺与验收准备
项目内容这个作业属于哪个课程 2016级计算机科学与工程学院软件工程(西北师范大学) 这个作业的要求在哪里实验十三团队作业9:Beta冲刺与团队项目验收团队名称 <hello--wor ...
MyBatis-Plus-Generator配置
本文仅对使用MyBatis-Plus的代码生成器配置做保存,适合使用了该插件的童鞋做参考. 内部有大量默认配置,有兴趣的童鞋可以研究下源码. ps:官方文档更齐全http://mp.baomidou. ...
SVM: 用kernels(核函数)来定义新的features,避免使用多项式,高斯kernel
应用kernels来进行非线性分类非线性分类:是否存在好的features的选择(而不是多项式)--f1,f2,f3.... 上图是一个非线性分类的问题,前面讲过,我们可以应用多项式(feature ...
mysql 8.0.17 安装配置方法图文教程
1.URL:https://www.jb51.net/article/167782.htm 2.装好之后需要使用add user中的用户名和密码登录(之前安装数据库时出现的) 使用navicat连接时 ...

Scikit-Learn 机器学习笔记 -- 线性回归、逻辑回归、softma

Scikit-Learn 机器学习笔记 -- 线性回归、逻辑回归、softma的更多相关文章

随机推荐

热门专题