logistic 回归（线性和非线性）

一：线性logistic 回归

代码如下：

import numpy as np

import pandas as pd

import matplotlib.pyplot as plt

import scipy.optimize as opt

import seaborn as sns

# Load the data set: a CSV without a header row, so column names are supplied.
path = 'ex2data1.txt'
data = pd.read_csv(
    path,
    names=['Exam 1', 'Exam 2', 'Admitted'],
    header=None,
)

# Separate the rows labelled 1 from the rows labelled 0.
positive = data.loc[data['Admitted'].isin([1])]
negative = data.loc[data['Admitted'].isin([0])]

# Disabled snippet: the scatter-plot code below is wrapped in a bare
# triple-quoted string, so Python evaluates it as an unused string literal and
# never executes it. Unquoted, it would draw the `positive` rows as blue
# circles and the `negative` rows as red crosses on one set of axes.
'''

#查看分布

fig, ax = plt.subplots(figsize=(12, 8))

ax.scatter(positive['Exam 1'], positive['Exam 2'], s=60, c='b', marker='o', label='Admitted')

ax.scatter(negative['Exam 1'], negative['Exam 2'], s=50, c='r', marker='x', label='UnAdmitted')

ax.legend()

ax.set_xlabel('Exam 1 Score')

ax.set_ylabel('Exam 2 Score')

plt.show()

'''

# Sigmoid (logistic) function.
def sigmoid(h):
    """Return 1 / (1 + e^(-h)), applied element-wise when ``h`` is an array."""
    decay = np.exp(-h)
    return 1 / (1 + decay)

# Disabled snippet: a visual check of sigmoid() over the integers -10..10.
# It sits inside a bare triple-quoted string, so it is an unused string
# literal and is never executed.
'''

#测试sigmoid函数

nums = np.arange(-10, 11, step=1)

fig, ax = plt.subplots(figsize=(12, 8))

ax.plot(nums, sigmoid(nums), 'k')

plt.show()

'''

# Logistic-regression cost.
def cost(theta, X, y):
    """Return the mean cross-entropy cost of ``theta`` on the samples ``(X, y)``.

    All three arguments are converted to ``np.matrix``, so ``X * theta.T`` is
    the matrix product of the samples with the parameter row. The result is
    the sum of ``-y*log(h) - (1-y)*log(1-h)`` divided by the number of rows
    of ``X``, where ``h = sigmoid(X * theta.T)``.
    """
    weights = np.matrix(theta)
    samples = np.matrix(X)
    labels = np.matrix(y)

    # Evaluate the hypothesis once and reuse it in both terms.
    hypothesis = sigmoid(samples * weights.T)
    positive_term = np.multiply(-labels, np.log(hypothesis))
    negative_term = np.multiply(1 - labels, np.log(1 - hypothesis))
    return np.sum(positive_term - negative_term) / len(samples)

# Prepend a constant column of ones so it becomes the first column of `data`.
data.insert(0, 'ones', 1)

# Every column except the last is a feature; the last column is the label.
cols = data.shape[1]
X = np.array(data.iloc[:, :cols - 1].values)
y = np.array(data.iloc[:, cols - 1:].values)

# Start from an all-zero, flat parameter vector (one entry per column of X).
theta = np.zeros(3)

# Gradient of the cost; note that this returns a vector, not a scalar.
def gradient(theta, X, y):
    """Return the gradient of the logistic cost with respect to ``theta``.

    The arguments are converted to ``np.matrix`` exactly as in ``cost``.
    The result is ``(sigmoid(X * theta.T) - y).T * X / m`` with ``m`` the
    number of rows of ``X``; it is a ``np.matrix`` with a single row and one
    column per parameter.
    """
    theta = np.matrix(theta)
    X = np.matrix(X)
    y = np.matrix(y)

    # Residual between the predicted probability and the label, one row per sample.
    error = sigmoid(X * theta.T) - y
    return error.T.dot(X) / len(X)

# Let SciPy's fmin_tnc optimiser search for the theta that minimises `cost`,
# using `gradient` as the analytic derivative. result[0] holds the solution.
result = opt.fmin_tnc(cost, theta, fprime=gradient, args=(X, y))
# Optional sanity check of the final cost: print(cost(result[0], X, y))

# Evaluate the fitted theta: compute predictions for a set of samples.
def predict(theta, X):
    """Return a list of 0/1 predictions, one per row of ``X``.

    A row is predicted as 1 when ``sigmoid(X * theta.T)`` is strictly greater
    than 0.5 for that row, otherwise 0.
    """
    probabilities = sigmoid(np.matrix(X) * np.matrix(theta).T)
    return [1 if row_probability > 0.5 else 0 for row_probability in probabilities]

theta_min = result[0]
predictions = predict(theta_min, X)

# y is an (m, 1) column; flatten it so each prediction is paired with one label.
correct = [1 if a == b else 0 for (a, b) in zip(predictions, y.ravel())]

# Percentage of correct predictions. The previous `sum % len` was a modulo,
# which only coincides with the percentage when there are exactly 100 samples.
accuracy = 100.0 * sum(correct) / len(correct)
print('accuracy = {0}%'.format(accuracy))  # about 89% on the training set

# Plot the decision boundary on top of the training points.
# The boundary is theta0 + theta1 * x1 + theta2 * x2 = 0; after dividing theta
# by theta2 this becomes x2 = -(theta0 + theta1 * x1).
theta_temp = theta_min / theta_min[2]

# Use dedicated names for the line so the label array `y` is not overwritten.
boundary_x = np.arange(130, step=0.1)
boundary_y = -(theta_temp[0] + theta_temp[1] * boundary_x)

# Scatter the original samples, coloured by label.
sns.set(context='notebook', style='ticks', font_scale=1.5)
# x/y are passed by keyword and the figure size via `height`: current seaborn
# releases reject positional x/y and no longer accept the old `size` argument.
sns.lmplot(x='Exam 1', y='Exam 2', hue='Admitted', data=data,
           height=6,
           fit_reg=False,
           scatter_kws={"s": 25}
           )

# Draw the boundary line.
plt.plot(boundary_x, boundary_y, 'grey')
plt.xlim(0, 130)
plt.ylim(0, 130)
plt.title('Decision Boundary')
plt.show()

二：非线性logistic 回归（正则化）

代码如下：

import pandas as pd

import numpy as np

import scipy.optimize as opt

import matplotlib.pyplot as plt

# Load the second data set: a CSV without a header row, so names are supplied.
path = 'ex2data2.txt'
data = pd.read_csv(
    path,
    names=['Test 1', 'Test 2', 'Accepted'],
    header=None,
)

# Separate the rows labelled 1 from the rows labelled 0.
positive = data.loc[data['Accepted'].isin([1])]
negative = data.loc[data['Accepted'].isin([0])]

# Disabled snippet: the scatter-plot code below is wrapped in a bare
# triple-quoted string, so it is an unused string literal and never runs.
# Unquoted, it would draw the `positive` rows as blue circles and the
# `negative` rows as red crosses, with a legend and axis labels.
'''

#显示原始数据的分布

fig, ax = plt.subplots(figsize=(12, 8))

ax.scatter(positive['Test 1'], positive['Test 2'], s=50, c='b', marker='o', label='Accepted')

ax.scatter(negative['Test 1'], negative['Test 2'], s=50, c='r', marker='x', label='Unaccepted')

ax.legend() #显示右上角的Accepted 和 Unaccepted标签

ax.set_xlabel('Test 1 Score')

ax.set_ylabel('Test 2 Score')

plt.show()

'''

# The loops below use range(1, degree), so the highest total power is degree - 1 = 4.
degree = 5

x1 = data['Test 1']
x2 = data['Test 2']

# Insert a constant column of ones at column index 3, i.e. right after 'Accepted'.
data.insert(3, 'Ones', 1)

# Create the polynomial features x1^(i-j) * x2^j for every total power i.
# j runs up to and including i: stopping at i - 1 would leave out the pure
# powers of x2 (x2, x2^2, ...), so x2 would only ever appear inside products.
for i in range(1, degree):
    for j in range(0, i + 1):
        data['F' + str(i) + str(j)] = np.power(x1, i - j) * np.power(x2, j)

# Remove the two raw test columns from `data`.
data.drop('Test 1', axis=1, inplace=True)
data.drop('Test 2', axis=1, inplace=True)

# Sigmoid (logistic) function.
def sigmoid(h):
    """Return the logistic function of ``h``: 1 / (1 + e^(-h)), element-wise."""
    return 1 / (np.exp(-h) + 1)

def cost(theta, X, y, learnRate):
    """Return the regularized logistic-regression cost.

    The data term is the mean of ``-y*log(h) - (1-y)*log(1-h)`` with
    ``h = sigmoid(X * theta.T)``. The penalty term is
    ``learnRate / (2 * m)`` times the sum of squared parameters, skipping the
    first parameter. Despite its name, ``learnRate`` is used here only as the
    weight of that penalty.
    """
    weights = np.matrix(theta)
    samples = np.matrix(X)
    labels = np.matrix(y)
    m = len(samples)

    # Evaluate the hypothesis once and reuse it in both terms.
    hypothesis = sigmoid(samples * weights.T)
    first = np.multiply(-labels, np.log(hypothesis))
    second = np.multiply(1 - labels, np.log(1 - hypothesis))

    # Column 0 of the parameter row is left out of the penalty.
    reg = (learnRate / (2 * m)) * np.sum(np.power(weights[:, 1:], 2))
    return np.sum(first - second) / m + reg

# Weight of the penalty term passed to cost / gradientReg.
learnRate = 1

# Column 0 of `data` is the label; all remaining columns are features.
cols = data.shape[1]
X = np.array(data.iloc[:, 1:])
y = np.array(data.iloc[:, :1])

# One zero-initialised parameter per feature column.
theta = np.zeros(X.shape[1])

# Compute predictions for a set of samples.
def predict(theta, X):
    """Return a list of 0/1 predictions, one per row of ``X``.

    A row is predicted as 1 when ``sigmoid(X * theta.T)`` is strictly greater
    than 0.5 for that row, otherwise 0.
    """
    weights = np.matrix(theta)
    samples = np.matrix(X)
    scores = sigmoid(samples * weights.T)
    return [1 if score > 0.5 else 0 for score in scores]

def gradientReg(theta, X, y, learnRate):
    """Return the gradient of the regularized cost with respect to ``theta``.

    The data term is ``(sigmoid(X * theta.T) - y).T * X / m``. The penalty
    term adds ``(learnRate / m) * theta_j`` to every component except the
    first, matching ``cost``, which leaves the first parameter out of its
    penalty. Despite its name, ``learnRate`` is the penalty weight.

    Returns a ``np.matrix`` with one row and one column per parameter.
    """
    theta = np.matrix(theta)
    X = np.matrix(X)
    y = np.matrix(y)
    m = len(X)

    error = sigmoid(X * theta.T) - y
    grad = (error.T * X) / m

    # Per-parameter penalty. The earlier code indexed theta with a leftover
    # module-level loop variable and then subtracted the same value from the
    # whole row, which cancelled the penalty for every component.
    penalty = (learnRate / m) * theta
    penalty[0, 0] = 0  # the first parameter is not regularized
    return grad + penalty

# Minimise the regularized cost with SciPy's fmin_tnc; result[0] is the fitted theta.
result = opt.fmin_tnc(func=cost, x0=theta, fprime=gradientReg, args=(X, y, learnRate))
print(result)

theta_min = np.matrix(result[0])
predictions = predict(theta_min, X)

# y is an (m, 1) column; flatten it so each prediction is paired with one label.
correct = [1 if a == b else 0 for (a, b) in zip(predictions, y.ravel())]

# Percentage of correct predictions. The previous `sum % len` was a modulo and
# printed the raw count of correct predictions rather than a percentage.
accuracy = 100.0 * sum(correct) / len(correct)
print('accuracy = {0}%'.format(accuracy))

logistic 回归（线性和非线性）的更多相关文章

浅谈Logistic回归及过拟合
判断学习速率是否合适?每步都下降即可.这篇先不整理吧... 这节学习的是逻辑回归(Logistic Regression),也算进入了比较正统的机器学习算法.啥叫正统呢?我概念里面机器学习算法一般是这 ...
机器学习公开课笔记(3)：Logistic回归
Logistic 回归通常是二元分类器(也可以用于多元分类),例如以下的分类问题 Email: spam / not spam Tumor: Malignant / benign 假设 (Hypot ...
Logistic回归总结
原文:http://blog.csdn.net/dongtingzhizi/article/details/15962797 Logistic回归总结作者:洞庭之子微博:洞庭之子-Bing (2 ...
机器学习(4)之Logistic回归
机器学习(4)之Logistic回归 1. 算法推导与之前学过的梯度下降等不同,Logistic回归是一类分类问题,而前者是回归问题.回归问题中,尝试预测的变量y是连续的变量,而在分类问题中,y是一 ...
Logistic回归（逻辑回归）和softmax回归
一.Logistic回归 Logistic回归(Logistic Regression,简称LR)是一种常用的处理二类分类问题的模型. 在二类分类问题中,把因变量y可能属于的两个类分别称为负类和正类, ...
logistic回归学习
logistic回归是一种分类方法,用于两分类的问题,其基本思想为: 寻找合适的假设函数,即分类函数,用来预测输入数据的结果: 构造损失函数,用来表示预测的输出结果与训练数据中实际类别之间的偏差: 最 ...
Logistic回归和SVM的异同
这个问题在最近面试的时候被问了几次,让谈一下Logistic回归(以下简称LR)和SVM的异同.由于之前没有对比分析过,而且不知道从哪个角度去分析,一时语塞,只能不知为不知. 现在对这二者做一个对比分 ...
机器学习-- Logistic回归 Logistic Regression
转载自:http://blog.csdn.net/linuxcumt/article/details/8572746 1.假设随Tumor Size变化,预测病人的肿瘤是恶性(malignant)还是 ...
【转载】logistic回归
原文地址:https://www.cnblogs.com/zichun-zeng/p/3824745.html 1. logistic回归与一般线性回归模型的区别: (1) 线性回归的结果变量 ...

随机推荐

Druid-代码段-1-5
所属文章:池化技术(一)Druid是如何管理数据库连接的? 本代码段对应流程1.4,抛弃连接: //丢弃连接 public void discardConnection(Connection real ...
P4287 [SHOI2011]双倍回文
题意考虑对每个节点\(x\)维护\(lastpos_x\)表示\(x\)的所有后缀回文串中第一个\(len\leqslant len_x/2\)并且能和\(x\)最后一个字符匹配的,之后枚举节点,判 ...
pl/sql中record和%rowtype整理
1. 创建stu表,如下: create table stu(s1 number, s2 number); 2. 定义多维数组, 能用来接受多条返回数据方式一: type type_name i ...
Go语言系列教程
一 Go介绍与开发环境搭建 01-Go语言简介 02-开发环境搭建 03-Go语言集成开发环境之GoLand安装使用 04-Go语言集成开发环境之VS Code安装使用 05-Go包管理详解二 Go ...
Java修饰符作用域
作用域当前类同一package 子孙类其他package public √ √ √ √ protected √ √ √ × friendly √ √ × × private √ × × × 修饰 ...
Protractor - 环境设置
去年出于好奇搭建过一个Protractor+Cucumber的测试框架,当时项目上并没有用到AngularJS,所以框架能运行起来之后没有再深入了.最近新项目引入了AngularJS,想起去年搭的那个 ...
推荐 | 中文文本标注工具Chinese-Annotator（转载）
自然语言处理的大部分任务是监督学习问题.序列标注问题如中文分词.命名实体识别,分类问题如关系识别.情感分析.意图分析等,均需要标注数据进行模型训练.深度学习大行其道的今天,基于深度学习的 NLP 模型 ...
mysql多表关联update
日常的开发中一般都是写的单表update语句,很少写多表关联的update. 不同于SQL Server,在MySQL中,update的多表连接更新和select的多表连接查询在使用的方法上存在一些小 ...
一次业务网关用ASP.NET Core 2.1重构的小结
目录前言统一鉴权服务限流路由转发参数重组链路跟踪熔断降级服务计次业务指标监控日志记录迭代更新总结前言对于API网关,业界貌似对它进行下划分,有下面几个分类/场景. 面向We ...
零基础学python，python视频教程
零基础学python,python视频教程这是我收集到的互联网上的视频资源,所有内容均来自互联网.仅供学习使用. 目前我在也在学习过程中,会把学习过程中遇到问题以及解决问题的方式,总结到我的公众号[ ...

logistic 回归（线性和非线性）

logistic 回归（线性和非线性）的更多相关文章

随机推荐

热门专题