一元回归_ols参数解读(推荐AAA)
python机器学习-乳腺癌细胞挖掘(博主亲自录制视频)
项目合作QQ:231469242
多重共线性测试需要改进
文件夹需要两个包
python3.0 anaconda
normality_check.py 正太检验
# -*- coding: utf-8 -*-
'''
Author:Toby
QQ:231469242,all right reversed,no commercial use
normality_check.py
正态性检验脚本 ''' import scipy
from scipy.stats import f
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
# additional packages
from statsmodels.stats.diagnostic import lillifors #正态分布测试
def check_normality(testData):
#20<样本数<50用normal test算法检验正态分布性
if 20<len(testData) <50:
p_value= stats.normaltest(testData)[1]
if p_value<0.05:
print("use normaltest")
print ("data are not normal distributed")
return False
else:
print("use normaltest")
print ("data are normal distributed")
return True #样本数小于50用Shapiro-Wilk算法检验正态分布性
if len(testData) <50:
p_value= stats.shapiro(testData)[1]
if p_value<0.05:
print ("use shapiro:")
print ("data are not normal distributed")
return False
else:
print ("use shapiro:")
print ("data are normal distributed")
return True if 300>=len(testData) >=50:
p_value= lillifors(testData)[1]
if p_value<0.05:
print ("use lillifors:")
print ("data are not normal distributed")
return False
else:
print ("use lillifors:")
print ("data are normal distributed")
return True if len(testData) >300:
p_value= stats.kstest(testData,'norm')[1]
if p_value<0.05:
print ("use kstest:")
print ("data are not normal distributed")
return False
else:
print ("use kstest:")
print ("data are normal distributed")
return True #对所有样本组进行正态性检验
def NormalTest(list_groups):
for group in list_groups:
#正态性检验
status=check_normality(group)
if status==False :
return False
return True
Rsquare_multimode.py 多种模型计算R平方
加入了线性显著检测和r相关系数显著检测,多重共线性,自相关,残差正太检验等等
# -*- coding: utf-8 -*-
#斯皮尔曼等级相关(Spearman’s correlation coefficient for ranked data)
import math,pylab,scipy
import numpy as np
import scipy.stats as stats
from scipy.stats import t
from scipy.stats import f
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.stats.diagnostic import lillifors
import normality_check
import statsmodels.formula.api as sm
x=[4.03,3.76,3.77,3.34,3.47,2.92,3.20,2.71,3.53,4.51]
y=[6.47,6.13,6.19,4.89,5.63,4.52,5.89,4.79,5.27,6.08] list_group=[x,y]
sample=len(x)
#显著性
a=0.05 #数据可视化
plt.plot(x,y,'ro')
#斯皮尔曼等级相关,非参数检验
def Spearmanr(x,y):
print("use spearmanr,Nonparametric tests")
#样本不一致时,发出警告
if len(x)!=len(y):
print ("warming,the samples are not equal!")
r,p=stats.spearmanr(x,y)
print("spearman r**2:",r**2)
print("spearman p:",p)
if sample<500 and p>0.05:
print("when sample < 500,p has no mean(>0.05)")
print("when sample > 500,p has mean") #皮尔森 ,参数检验
def Pearsonr(x,y):
print("use Pearson,parametric tests")
r,p=stats.pearsonr(x,y)
print("pearson r**2:",r**2)
print("pearson p:",p)
if sample<30:
print("when sample <30,pearson has no mean") #皮尔森 ,参数检验,带有详细参数
def Pearsonr_details(x,y,xLabel,yLabel,formula):
n=len(x)
df=n-2
data=pd.DataFrame({yLabel:y,xLabel:x})
result = sm.ols(formula, data).fit()
print(result.summary()) #模型F分布显著性分析
print('\n')
print("linear relation Significant test:...................................")
#如果F检验的P值<0.05,拒绝H0,x和y无显著关系,H1成立,x和y有显著关系
if result.f_pvalue<0.05:
print ("P value of f test<0.05,the linear relation is right.") #R的显著检验
print('\n')
print("R significant test:...................................")
r_square=result.rsquared
r=math.sqrt(r_square)
t_score=r*math.sqrt(n-2)/(math.sqrt(1-r**2))
t_std=t.isf(a/2,df)
if t_score<-t_std or t_score>t_std:
print ("R is significant according to its sample size")
else:
print ("R is not significant") #残差分析
print('\n')
print("residual error analysis:...................................")
states=normality_check.check_normality(result.resid)
if states==True:
print("the residual error are normal distributed")
else:
print("the residual error are not normal distributed") #残差偏态和峰态
Skew = stats.skew(result.resid, bias=True)
Kurtosis = stats.kurtosis(result.resid, fisher=False,bias=True)
if round(Skew,1)==0:
print("residual errors normality Skew:in middle,perfect match")
elif round(Skew,1)>0:
print("residual errors normality Skew:close right")
elif round(Skew,1)<0:
print("residual errors normality Skew:close left") if round(Kurtosis,1)==3:
print("residual errors normality Kurtosis:in middle,perfect match")
elif round(Kurtosis,1)>3:
print("residual errors normality Kurtosis:more peak")
elif round(Kurtosis,1)<3:
print("residual errors normality Kurtosis:more flat") #自相关分析autocorrelation
print('\n')
print("autocorrelation test:...................................")
DW = np.sum( np.diff( result.resid.values )**2.0 )/ result.ssr
if round(DW,1)==2:
print("Durbin-Watson close to 2,there is no autocorrelation.OLS model works well") #共线性检查
print('\n')
print("multicollinearity test:")
conditionNumber=result.condition_number
if conditionNumber>30:
print("conditionNumber>30,multicollinearity exists")
else:
print("conditionNumber<=30,multicollinearity not exists") #绘制残差图,用于方差齐性检验
Draw_residual(list(result.resid))
'''
result.rsquared
Out[28]: 0.61510660055413524
''' #kendalltau非参数检验
def Kendalltau(x,y):
print("use kendalltau,Nonparametric tests")
r,p=stats.kendalltau(x,y)
print("kendalltau r**2:",r**2)
print("kendalltau p:",p) #选择模型
def R_mode(x,y,xLabel,yLabel,formula):
#正态性检验
Normal_result=normality_check.NormalTest(list_group)
print ("normality result:",Normal_result)
if len(list_group)>2:
Kendalltau(x,y)
if Normal_result==False:
Spearmanr(x,y)
Kendalltau(x,y)
if Normal_result==True:
Pearsonr_details(x,y,xLabel,yLabel,formula) #调整的R方
def Adjust_Rsquare(r_square,n,k):
adjust_rSquare=1-((1-r_square)*(n-1)*1.0/(n-k-1))
return adjust_rSquare
'''
n=len(x)
n=10
k=1
r_square=0.615
Adjust_Rsquare(r_square,n,k)
Out[11]: 0.566875
''' #绘图
def Plot(x,y,yLabel,xLabel,Title):
plt.plot(x,y,'ro')
plt.ylabel(yLabel)
plt.xlabel(xLabel)
plt.title(Title)
plt.show() #绘图参数
yLabel='Alcohol'
xLabel='Tobacco'
Title='Sales in Several UK Regions'
Plot(x,y,yLabel,xLabel,Title)
formula='Alcohol ~ Tobacco' #绘制残点图
def Draw_residual(residual_list):
x=[i for i in range(1,len(residual_list)+1)]
y=residual_list
pylab.plot(x,y,'ro')
pylab.title("draw residual to check wrong number") # Pad margins so that markers don't get clipped by the axes,让点不与坐标轴重合
pylab.margins(0.3) #绘制网格
pylab.grid(True) pylab.show() R_mode(x,y,xLabel,yLabel,formula) '''
result.fittedvalues表示预测的y值阵列
result.fittedvalues
Out[42]:
0 6.094983
1 5.823391
2 5.833450
3 5.400915
4 5.531682
5 4.978439
6 5.260090
7 4.767201
8 5.592035
9 6.577813
dtype: float64 #计算残差的偏态
S = stats.skew(result.resid, bias=True)
Out[44]: -0.013678125910039975 K = stats.kurtosis(result.resid, fisher=False,bias=True)
K
Out[47]: 1.5271300905736027
'''
result.params 得到两个参数:x的系数和截距
截距
result.params[0]
x系数
result.params[1]
D.W统计量是用来检验残差分布是否为正态分布的,因为用OLS进行回归估计是假设模型残差服从正态分布的,因此,如果残差不服从正态分布,那么,模型将是有偏的,也就是说模型的解释能力是不强的。
D.W统计量在2左右说明残差是服从正态分布的,若偏离2太远,那么你所构建的模型
的解释能力就要受影响了。 jarque-bera解读
----样本是否符合正太分布
Jarque-Bera的P值接近于0,表明显著性高,数据服从正态分布。
Omnibus解读
Omnibus统计量的P值都接近于0,自变量的作用显著。 Omnibus tests are a kind of statistical test. They test whether the explained variance in a set of data is significantly greater than the unexplained variance, overall. One example is the F-test in the analysis of variance. There can be legitimate significant effects within a model even if the omnibus test is not significant. For instance, in a model with two independent variables, if only one variable exerts a significant effect on the dependent variable and the other does not, then the omnibus test may be non-significant. This fact does not affect the conclusions that may be drawn from the one significant variable. In order to test effects within an omnibus test, researchers often use contrasts. https://en.wikipedia.org/wiki/Omnibus_test
python信用评分卡建模(附代码,博主录制)
一元回归_ols参数解读(推荐AAA)的更多相关文章
- 一元回归1_基础(python代码实现)
python机器学习-乳腺癌细胞挖掘(博主亲自录制视频) https://study.163.com/course/introduction.htm?courseId=1005269003&u ...
- 机器学习(2):简单线性回归 | 一元回归 | 损失计算 | MSE
前文再续书接上一回,机器学习的主要目的,是根据特征进行预测.预测到的信息,叫标签. 从特征映射出标签的诸多算法中,有一个简单的算法,叫简单线性回归.本文介绍简单线性回归的概念. (1)什么是简单线性回 ...
- main(int argc, char **argv)参数解读
main(int argc, char **argv)参数解读 编译生成了test.exe ,然后在控制台下相应的目录下输入:test 1 2 3 4 argc就是一个输入了多少个参数,包括te ...
- Python_sklearn机器学习库学习笔记(一)_一元回归
一.引入相关库 %matplotlib inline import matplotlib.pyplot as plt from matplotlib.font_manager import FontP ...
- sklearn_随机森林random forest原理_乳腺癌分类器建模(推荐AAA)
sklearn实战-乳腺癌细胞数据挖掘(博主亲自录制视频) https://study.163.com/course/introduction.htm?courseId=1005269003& ...
- 支持向量机SVM原理_python sklearn建模乳腺癌细胞分类器(推荐AAA)
项目合作联系QQ:231469242 sklearn实战-乳腺癌细胞数据挖掘(博主亲自录制视频) https://study.163.com/course/introduction.htm?cours ...
- 因子分析factor analysis_spss运用_python建模(推荐AAA)
sklearn实战-乳腺癌细胞数据挖掘(博主亲自录制视频) https://study.163.com/course/introduction.htm?courseId=1005269003& ...
- 决策树decision tree原理介绍_python sklearn建模_乳腺癌细胞分类器(推荐AAA)
sklearn实战-乳腺癌细胞数据挖掘(博主亲自录制视频) https://study.163.com/course/introduction.htm?courseId=1005269003& ...
- Java8 JVM参数解读
附录:https://www.liangzl.com/get-article-detail-134315.html 摘要: 我们知道java虚拟机启动时会带有很多的启动参数,Java命令本身就是一个多 ...
随机推荐
- 华为中兴借eBay出海 靠零售渠道撬动市场
在跨境电商领域,大多数中国商家依靠“中国制造”的优势和价格战策略打拼出一条血路,在海外市场占领了自己的一席 之地.不过,山寨货纷纷出海的同时,中国本土的品牌商们也开始了探索海外市场之旅.目前,华为.中 ...
- django的htpp请求之WSGIRequest
WSGIRequest对象 Django在接收到http请求之后,会根据http请求携带的参数以及报文信息创建一个WSGIRequest对象,并且作为视图函数第一个参数传给视图函数.这个参数就是dja ...
- css全局样式基础代码
body{ font-size:12px; font-family:"宋体",Arial, Helvetica, sans-serif;color:#363636;backgrou ...
- 关于mysql无法添加中文数据的问题以及解决方案
今天弄了一天的mysql数据库,就是被一个mysql数据库乱码的问题给缠住了.现在记录一下这个问题,虽然这个问题不是什么太大的事情,但还是记录一下. 问题是这样的: 1.先在mysql的安装文件当中, ...
- Alpha-6
前言 失心疯病源6 团队代码管理github 站立会议 队名:PMS 530雨勤(组长) 今天完成了那些任务 18:30~20:30 完成blob类下关于预测车辆下一个位置的函数 代码签入github ...
- 运维工程师如果将web服务http专变为https
1:生成私钥 2:生成证书签署请求 3:在提供CA签署的web网站上,提交生成的证书签署请求 4:下载已经签署的CA证书 5:将证书的信息保留在web服务器中,且应用到提供web服务的 ...
- Jmeter 中JDBC request 详解 !
JDBC Request: 这个sampler可以向数据库发送一个jdbc请求(sql语句),它经常需要和JDBC Connection Configuration 配置元件一起配合使用. 目录: 一 ...
- codepen & js demos
codepen & js demos Mutation Observer & customize resize event listener & demo https://co ...
- Checkbox & Excel
Checkbox & Excel Q: Excel how to check checkbox? 这个怎么打勾✔ ? A: 可以打勾的 How to Insert and Use a Chec ...
- LoadRunner脚本增强技巧之参数化(一)
参数化的方式有两种,一种通过File引入参数值,一种通过数据库引入参数值.本篇介绍File方式引入参数值. 一.File方式参数化过程 1.在脚本中找到需要做参数化的字符串,选中,右键点击,选择Rep ...