此次实验是一个二分类问题,最终输出 precision、recall、accuracy 和 AUC 四项评价指标。

  1. # -*- coding: utf-8 -*-
  2. #from sklearn.neighbors import
  3. import numpy as np
  4. from pandas import read_csv
  5. import pandas as pd
  6. import sys
  7. import importlib
  8. from sklearn.neighbors import KNeighborsClassifier
  9. from sklearn.ensemble import GradientBoostingClassifier
  10. from sklearn import svm
  11. from sklearn import cross_validation
  12. from sklearn.metrics import hamming_loss
  13. from sklearn import metrics
  14. importlib.reload(sys)
  15. from sklearn.linear_model import LogisticRegression
  16. from imblearn.combine import SMOTEENN
  17. from sklearn.tree import DecisionTreeClassifier
  18. from sklearn.ensemble import RandomForestClassifier #92%
  19. from sklearn import tree
  20. from xgboost.sklearn import XGBClassifier
  21. from sklearn.linear_model import SGDClassifier
  22. from sklearn import neighbors
  23. from sklearn.naive_bayes import BernoulliNB
  24. import matplotlib as mpl
  25. import matplotlib.pyplot as plt
  26. from sklearn.metrics import confusion_matrix
  27. from numpy import mat
  28.  
  29. def metrics_result(actual, predict):
  30. print('准确度:{0:.3f}'.format(metrics.accuracy_score(actual, predict)))
  31. print('精密度:{0:.3f}'.format(metrics.precision_score(actual, predict,average='weighted')))
  32. print('召回:{0:0.3f}'.format(metrics.recall_score(actual, predict,average='weighted')))
  33. print('f1-score:{0:.3f}'.format(metrics.f1_score(actual, predict,average='weighted')))
  34. print('auc:{0:.3f}'.format(metrics.roc_auc_score(test_y, predict)))

输出混淆矩阵

  1. matr=confusion_matrix(test_y,predict)
  2. matr=mat(matr)
  3. conf=np.matrix([[0,0],[0,0]])
  4. conf[0,0]=matr[1,1]
  5. conf[1,0]=matr[1,0]
  6. conf[0,1]=matr[0,1]
  7. conf[1,1]=matr[0,0]
  8. print(conf)

全代码:

  1. # -*- coding: utf-8 -*-
  2. #from sklearn.neighbors import
  3. import numpy as np
  4. from pandas import read_csv
  5. import pandas as pd
  6. import sys
  7. import importlib
  8. from sklearn.neighbors import KNeighborsClassifier
  9. from sklearn.ensemble import GradientBoostingClassifier
  10. from sklearn import svm
  11. from sklearn import cross_validation
  12. from sklearn.metrics import hamming_loss
  13. from sklearn import metrics
  14. importlib.reload(sys)
  15. from sklearn.linear_model import LogisticRegression
  16. from imblearn.combine import SMOTEENN
  17. from sklearn.tree import DecisionTreeClassifier
  18. from sklearn.ensemble import RandomForestClassifier #92%
  19. from sklearn import tree
  20. from xgboost.sklearn import XGBClassifier
  21. from sklearn.linear_model import SGDClassifier
  22. from sklearn import neighbors
  23. from sklearn.naive_bayes import BernoulliNB
  24. import matplotlib as mpl
  25. import matplotlib.pyplot as plt
  26. from sklearn.metrics import confusion_matrix
  27. from numpy import mat
  28.  
  29. def metrics_result(actual, predict):
  30. print('准确度:{0:.3f}'.format(metrics.accuracy_score(actual, predict)))
  31. print('精密度:{0:.3f}'.format(metrics.precision_score(actual, predict,average='weighted')))
  32. print('召回:{0:0.3f}'.format(metrics.recall_score(actual, predict,average='weighted')))
  33. print('f1-score:{0:.3f}'.format(metrics.f1_score(actual, predict,average='weighted')))
  34. print('auc:{0:.3f}'.format(metrics.roc_auc_score(test_y, predict)))
  35.  
  36. '''分类0-1'''
  37. root1="D:/ProgramData/station3/10.csv"
  38. root2="D:/ProgramData/station3/more+average2.csv"
  39. root3="D:/ProgramData/station3/new_10.csv"
  40. root4="D:/ProgramData/station3/more+remove.csv"
  41. root5="D:/ProgramData/station3/new_10 2.csv"
  42. root6="D:/ProgramData/station3/new10.csv"
  43. root7="D:/ProgramData/station3/no_-999.csv"
  44.  
  45. root=root4
  46. data1 = read_csv(root) #数据转化为数组
  47. data1=data1.values
  48. print(root)
  49. time=1
  50.  
  51. accuracy=[]
  52. aucc=[]
  53. pre=[]
  54. recall=[]
  55. for i in range(time):
  56. train, test= cross_validation.train_test_split(data1, test_size=0.2, random_state=i)
  57. test_x=test[:,:-1]
  58. test_y=test[:,-1]
  59. train_x=train[:,:-1]
  60. train_y=train[:,-1]
  61. # =============================================================================
  62. # print(train_x.shape)
  63. # print(train_y.shape)
  64. # print(test_x.shape)
  65. # print(test_y.shape)
  66. # print(type(train_x))
  67. # =============================================================================
  68.  
  69. #X_Train=train_x
  70. #Y_Train=train_y
  71.  
  72. X_Train, Y_Train = SMOTEENN().fit_sample(train_x, train_y)
  73.  
  74. #clf = RandomForestClassifier() #82
  75. #clf = LogisticRegression() #82
  76.  
  77. #penalty=’l2’, dual=False, tol=0.0001, C=1.0, fit_intercept=True, intercept_scaling=1, class_weight=None, random_state=None, solver=’liblinear’, max_iter=100, multi_class=’ovr’, verbose=0, warm_start=False, n_jobs=1
  78. #clf=svm.SVC()
  79. clf= XGBClassifier()
  80. #from sklearn.ensemble import RandomForestClassifier #92%
  81. #clf = DecisionTreeClassifier()
  82. #clf = GradientBoostingClassifier()
  83.  
  84. #clf=neighbors.KNeighborsClassifier()
  85. #clf=BernoulliNB()
  86. print(clf)
  87. clf.fit(X_Train, Y_Train)
  88. predict=clf.predict(test_x)
  89.  
  90. matr=confusion_matrix(test_y,predict)
  91. matr=mat(matr)
  92. conf=np.matrix([[0,0],[0,0]])
  93. conf[0,0]=matr[1,1]
  94. conf[1,0]=matr[1,0]
  95. conf[0,1]=matr[0,1]
  96. conf[1,1]=matr[0,0]
  97. print(conf)
  98. #a=metrics_result(test_y, predict)
  99.  
  100. #a=metrics_result(test_y,predict)
  101. '''accuracy'''
  102. aa=metrics.accuracy_score(test_y, predict)
  103.  
  104. #print(metrics.accuracy_score(test_y, predict))
  105. accuracy.append(aa)
  106.  
  107. '''auc'''
  108. bb=metrics.roc_auc_score(test_y, predict, average=None)
  109. aucc.append(bb)
  110.  
  111. '''precision'''
  112. cc=metrics.precision_score(test_y, predict, average=None)
  113. pre.append(cc[1])
  114.  
  115. # =============================================================================
  116. # print('cc')
  117. # print(type(cc))
  118. # print(cc[1])
  119. # print('cc')
  120. # =============================================================================
  121.  
  122. '''recall'''
  123. dd=metrics.recall_score(test_y, predict, average=None)
  124. #print(metrics.recall_score(test_y, predict,average='weighted'))
  125. recall.append(dd[1])
  126.  
  127. f=open('D:\ProgramData\station3\predict.txt', 'w')
  128. for i in range(len(predict)):
  129. f.write(str(predict[i]))
  130. f.write('\n')
  131. f.write("写好了")
  132. f.close()
  133.  
  134. f=open('D:\ProgramData\station3\y_.txt', 'w')
  135. for i in range(len(predict)):
  136. f.write(str(test_y[i]))
  137. f.write('\n')
  138. f.write("写好了")
  139. f.close()
  140.  
  141. # =============================================================================
  142. # f=open('D:/ProgramData/station3/predict.txt', 'w')
  143. # for i in range(len(predict)):
  144. # f.write(str(predict[i]))
  145. # f.write('\n')
  146. # f.write("写好了")
  147. # f.close()
  148. #
  149. # f=open('D:/ProgramData/station3/y.txt', 'w')
  150. # for i in range(len(test_y)):
  151. # f.write(str(test_y[i]))
  152. # f.write('\n')
  153. # f.write("写好了")
  154. # f.close()
  155. #
  156. # =============================================================================
  157. # =============================================================================
  158. # print('调用函数auc:', metrics.roc_auc_score(test_y, predict, average='micro'))
  159. #
  160. # fpr, tpr, thresholds = metrics.roc_curve(test_y.ravel(),predict.ravel())
  161. # auc = metrics.auc(fpr, tpr)
  162. # print('手动计算auc:', auc)
  163. # #绘图
  164. # mpl.rcParams['font.sans-serif'] = u'SimHei'
  165. # mpl.rcParams['axes.unicode_minus'] = False
  166. # #FPR就是横坐标,TPR就是纵坐标
  167. # plt.plot(fpr, tpr, c = 'r', lw = 2, alpha = 0.7, label = u'AUC=%.3f' % auc)
  168. # plt.plot((0, 1), (0, 1), c = '#808080', lw = 1, ls = '--', alpha = 0.7)
  169. # plt.xlim((-0.01, 1.02))
  170. # plt.ylim((-0.01, 1.02))
  171. # plt.xticks(np.arange(0, 1.1, 0.1))
  172. # plt.yticks(np.arange(0, 1.1, 0.1))
  173. # plt.xlabel('False Positive Rate', fontsize=13)
  174. # plt.ylabel('True Positive Rate', fontsize=13)
  175. # plt.grid(b=True, ls=':')
  176. # plt.legend(loc='lower right', fancybox=True, framealpha=0.8, fontsize=12)
  177. # plt.title(u'大类问题一分类后的ROC和AUC', fontsize=17)
  178. # plt.show()
  179. # =============================================================================
  180.  
  181. sum_acc=0
  182. sum_auc=0
  183. sum_pre=0
  184. sum_recall=0
  185. for i in range(time):
  186. sum_acc+=accuracy[i]
  187. sum_auc+=aucc[i]
  188. sum_pre+=pre[i]
  189. sum_recall+=recall[i]
  190.  
  191. acc1=sum_acc*1.0/time
  192. auc1=sum_auc*1.0/time
  193. pre1=sum_pre*1.0/time
  194. recall1=sum_recall*1.0/time
  195. print("acc",acc1)
  196. print("auc",auc1)
  197. print("pre",pre1)
  198. print("recall",recall1)
  199.  
  200. # =============================================================================
  201. #
  202. # data1 = read_csv(root2) #数据转化为数组
  203. # data1=data1.values
  204. #
  205. #
  206. # accuracy=[]
  207. # auc=[]
  208. # pre=[]
  209. # recall=[]
  210. # for i in range(30):
  211. # train, test= cross_validation.train_test_split(data1, test_size=0.2, random_state=i)
  212. # test_x=test[:,:-1]
  213. # test_y=test[:,-1]
  214. # train_x=train[:,:-1]
  215. # train_y=train[:,-1]
  216. # X_Train, Y_Train = SMOTEENN().fit_sample(train_x, train_y)
  217. #
  218. # #clf = RandomForestClassifier() #82
  219. # clf = LogisticRegression() #82
  220. # #clf=svm.SVC()
  221. # #clf= XGBClassifier()
  222. # #from sklearn.ensemble import RandomForestClassifier #92%
  223. # #clf = DecisionTreeClassifier()
  224. # #clf = GradientBoostingClassifier()
  225. #
  226. # #clf=neighbors.KNeighborsClassifier() 65.25%
  227. # #clf=BernoulliNB()
  228. # clf.fit(X_Train, Y_Train)
  229. # predict=clf.predict(test_x)
  230. #
  231. # '''accuracy'''
  232. # aa=metrics.accuracy_score(test_y, predict)
  233. # accuracy.append(aa)
  234. #
  235. # '''auc'''
  236. # aa=metrics.roc_auc_score(test_y, predict)
  237. # auc.append(aa)
  238. #
  239. # '''precision'''
  240. # aa=metrics.precision_score(test_y, predict,average='weighted')
  241. # pre.append(aa)
  242. #
  243. # '''recall'''
  244. # aa=metrics.recall_score(test_y, predict,average='weighted')
  245. # recall.append(aa)
  246. #
  247. #
  248. # sum_acc=0
  249. # sum_auc=0
  250. # sum_pre=0
  251. # sum_recall=0
  252. # for i in range(30):
  253. # sum_acc+=accuracy[i]
  254. # sum_auc+=auc[i]
  255. # sum_pre+=pre[i]
  256. # sum_recall+=recall[i]
  257. #
  258. # acc1=sum_acc*1.0/30
  259. # auc1=sum_auc*1.0/30
  260. # pre1=sum_pre*1.0/30
  261. # recall1=sum_recall*1.0/30
  262. # print("more 的 acc:", acc1)
  263. # print("more 的 auc:", auc1)
  264. # print("more 的 precision:", pre1)
  265. # print("more 的 recall:", recall1)
  266. #
  267. # =============================================================================
  268. #X_train, X_test, y_train, y_test = cross_validation.train_test_split(X_Train,Y_Train, test_size=0.2, random_state=i)

输出结果:

分类预测输出precision,recall,accuracy,auc和tp,tn,fp,fn矩阵的更多相关文章

  1. 目标检测的评价标准mAP, Precision, Recall, Accuracy

    目录 metrics 评价方法 TP , FP , TN , FN 概念 计算流程 Accuracy , Precision ,Recall Average Precision PR曲线 AP计算 A ...

  2. 机器学习:评价分类结果(Precision - Recall 的平衡、P - R 曲线)

    一.Precision - Recall 的平衡 1)基础理论 调整阈值的大小,可以调节精准率和召回率的比重: 阈值:threshold,分类边界值,score > threshold 时分类为 ...

  3. 机器学习基础梳理—(accuracy,precision,recall浅谈)

    一.TP TN FP FN TP:标签为正例,预测为正例(P),即预测正确(T) TN:标签为负例,预测为负例(N),即预测正确(T) FP:标签为负例,预测为正例(P),即预测错误(F) FN:标签 ...

  4. Precision,Recall,F1的计算

    Precision又叫查准率,Recall又叫查全率.这两个指标共同衡量才能评价模型输出结果. TP: 预测为1(Positive),实际也为1(Truth-预测对了) TN: 预测为0(Negati ...

  5. 评价指标整理:Precision, Recall, F-score, TPR, FPR, TNR, FNR, AUC, Accuracy

    针对二分类的结果,对模型进行评估,通常有以下几种方法: Precision.Recall.F-score(F1-measure)TPR.FPR.TNR.FNR.AUCAccuracy   真实结果 1 ...

  6. 分类指标准确率(Precision)和正确率(Accuracy)的区别

    http://www.cnblogs.com/fengfenggirl/p/classification_evaluate.html 一.引言 分类算法有很多,不同分类算法又用很多不同的变种.不同的分 ...

  7. Precision/Recall、ROC/AUC、AP/MAP等概念区分

    1. Precision和Recall Precision,准确率/查准率.Recall,召回率/查全率.这两个指标分别以两个角度衡量分类系统的准确率. 例如,有一个池塘,里面共有1000条鱼,含10 ...

  8. 机器学习--如何理解Accuracy, Precision, Recall, F1 score

    当我们在谈论一个模型好坏的时候,我们常常会听到准确率(Accuracy)这个词,我们也会听到"如何才能使模型的Accurcy更高".那么是不是准确率最高的模型就一定是最好的模型? 这篇博文会向大家解释 ...

  9. 通过Precision/Recall判断分类结果偏差极大时算法的性能

    当我们对某些问题进行分类时,真实结果的分布会有明显偏差. 例如对是否患癌症进行分类,testing set 中可能只有0.5%的人患了癌症. 此时如果直接数误分类数的话,那么一个每次都预测人没有癌症的 ...

随机推荐

  1. 在Github上搭建博客

    貌似还是这个链接最靠谱呀 http://my.oschina.net/nark/blog/116299   如何利用github建立个人博客:之一 在线编辑器http://markable.in/ed ...

  2. POJ 1122 FDNY to the Rescue!(最短路+路径输出)

    http://poj.org/problem?id=1122 题意:给出地图并且给出终点和多个起点,输出从各个起点到终点的路径和时间. 思路: 因为有多个起点,所以这里反向建图,这样就相当于把终点变成 ...

  3. python 清空列表

    # lst = ["篮球","排球","乒乓球","足球","电子竞技","台球" ...

  4. Spring Cloud组件

    Spring Cloud Eureka Eureka负责服务的注册于发现,Eureka的角色和 Zookeeper差不多,都是服务的注册和发现,构成Eureka体系的包括:服务注册中心.服务提供者.服 ...

  5. 最全android Demo

    1.BeautifulRefreshLayout-漂亮的美食下拉刷新 https://github.com/android-cjj/BeautifulRefreshLayout/tree/Beauti ...

  6. UVA-11903 Just Finish it up

    题目大意:一个环形跑道上有n个加油站,每个加油站可加a[i]加仑油,走到下一站需要w[i]加仑油,初始油箱为空,问能否绕跑道一圈,起点任选,若有多个起点,找出编号最小的. 题目分析:如果从1号加油站开 ...

  7. powerdesigner安装图解

  8. SGU 132. Another Chocolate Maniac 状压dp 难度:1

    132. Another Chocolate Maniac time limit per test: 0.25 sec. memory limit per test: 4096 KB Bob real ...

  9. 转载:【Oracle 集群】RAC知识图文详细教程(九)--RAC基本测试与使用

    文章导航 集群概念介绍(一) ORACLE集群概念和原理(二) RAC 工作原理和相关组件(三) 缓存融合技术(四) RAC 特殊问题和实战经验(五) ORACLE 11 G版本2 RAC在LINUX ...

  10. Alpha阶段敏捷冲刺---Day1

    一.Daily Scrum Meeting照片 二.今天冲刺情况反馈 1.昨天已完成的工作    昨天我们组全体成员在五社区五号楼719召开了紧急会议,在会议上我们梳理了编写这个程序的所有流程,并且根 ...