Ensemble
RM
# -*- coding: utf-8 -*-
"""RandomForestClassifier example on the scikit-learn wine dataset.

Trains a default random forest, evaluates it on a hold-out split, then
trains a tuned forest (more trees, larger ``min_samples_split``) and
evaluates that tuned forest the same way.
"""
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_wine
from sklearn import metrics  # model evaluation tools

# 1. Load the dataset
wine = load_wine()
wine_x = wine.data
wine_y = wine.target  # 3 classes

# Peek at the data: first five rows of the feature matrix (5 x 13 floats)
print(wine_x[:5, :])
print(wine_y[:5])  # [0 0 0 0 0]
print(wine_y[170:175])  # [2 2 2 2 2]

# 2. train/test split (no random_state, so the split differs on every run)
X_train, X_test, y_train, y_test = train_test_split(
    wine_x, wine_y, test_size=0.3)

# 3. Build the baseline random forest model
obj = RandomForestClassifier()
model = obj.fit(X_train, y_train)
print(model)
# Recorded output of the original run:
# RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
#     max_depth=None, max_features='auto', max_leaf_nodes=None,
#     min_impurity_decrease=0.0, min_impurity_split=None,
#     min_samples_leaf=1, min_samples_split=2,
#     min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
#     oob_score=False, random_state=None, verbose=0, warm_start=False)

# 4. Evaluate the baseline model
pred = model.predict(X_test)
Y = y_test

acc = metrics.accuracy_score(Y, pred)
print(acc)  # recorded run: 0.9629629629629629
report = metrics.classification_report(Y, pred)
print(report)
# Recorded output of the original run:
#              precision    recall  f1-score   support
#           0       1.00      1.00      1.00        14
#           1       0.90      1.00      0.95        18
#           2       1.00      0.91      0.95        22
# avg / total       0.97      0.96      0.96        54

##############################################
# RF model tuning
##############################################
# n_estimators: number of trees (author's note: 400~500 works best).
# min_samples_split: minimum number of samples a node needs to be split.
# The original note tied this value to sqrt(number of features); that
# heuristic describes max_features (features tried per split), not
# min_samples_split.

# 2. train/test split (drawn again, independent of the split above)
X_train, X_test, y_train, y_test = train_test_split(
    wine_x, wine_y, test_size=0.3)

print(wine_x.shape)  # (178, 13)
print(np.sqrt(13))  # 3.605551275463989 => 4

# 3. Build the tuned random forest model
obj2 = RandomForestClassifier(n_estimators=400, min_samples_split=3)
model2 = obj2.fit(X_train, y_train)
print(model2)
# Recorded output of the original run (it shows min_samples_split=4,
# although the call above passes 3):
# RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
#     max_depth=None, max_features='auto', max_leaf_nodes=None,
#     min_impurity_decrease=0.0, min_impurity_split=None,
#     min_samples_leaf=1, min_samples_split=4,
#     min_weight_fraction_leaf=0.0, n_estimators=400, n_jobs=1,
#     oob_score=False, random_state=None, verbose=0, warm_start=False)

# 4. Evaluate the tuned model.
# Predict with model2: the original called model.predict here, which scored
# the baseline forest on a re-drawn split that overlaps its training rows.
pred2 = model2.predict(X_test)
Y = y_test

acc = metrics.accuracy_score(Y, pred2)
print(acc)  # the original recorded 0.9814814814814815 using the wrong model
report = metrics.classification_report(Y, pred2)
print(report)
# Recorded output of the original run (produced with the wrong model):
#              precision    recall  f1-score   support
#           0       1.00      1.00      1.00        14
#           1       0.95      1.00      0.97        18
#           2       1.00      0.95      0.98        22
# avg / total       0.98      0.98      0.98        54
cross_validation
# -*- coding: utf-8 -*-
"""Cross-validation example: 5-fold CV of a random forest on iris."""
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_validate  # cross-validation
from sklearn.ensemble import RandomForestClassifier  # random forest

# 1. data set
iris = pd.read_csv("../data/iris.csv")
print(iris.info())
# Recorded output of the original run:
# <class 'pandas.core.frame.DataFrame'>
# RangeIndex: 150 entries, 0 to 149
# Data columns (total 5 columns):
# Sepal.Length    150 non-null float64
# Sepal.Width     150 non-null float64
# Petal.Length    150 non-null float64
# Petal.Width     150 non-null float64
# Species         150 non-null object
# dtypes: float64(4), object(1)
# memory usage: 5.9+ KB

cols = list(iris.columns)
x_data = iris[cols[:4]]  # first four columns: features
y_data = iris[cols[-1]]  # last column: label

# 2. Create the model.
# The estimator is left unfitted on purpose: cross_validate fits its own
# clone for every fold, so fitting on the full data first is wasted work.
model = RandomForestClassifier()

# 3. Cross-validation, cv=5 (5-fold)
score = cross_validate(model, x_data, y_data, cv=5)
print(score)
# Recorded output of the original run (the 'train_score' entry only appears
# when return_train_score is enabled -- confirm for the installed version):
# {'fit_time': array([0.01000023, 0.01000023, 0.00900006, 0.00999999, 0.01000023]),
#  'score_time': array([0.00099993, 0.00099993, 0.00099993, 0.00100017, 0.00099993]),
#  'test_score': array([0.96666667, 0.96666667, 0.9, 0.93333333, 1.]),
#  'train_score': array([1., 1., 1., 1., 1.])}

# Per-fold test accuracy and its mean
score_arr = np.asarray(score['test_score'])
test_score = list(score_arr)
print(test_score)  # recorded run: [0.966, 0.966, 0.933, 0.9, 1.0]
print(score_arr.mean())  # recorded run: 0.9533333333333334
RM_regression
# -*- coding: utf-8 -*-
"""RandomForestRegressor example on the Boston housing dataset."""
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
# NOTE(review): load_boston is not shipped by newer scikit-learn releases --
# confirm the installed version still provides it.
from sklearn.datasets import load_boston  # data set
from sklearn import metrics  # model evaluation tools

# 1. Load the dataset; the target is continuous
boston = load_boston()
boston_x, boston_y = boston.data, boston.target

# Inspect the data set
print(boston_x.shape)  # (506, 13)
print(boston_y.shape)  # (506,)

# 2. Seeded 70/30 train/test split
x_train, x_test, y_train, y_test = train_test_split(
    boston_x,
    boston_y,
    test_size=0.3,
    random_state=123,
)

# 3. Build the random forest model (seeded)
obj = RandomForestRegressor(random_state=234)
model = obj.fit(x_train, y_train)
print(model)
# Recorded output of the original run:
# RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
#     max_features='auto', max_leaf_nodes=None,
#     min_impurity_decrease=0.0, min_impurity_split=None,
#     min_samples_leaf=1, min_samples_split=2,
#     min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
#     oob_score=False, random_state=234, verbose=0, warm_start=False)

# 4. Evaluate the model
y_pred = model.predict(x_test)
y_real_value = y_test

# Score reported by the estimator's own score() on the hold-out set
model_score = model.score(x_test, y_test)
print(model_score)  # recorded run: 0.7998066141697237
xgboost_test
# -*- coding: utf-8 -*-
"""XGBoost classification example on the iris data set.

Fits an XGBClassifier, plots feature importance and a tree, and reports
accuracy and a classification report on a hold-out split.
"""
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier  # model
from xgboost import plot_importance  # feature importance plot
from xgboost import plot_tree  # tree plot

# 1. Load the iris data set
iris = pd.read_csv("../data/iris.csv")
cols = list(iris.columns)
iris_x = iris[cols[:4]]
iris_y = iris[cols[-1]]  # species names (strings)

# Encode the species names as integers 0..n_classes-1. Recent xgboost
# releases reject string class labels in fit(); integer labels are
# accepted by old and new releases alike.
encoder = LabelEncoder()
iris_y_encoded = encoder.fit_transform(iris_y)

# 2. train/test set (seeded; the row assignment does not depend on labels)
x_train, x_test, y_train, y_test = train_test_split(
    iris_x, iris_y_encoded, test_size=0.3, random_state=123)

# 3. Create the model
obj = XGBClassifier()
model = obj.fit(x_train, y_train)
print(model)
# Recorded output of the original run:
# XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
#     colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
#     max_depth=3, min_child_weight=1, missing=None, n_estimators=100,
#     n_jobs=1, nthread=None, objective='multi:softprob', random_state=0,
#     reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
#     silent=True, subsample=1)
#
# Author's parameter notes:
#   objective = "binary:logistic" : the y variable is binary
#   max_depth = 2 : for a simple tree structure
#   nthread = 2   : number of CPUs used
#   nrounds = 2   : boosting iterations that reduce the gap between
#                   actual and predicted values
#   eta = 1       : learning-rate control (default 0.3), guards against
#                   over-fitting

# 4. Evaluate the model. Predictions and true labels are decoded back to
# species names so the printed output keeps the original string labels.
y_pred = encoder.inverse_transform(model.predict(x_test))
print(y_pred)
Y = encoder.inverse_transform(y_test)
# Recorded output of the original run:
# ['versicolor' 'virginica' 'virginica' 'versicolor' 'setosa' 'versicolor'
#  'versicolor' 'setosa' 'setosa' 'versicolor' 'virginica' 'setosa'
#  'versicolor' 'virginica' 'virginica' 'virginica' 'setosa' 'setosa'
#  'versicolor' 'setosa' 'setosa' 'versicolor' 'setosa' 'virginica' 'setosa'
#  'setosa' 'setosa' 'virginica' 'virginica' 'setosa' 'virginica'
#  'versicolor' 'setosa' 'setosa' 'versicolor' 'versicolor' 'virginica'
#  'setosa' 'setosa' 'versicolor' 'versicolor' 'setosa' 'virginica'
#  'virginica' 'virginica']

# Feature importance plot
plot_importance(model)
plt.show()

# Feature importance as f-scores
score = model.get_booster().get_fscore()
print('x 중요변수=', score)
# recorded run: {'Petal.Length': 255, 'Petal.Width': 135,
#                'Sepal.Width': 64, 'Sepal.Length': 118}

# Model evaluation (arguments in y_true, y_pred order for both metrics)
acc = metrics.accuracy_score(Y, y_pred)
print("acc=", acc)  # recorded run: acc= 0.9333333333333333
report = metrics.classification_report(Y, y_pred)
print(report)
# Recorded output of the original run:
#              precision    recall  f1-score   support
#      setosa       1.00      1.00      1.00        18
#  versicolor       0.77      1.00      0.87        10
#   virginica       1.00      0.82      0.90        17
# avg / total       0.95      0.93      0.93

plot_tree(model)
plt.show()
xgboost_regression
# -*- coding: utf-8 -*-
"""XGBoost regression example on the Boston housing dataset.

Created on Sun Feb 24 15:18:35 2019

@author: 502-03
"""
import pandas as pd
from xgboost import XGBRegressor  # model (regression)
from xgboost import plot_importance  # feature importance plot
from xgboost import plot_tree
from sklearn.model_selection import train_test_split
# NOTE(review): load_boston is not shipped by newer scikit-learn releases --
# confirm the installed version still provides it.
from sklearn.datasets import load_boston  # dataset
import matplotlib.pyplot as plt

# 1. Load the dataset
boston = load_boston()
x, y = boston.data, boston.target
print(x.shape)  # (506, 13)
print(y.shape)  # (506,)

# 2. Seeded 70/30 train/test set
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=123,
)

# 3. Create the model
obj = XGBRegressor(n_estimators=400, max_depth=6)
model = obj.fit(x_train, y_train)
print(model)
# Recorded output of the original run (it shows max_depth=3 and
# n_estimators=100, unlike the arguments passed above):
# XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
#     colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
#     max_depth=3, min_child_weight=1, missing=None, n_estimators=100,
#     n_jobs=1, nthread=None, objective='reg:linear', random_state=0,
#     reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
#     silent=True, subsample=1)

# Feature importance (f-scores keyed by feature name)
score = model.get_booster().get_fscore()
print(score)
# recorded run:
# {'f5': 83, 'f12': 78, 'f0': 91, 'f4': 42, 'f7': 110, 'f10': 32, 'f6': 46,
#  'f9': 38, 'f3': 1, 'f8': 16, 'f11': 51, 'f1': 2, 'f2': 15}

plot_importance(model)
plt.show()

plot_tree(model)
plt.show()

# Column names of the dataset, printed for reference
print(boston.feature_names)
# recorded run:
# ['CRIM' 'ZN' 'INDUS' 'CHAS' 'NOX' 'RM' 'AGE' 'DIS' 'RAD' 'TAX' 'PTRATIO'
#  'B' 'LSTAT']
xgboost_freeze
# -*- coding: utf-8 -*-
"""Freeze-burst classification on a waterworks "big contest" dataset.

The dataset is used to classify whether a freeze burst occurred (0 or 1).
"""
import pandas as pd
from xgboost import XGBClassifier  # model (classification)
from xgboost import plot_importance  # feature importance plot
# NOTE(review): load_boston is never used in this script -- consider
# dropping the import; confirm nothing else relies on it.
from sklearn.datasets import load_boston  # dataset
from sklearn.model_selection import train_test_split
from matplotlib import font_manager, rc

# Register a Korean font so Korean column names render in the plot.
# NOTE(review): the font path is Windows-specific.
font_path = "c:/Windows/Fonts/malgun.ttf"
font_name = font_manager.FontProperties(fname=font_path).get_name()
rc('font', family=font_name)

import matplotlib.pyplot as plt  # feature importance plot

freeze = pd.read_csv("../data/freeze_dataset.csv", encoding="MS949")
print(freeze.info())
# recorded run:
# RangeIndex: 37089 entries, 0 to 37088
# Data columns (total 95 columns):
print(freeze.head())

# Rename columns: spaces -> '_'
freeze.columns = freeze.columns.str.replace(' ', '_')
print(freeze.info())

# Class balance of the freeze-burst flag (0 or 1)
print(freeze['동파유무'].value_counts())
# recorded run:
# 0.0    34130 : 90%
# 1.0     2959 : 10%

# The first column is the label; every remaining column is a feature
cols = list(freeze.columns)  # 95 columns
y_cols = cols[0]
x_cols = cols[1:]
print(y_cols)  # 동파유무

# Unseeded 60/40 split of the whole frame
train_set, test_set = train_test_split(freeze, test_size=0.4)

# model
obj = XGBClassifier()
model = obj.fit(train_set[x_cols], train_set[y_cols])

# Feature importance scores and plot
score = model.get_booster().get_fscore()
print(score)

plot_importance(model)
plt.show()
Ensemble的更多相关文章
- 推荐系统resys小组线下活动见闻2009-08-22
http://www.tuicool.com/articles/vUvQVn 时间2009-08-30 15:13:22 不周山原文 http://www.wentrue.net/blog/?p= ...
- 自适应注意力机制在Image Caption中的应用
在碎片化阅读充斥眼球的时代,越来越少的人会去关注每篇论文背后的探索和思考. 在这个栏目里,你会快速 get 每篇精选论文的亮点和痛点,时刻紧跟 AI 前沿成果. 点击本文底部的「阅读原文」即刻加入社区 ...
- 我的第一次面试 —— 腾讯 AI安全 一面总结
前言 在校两年半,没经历过面试的毒打,第一次面试给了腾讯,周二晚上学长帮推的简历周三下午就打电话来问周四晚上有没有空面试.那天下午还在赶着数据库的实验报告,脑子有点转不过来就说了有空,然后仔细一看好像 ...
随机推荐
- 全局鼠标钩子:WH_MOUSE_LL, 在【 win 10 上网本】上因为太卡,运行中丢失全局鼠标钩子
一台几年前买的上网本,让我安装了一个 win 10,然后用来测试程序的时候, 发现 使用 SetWindowsHookEx(WH_MOUSE_LL, mouseHook, GetModuleHandl ...
- 快速找出网站中可能存在的XSS漏洞实践
笔者写了一些XSS漏洞的挖掘过程记录下来,方便自己也方便他人. 一.背景 在本篇文章当中会一permeate生态测试系统为例,笔者此前写过一篇文章当中笔者已经讲解如何安装permeate渗透测试系统, ...
- 【优秀的iPhone/iPad数据恢复工具】Omni Recover for Mac 2.5
[简介] 今天和大家分享最新的 Omni Recover for Mac 2.5 版本,这是一款Mac上优秀的iPhone/iPad设备数据恢复工具,支持恢复误删除的短信.照片.视频.文档.通话记录等 ...
- Python 几个常见函数
本文主要总结常见的函数知识点. 1.zip函数 用来并行迭代,可以把两个序列并在一起,然后返回一个元组的列表 names = ['Ann','Jame','Anla'] ages = [11,12,1 ...
- MVC实例应用模式
MVC实例应用模式 1.可用性: 比如异常处理 2.可修改性: 比如用接口实现 3.性能战术: 4.易用性战术: 分层实现 5.可测试性战术: 实现对其接口进行测试,并不需要对其实现方法进行 6.安全 ...
- 2019第十二届全国大学生信息安全实践创新赛线上赛Writeup
本文章来自https://www.cnblogs.com/iAmSoScArEd/p/10780242.html 未经允许不得转载! 1.MISC-签到 下载附件后,看到readme.txt打开后提 ...
- react-native不是内部或 外部命令,也不是可运行的程序或批处理文件
1.执行node命令时提示:node不是内部或外部命令,也不是可运行的程序或批处理文件. 原因环境变量没有指向node安装目录 path:C:\Program Files\nodejs\ 2.reac ...
- exists,in的区别-mysql
如说两张表一张是用户表TDefUser(userid,address,phone),一张是消费表TAccConsume(userid,time,amount),我要查消费超过5000的用户记录,那么我 ...
- 浅入深出Vue:工具准备之PostMan安装配置及Mock服务配置
浅入深出Vue之工具准备(二):PostMan安装配置 由于家中有事,文章没顾得上.在此说声抱歉,这是工具准备的最后一章. 接下来就是开始环境搭建了~尽情期待 工欲善其事必先利其器,让我们先做好准备工 ...
- 在线制作微信跳转浏览器下载app/打开指定页面源码
微信自动跳转外部浏览器下载app/打开指定页面源码 源码说明: 适用安卓和苹果系统,支持任何网页链接.并且无论链接是否已经被微信拦截,均可实现微信内自动跳转浏览器打开. 生成的跳转链接具有极佳的防拦截 ...