吴裕雄 python 机器学习——集成学习随机森林RandomForestClassifier分类模型

import numpy as np

import matplotlib.pyplot as plt

from sklearn import datasets,ensemble

from sklearn.model_selection import train_test_split

def load_data_classification():
    """Load the scikit-learn digits dataset and split it for classification.

    The split is stratified on the labels, uses a fixed ``random_state`` of 0
    and holds out a quarter of the samples as the test set.

    Returns:
        The result of ``train_test_split`` called with the features first and
        the labels second, i.e. X_train, X_test, y_train, y_test.
    """
    digits_bunch = datasets.load_digits()
    features = digits_bunch.data
    labels = digits_bunch.target
    return train_test_split(
        features,
        labels,
        test_size=0.25,
        random_state=0,
        stratify=labels,
    )

#集成学习随机森林RandomForestClassifier分类模型

def test_RandomForestClassifier(*data):
    """Fit a default RandomForestClassifier and print its train/test scores.

    Args:
        *data: Exactly four positional values, in the order
            X_train, X_test, y_train, y_test.

    Raises:
        ValueError: If ``data`` does not contain exactly four items (raised
            by the tuple unpacking below).
    """
    X_train, X_test, y_train, y_test = data
    clf = ensemble.RandomForestClassifier()
    clf.fit(X_train, y_train)
    # The label used to read "Traing Score"; spelled out correctly so it
    # matches the "Training Score" label used by the plotting helpers.
    print("Training Score:%f" % clf.score(X_train, y_train))
    print("Testing Score:%f" % clf.score(X_test, y_test))

# Load the train/test split of the classification data

X_train,X_test,y_train,y_test=load_data_classification()

# Run test_RandomForestClassifier on that split

test_RandomForestClassifier(X_train,X_test,y_train,y_test)

def test_RandomForestClassifier_num(*data):
    """Plot how RandomForestClassifier scores vary with ``n_estimators``.

    A forest is fitted for every odd tree count from 1 to 99. Its score on
    the training split and on the test split is recorded, and both curves
    are drawn on one figure, which is then shown.

    Args:
        *data: Four positional values, in the order
            X_train, X_test, y_train, y_test.
    """
    train_X, test_X, train_y, test_y = data
    tree_counts = np.arange(1, 100, step=2)
    figure = plt.figure()
    axes = figure.add_subplot(1, 1, 1)

    # One (training score, testing score) pair per tree count, in sweep order.
    score_pairs = []
    for tree_count in tree_counts:
        forest = ensemble.RandomForestClassifier(n_estimators=tree_count)
        forest.fit(train_X, train_y)
        score_pairs.append(
            (forest.score(train_X, train_y), forest.score(test_X, test_y))
        )
    train_curve = [pair[0] for pair in score_pairs]
    test_curve = [pair[1] for pair in score_pairs]

    axes.plot(tree_counts, train_curve, label="Training Score")
    axes.plot(tree_counts, test_curve, label="Testing Score")
    axes.set_xlabel("estimator num")
    axes.set_ylabel("score")
    axes.legend(loc="lower right")
    axes.set_ylim(0, 1.05)
    plt.suptitle("RandomForestClassifier")
    plt.show()

# Run test_RandomForestClassifier_num on the split loaded above

test_RandomForestClassifier_num(X_train,X_test,y_train,y_test)

def test_RandomForestClassifier_max_depth(*data):
    """Plot how RandomForestClassifier scores vary with ``max_depth``.

    A forest is fitted for each depth limit from 1 to 19. The training and
    testing scores are collected and drawn as two curves on one figure,
    which is then shown.

    Args:
        *data: Four positional values, in the order
            X_train, X_test, y_train, y_test.
    """
    train_X, test_X, train_y, test_y = data
    depth_limits = range(1, 20)
    figure = plt.figure()
    axes = figure.add_subplot(1, 1, 1)

    train_curve = []
    test_curve = []
    for depth_limit in depth_limits:
        forest = ensemble.RandomForestClassifier(max_depth=depth_limit)
        forest.fit(train_X, train_y)
        train_score = forest.score(train_X, train_y)
        test_score = forest.score(test_X, test_y)
        train_curve.append(train_score)
        test_curve.append(test_score)

    # Draw the training curve first so line order matches the legend order.
    labelled_curves = (
        ("Training Score", train_curve),
        ("Testing Score", test_curve),
    )
    for curve_label, curve_values in labelled_curves:
        axes.plot(depth_limits, curve_values, label=curve_label)

    axes.set_xlabel("max_depth")
    axes.set_ylabel("score")
    axes.legend(loc="lower right")
    axes.set_ylim(0, 1.05)
    plt.suptitle("RandomForestClassifier")
    plt.show()

# Run test_RandomForestClassifier_max_depth on the split loaded above

test_RandomForestClassifier_max_depth(X_train,X_test,y_train,y_test)

def test_RandomForestClassifier_max_features(*data):
    """Plot how RandomForestClassifier scores vary with ``max_features``.

    The sweep covers evenly spaced float values from 0.01 to 1.0, using
    ``np.linspace`` with its default sample count. A forest is fitted for
    each value, and its training and testing scores are plotted as two
    curves on one figure, which is then shown.

    Args:
        *data: Four positional values, in the order
            X_train, X_test, y_train, y_test.
    """
    train_X, test_X, train_y, test_y = data
    feature_fractions = np.linspace(0.01, 1.0)
    figure = plt.figure()
    axes = figure.add_subplot(1, 1, 1)

    # Collect (training score, testing score) per swept value, in order.
    score_pairs = []
    for fraction in feature_fractions:
        forest = ensemble.RandomForestClassifier(max_features=fraction)
        forest.fit(train_X, train_y)
        on_train = forest.score(train_X, train_y)
        on_test = forest.score(test_X, test_y)
        score_pairs.append((on_train, on_test))
    train_curve = [on_train for on_train, _ in score_pairs]
    test_curve = [on_test for _, on_test in score_pairs]

    axes.plot(feature_fractions, train_curve, label="Training Score")
    axes.plot(feature_fractions, test_curve, label="Testing Score")
    axes.set_xlabel("max_feature")
    axes.set_ylabel("score")
    axes.legend(loc="lower right")
    axes.set_ylim(0, 1.05)
    plt.suptitle("RandomForestClassifier")
    plt.show()

# Run test_RandomForestClassifier_max_features on the split loaded above

test_RandomForestClassifier_max_features(X_train,X_test,y_train,y_test)

吴裕雄 python 机器学习——集成学习随机森林RandomForestClassifier分类模型的更多相关文章

吴裕雄 python 机器学习——集成学习随机森林RandomForestRegressor回归模型
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets,ensemble from sklear ...
吴裕雄 python 机器学习——集成学习梯度提升决策树GradientBoostingRegressor回归模型
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets,ensemble from sklear ...
吴裕雄 python 机器学习——集成学习AdaBoost算法回归模型
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets,ensemble from sklear ...
吴裕雄 python 机器学习——集成学习AdaBoost算法分类模型
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets,ensemble from sklear ...
机器学习:集成学习:随机森林.GBDT
集成学习(Ensemble Learning) 集成学习的思想是将若干个学习器(分类器&回归器)组合之后产生一个新学习器.弱分类器(weak learner)指那些分类准确率只稍微好于随机猜测 ...
吴裕雄 python 机器学习——伯努利贝叶斯BernoulliNB模型
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets,naive_bayes from skl ...
吴裕雄 python 机器学习——数据预处理过滤式特征选取SelectPercentile模型
from sklearn.feature_selection import SelectPercentile,f_classif #数据预处理过滤式特征选取SelectPercentile模型 def ...
吴裕雄 python 机器学习——数据预处理过滤式特征选取VarianceThreshold模型
from sklearn.feature_selection import VarianceThreshold #数据预处理过滤式特征选取VarianceThreshold模型 def test_Va ...
吴裕雄 python 机器学习——数据预处理字典学习模型
from sklearn.decomposition import DictionaryLearning #数据预处理字典学习DictionaryLearning模型 def test_Diction ...

随机推荐

AtCoder Beginner Contest 154 题解
人生第一场 AtCoder,纪念一下话说年后的 AtCoder 比赛怎么这么少啊(大雾 AtCoder Beginner Contest 154 题解 A - Remaining Balls We ...
docker 的基本命令
查看命令 docker images 查看镜像 docker ps 查看正在运行的容器 docker ps -a 查看所有的容器 docker inspect container-name 记录着 D ...
How to write a paper in a weekend - by Prof. Pete Carr
Key points: don't procrastinate; review the notes and renew the literature search; determine who you ...
Selenium3+python自动化007-警告框
警告框 alert = driver.switch_to.alert alert.text() alert.accept() alert.dismiss() # 导selenium包 from sel ...
RN开发-windows环境搭建
1.安装jdk,sdk,C++运行环境(cygwin,Windows SDK,mingw),node.js和git 2.设置全局使用指定的镜像打开git-cmd.exe ...
jQuery---版本问题
jQuery的版本官网下载地址:http://jquery.com/download/ jQuery版本有很多,分为1.x 2.x 3.x 大版本分类: 1.x版本:能够兼容IE678浏览器 2.x ...
centos6.8安装教程
特别详细的一个安装教程以及镜像下载等,用虚拟机不会安装或者安装失败的可以参考一下. https://blog.csdn.net/wu_zeqin/article/details/79833046
（转）KMP算法
转自:http://blog.csdn.net/yutianzuijin/article/details/11954939 我们首先用一个图来描述kmp算法的思想.在字符串O中寻找f,当匹配到位置i时 ...
document.getElementById("id").value与$("#id").val()之间的区别
本文链接:https://blog.csdn.net/mottohlm/article/details/78364196....今天在项目中遇到这么一个JS报错:原因是代码中有这么一段:对,就是var ...
vs2019 scanf 解决 C4996问题
1. 首先选择项目 2. 然后选择最下面那行的工程属性, 其后于此处 3. 添加上 :_CRT_SECURE_NO_WARNINGS 最后保存,使用 scanf 读取即无报错了

吴裕雄 python 机器学习——集成学习随机森林RandomForestClassifier分类模型

吴裕雄 python 机器学习——集成学习随机森林RandomForestClassifier分类模型的更多相关文章

随机推荐

热门专题