吴裕雄 python 机器学习——分类决策树模型

import numpy as np

import matplotlib.pyplot as plt

from sklearn import datasets

from sklearn.model_selection import train_test_split

from sklearn.tree import DecisionTreeClassifier,DecisionTreeRegressor

def load_data():
    """Load the iris dataset shipped with scikit-learn and split it for classification.

    Returns:
        The value returned by ``train_test_split``: the samples and labels
        partitioned with 25% held out for testing, ``random_state=0``, and
        stratification on the labels. Callers in this file unpack it as
        ``X_train, X_test, y_train, y_test``.
    """
    dataset = datasets.load_iris()
    features, labels = dataset.data, dataset.target
    # Stratify on the labels so both partitions keep the class proportions.
    return train_test_split(
        features,
        labels,
        test_size=0.25,
        random_state=0,
        stratify=labels,
    )

#分类决策树DecisionTreeClassifier模型

def test_DecisionTreeClassifier(*data):
    """Fit a default DecisionTreeClassifier and print its training and testing scores.

    Args:
        *data: Exactly four positional values, in this order:
            X_train, X_test, y_train, y_test.

    Raises:
        ValueError: If ``data`` does not contain exactly four items
            (raised by the tuple unpacking).
    """
    train_features, test_features, train_labels, test_labels = data

    model = DecisionTreeClassifier()
    model.fit(train_features, train_labels)

    train_score = model.score(train_features, train_labels)
    print("Training score:%f" % train_score)

    test_score = model.score(test_features, test_labels)
    print("Testing score:%f" % test_score)

# Build the train/test split used for the classification experiments.
# These module-level names are reused by the later top-level calls.

X_train,X_test,y_train,y_test=load_data()

# Run test_DecisionTreeClassifier on the split.

test_DecisionTreeClassifier(X_train,X_test,y_train,y_test)

def test_DecisionTreeClassifier_criterion(*data):
    """Compare DecisionTreeClassifier scores across the ``criterion`` setting.

    For each of 'gini' and 'entropy', fits a classifier on the training data
    and prints the criterion name followed by the training and testing scores.

    Args:
        *data: Exactly four positional values, in this order:
            X_train, X_test, y_train, y_test.
    """
    train_features, test_features, train_labels, test_labels = data

    for criterion_name in ('gini', 'entropy'):
        model = DecisionTreeClassifier(criterion=criterion_name)
        model.fit(train_features, train_labels)

        print("criterion:%s" % criterion_name)

        train_score = model.score(train_features, train_labels)
        print("Training score:%f" % train_score)

        test_score = model.score(test_features, test_labels)
        print("Testing score:%f" % test_score)

# Run test_DecisionTreeClassifier_criterion on the module-level split.

test_DecisionTreeClassifier_criterion(X_train,X_test,y_train,y_test)

def test_DecisionTreeClassifier_splitter(*data):
    """Compare DecisionTreeClassifier scores across the ``splitter`` setting.

    For each of 'best' and 'random', fits a classifier on the training data
    and prints the splitter name followed by the training and testing scores.

    Args:
        *data: Exactly four positional values, in this order:
            X_train, X_test, y_train, y_test.
    """
    train_features, test_features, train_labels, test_labels = data

    for splitter_name in ('best', 'random'):
        model = DecisionTreeClassifier(splitter=splitter_name)
        model.fit(train_features, train_labels)

        print("splitter:%s" % splitter_name)

        train_score = model.score(train_features, train_labels)
        print("Training score:%f" % train_score)

        test_score = model.score(test_features, test_labels)
        print("Testing score:%f" % test_score)

# Run test_DecisionTreeClassifier_splitter on the module-level split.

test_DecisionTreeClassifier_splitter(X_train,X_test,y_train,y_test)

def test_DecisionTreeClassifier_depth(*data,maxdepth):
    """Plot DecisionTreeClassifier training/testing scores against ``max_depth``.

    Fits one classifier for every depth in ``np.arange(1, maxdepth)`` (that is,
    1 up to and including ``maxdepth - 1``), records the score on the training
    and testing data, and shows both curves in a single matplotlib figure.

    Args:
        *data: Exactly four positional values, in this order:
            X_train, X_test, y_train, y_test.
        maxdepth: Keyword-only exclusive upper bound of the depths to try.

    Raises:
        ValueError: If ``data`` does not contain exactly four items
            (raised by the tuple unpacking).
    """
    X_train, X_test, y_train, y_test = data

    # The upper bound is exclusive: maxdepth itself is not evaluated.
    depths = np.arange(1, maxdepth)
    training_scores = []
    testing_scores = []
    for depth in depths:
        clf = DecisionTreeClassifier(max_depth=depth)
        clf.fit(X_train, y_train)
        training_scores.append(clf.score(X_train, y_train))
        testing_scores.append(clf.score(X_test, y_test))

    # Plot both score curves on one axes.
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    # Legend label corrected from the misspelled "traing score".
    ax.plot(depths, training_scores, label="training score", marker='o')
    ax.plot(depths, testing_scores, label="testing score", marker='*')
    ax.set_xlabel("maxdepth")
    ax.set_ylabel("score")
    ax.set_title("Decision Tree Classification")
    ax.legend(framealpha=0.5, loc='best')
    plt.show()

# Run test_DecisionTreeClassifier_depth on the module-level split,
# evaluating depths 1 through 99 (maxdepth is an exclusive bound).

test_DecisionTreeClassifier_depth(X_train,X_test,y_train,y_test,maxdepth=100)

import os

import pydotplus

from io import StringIO

from sklearn.tree import export_graphviz

from sklearn.tree import DecisionTreeClassifier,DecisionTreeRegressor

# Rebuild the train/test split (rebinds the module-level names).
X_train,X_test,y_train,y_test=load_data()

# Fit a decision tree with default settings on the training data.
clf = DecisionTreeClassifier()

clf.fit(X_train,y_train)

# Export the fitted tree via export_graphviz; the second positional argument
# is the output file.
# NOTE(review): "F://out" is a hard-coded path that looks Windows-specific —
# confirm it is valid on the target machine.
export_graphviz(clf,"F://out")

吴裕雄 python 机器学习——分类决策树模型的更多相关文章

吴裕雄 python 机器学习——回归决策树模型
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets from sklearn.model_s ...
吴裕雄 python 机器学习——核化PCAKernelPCA模型
# -*- coding: utf-8 -*- import numpy as np import matplotlib.pyplot as plt from sklearn import datas ...
吴裕雄 python 机器学习——KNN分类KNeighborsClassifier模型
import numpy as np import matplotlib.pyplot as plt from sklearn import neighbors, datasets from skle ...
吴裕雄 python 机器学习——支持向量机SVM非线性分类SVC模型
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets, linear_model,svm fr ...
吴裕雄 python 机器学习——支持向量机线性分类LinearSVC模型
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets, linear_model,svm fr ...
吴裕雄 python 机器学习——集成学习梯度提升决策树GradientBoostingRegressor回归模型
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets,ensemble from sklear ...
吴裕雄 python 机器学习——集成学习随机森林RandomForestClassifier分类模型
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets,ensemble from sklear ...
吴裕雄 python 机器学习——集成学习AdaBoost算法分类模型
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets,ensemble from sklear ...
吴裕雄 python 机器学习——模型选择分类问题性能度量
import numpy as np import matplotlib.pyplot as plt from sklearn.svm import SVC from sklearn.datasets ...

随机推荐

VS调试快捷键配置更改
VS进行调试时,默认情况下需按下Fn+F5等组合按键,手短的用起来很不便利如何去掉组合键只按下F5? 解决:即按下Fn+Esc,然后就可以直接按下F1-F12使用VS的快捷键,如果想回到组合键也是同 ...
为什么选择Go语言 GO语言都能做什么产品
Go语言,又称Golang,是Google开发的一款静态强类型、编译型、并发型,并具有垃圾回收机制的编程语言,它的运行速度非常之快,同时还有如下特性:具有一流的标准库、无继承关系、支持多核;同时它还有 ...
php使用insert语句动态添加用户
<html> <head> <title>Adding User</title> </head> <body> <h2&g ...
ulimit -a
在linux中执行ulimit -a 即可查询linux相关的参数用ulimit命令是可以修改这些配置的命令的格式:ulimit [-SHacdefilmnpqrstuvx] [limit] 中间的 ...
sequelize 中文文档
https://demopark.github.io/sequelize-docs-Zh-CN/
7_CentOS下安装和卸载AdobeReader
曾经最喜欢Fedora 版本的Linux,但是因为现在Fedora的界面实在太花里胡哨了,所以最近开始捣鼓CentOS,本来 RedHat的EL版本也是一个不错的选择,最后想想还是用社区版的。 话说Ce ...
Oracle 外键级联更新
Oracle数据库中,外键约束只允许级联删除,不允许级联更新,因此,如果想要实现主表数据更新后,子表外键自动更新,只能取消外键关系,通过前端程序来维护实现完整引用,一个代替的解决方案是使用延迟约束和触 ...
UEFI引导过程及windows引导修复
UEFI启动是一种新的主板引导项.传统引导方式就是Legacy模式. CSM的选项是UEFI模拟Legacy模式启动,选中后则可使用Legacy模式启动机器. Legacy模式仅支持传统的MBR分区, ...
Windows FFMPEG开发环境配置
1.去FFMPEG网站上下载Dev版本的库,里面有我们需要的头文件和lib文件,然后下载Shared版本的库,里面有我们需要的dll文件 http://ffmpeg.zeranoe.com/build ...
2018-2019-2 20175227张雪莹《Java程序设计》实验三《敏捷开发与XP实践》
2018-2019-2 20175227张雪莹<Java程序设计> 实验三 <敏捷开发与XP实践> 实验报告封面课程:Java程序设计班级:1752班姓名:张雪莹学号: ...

吴裕雄 python 机器学习——分类决策树模型

吴裕雄 python 机器学习——分类决策树模型的更多相关文章

随机推荐

热门专题