吴裕雄 python 机器学习——数据预处理嵌入式特征选择

import numpy as np

import  matplotlib.pyplot as plt

from sklearn.svm import LinearSVC

from sklearn.linear_model import Lasso

from sklearn.model_selection import train_test_split

from sklearn.feature_selection import SelectFromModel

from sklearn.datasets import load_digits,load_diabetes

# Embedded feature selection during preprocessing: the SelectFromModel model
def test_SelectFromModel():
    """Fit a SelectFromModel selector on the digits data and report the result.

    An L1-penalised ``LinearSVC`` is used as the underlying estimator and the
    selector is configured with ``threshold='mean'``. After fitting, the
    function prints the fitted ``threshold_`` and the indices of the features
    the selector keeps.
    """
    dataset = load_digits()
    features, labels = dataset.data, dataset.target

    # scikit-learn does not support penalty='l1' together with dual=True.
    svc = LinearSVC(penalty='l1', dual=False)
    selector = SelectFromModel(estimator=svc, threshold='mean')
    selector.fit(features, labels)

    # The reduced feature matrix is computed here but not used afterwards.
    selector.transform(features)

    print("Threshold %s" % (selector.threshold_,))
    print("Support is %s" % (selector.get_support(indices=True),))

# Run the SelectFromModel demo: call test_SelectFromModel()

test_SelectFromModel()

def load_diabetes():
    """Return the features and targets obtained from ``load_digits()``.

    NOTE(review): despite its name, this helper loads the *digits* dataset,
    and defining it at module level rebinds the ``load_diabetes`` name that
    the file imports from ``sklearn.datasets``. Confirm this is intended
    before relying on the name.

    :return: tuple ``(data, target)`` taken from the loaded dataset.
    """
    dataset = load_digits()
    return dataset.data, dataset.target

def test_Lasso(*data):
    """Plot the relationship between Lasso's alpha and coefficient sparsity.

    For each alpha produced by ``np.logspace(-2, 2)`` a ``Lasso`` model is
    fitted and the number of coefficients whose absolute value is below 1e-5
    is recorded; the counts are then plotted against alpha on a log x-axis.

    :param data: exactly two positional arguments, the feature matrix ``X``
        and the target vector ``y``.
    """
    features, targets = data
    alpha_grid = np.logspace(-2, 2)

    def count_zero_coefs(alpha):
        # Fit one model and count the coefficients that are effectively zero.
        model = Lasso(alpha=alpha)
        model.fit(features, targets)
        return sum(1 for weight in model.coef_ if abs(weight) < 1e-5)

    zero_counts = [count_zero_coefs(alpha) for alpha in alpha_grid]

    # Plot the zero counts against alpha.
    figure = plt.figure()
    axes = figure.add_subplot(1, 1, 1)
    axes.plot(alpha_grid, zero_counts)
    axes.set_xlabel(r"$\alpha$")
    axes.set_xscale("log")
    # Leave one unit of headroom above the total number of features.
    axes.set_ylim(0, features.shape[1] + 1)
    axes.set_ylabel("zeros in coef")
    axes.set_title("Sparsity In Lasso")
    plt.show()

# Load the data through the load_diabetes() helper defined in this file and run the Lasso sparsity experiment on it.
X,y = load_diabetes()

test_Lasso(X,y)

def test_LinearSVC(*data):
    """Plot the relationship between LinearSVC's C and coefficient sparsity.

    For each C produced by ``np.logspace(-2, 2)`` an L1-penalised
    ``LinearSVC`` is fitted and the number of entries of ``coef_`` whose
    absolute value is below 1e-5 is recorded; the counts are then plotted
    against C on a log x-axis.

    :param data: exactly two positional arguments, the feature matrix ``X``
        and the label vector ``y``.
    """
    features, labels = data
    c_grid = np.logspace(-2, 2)

    def count_zero_coefs(c_value):
        # Fit one classifier and count effectively-zero weights over all rows.
        classifier = LinearSVC(C=c_value, penalty='l1', dual=False)
        classifier.fit(features, labels)
        return sum(
            1
            for weights in classifier.coef_
            for weight in weights
            if abs(weight) < 1e-5
        )

    zero_counts = [count_zero_coefs(c_value) for c_value in c_grid]

    # Plot the zero counts against C.
    figure = plt.figure()
    axes = figure.add_subplot(1, 1, 1)
    axes.plot(c_grid, zero_counts)
    axes.set_xlabel("C")
    axes.set_xscale("log")
    axes.set_ylabel("zeros in coef")
    axes.set_title("Sparsity In SVM")
    plt.show()

# Load the data through the load_diabetes() helper defined in this file and run the LinearSVC sparsity experiment on it.
X,y = load_diabetes()

test_LinearSVC(X,y)

吴裕雄 python 机器学习——数据预处理嵌入式特征选择的更多相关文章

吴裕雄 python 机器学习——数据预处理包裹式特征选取模型
from sklearn.svm import LinearSVC from sklearn.datasets import load_iris from sklearn.feature_select ...
吴裕雄 python 机器学习——数据预处理过滤式特征选取SelectPercentile模型
from sklearn.feature_selection import SelectPercentile,f_classif #数据预处理过滤式特征选取SelectPercentile模型 def ...
吴裕雄 python 机器学习——数据预处理过滤式特征选取VarianceThreshold模型
from sklearn.feature_selection import VarianceThreshold #数据预处理过滤式特征选取VarianceThreshold模型 def test_Va ...
吴裕雄 python 机器学习——数据预处理正则化Normalizer模型
from sklearn.preprocessing import Normalizer #数据预处理正则化Normalizer模型 def test_Normalizer(): X=[[1,2,3, ...
吴裕雄 python 机器学习——数据预处理标准化MaxAbsScaler模型
from sklearn.preprocessing import MaxAbsScaler #数据预处理标准化MaxAbsScaler模型 def test_MaxAbsScaler(): X=[[ ...
吴裕雄 python 机器学习——数据预处理标准化StandardScaler模型
from sklearn.preprocessing import StandardScaler #数据预处理标准化StandardScaler模型 def test_StandardScaler() ...
吴裕雄 python 机器学习——数据预处理标准化MinMaxScaler模型
from sklearn.preprocessing import MinMaxScaler #数据预处理标准化MinMaxScaler模型 def test_MinMaxScaler(): X=[[ ...
吴裕雄 python 机器学习——数据预处理二元化OneHotEncoder模型
from sklearn.preprocessing import OneHotEncoder #数据预处理二元化OneHotEncoder模型 def test_OneHotEncoder(): X ...
吴裕雄 python 机器学习——数据预处理二元化Binarizer模型
from sklearn.preprocessing import Binarizer #数据预处理二元化Binarizer模型 def test_Binarizer(): X=[[1,2,3,4,5 ...

随机推荐

CodeForces - 645 C.Enduring Exodus
快乐二分用前缀和随便搞一下 #include <cstdio> using namespace std; ; int p[N]; ; inline int msum(int a, int ...
[TJOI2017] DNA - 后缀数组,稀疏表
[TJOI2017] DNA Description 求模式串与主串的匹配次数,容错不超过三个字符. Solution 枚举每个开始位置,进行暴力匹配,直到失配次数用光或者匹配成功.考虑到容错量很小, ...
[CF705B] Spider Man - 博弈论
[CF705B] Description ICG 游戏有若干个环,每次操作将一个环断成非空的两部分,节点数总和不变.集合初态为空,每次向集合中添加一个环,询问当前集合用于游戏的胜负. \(n \le ...
js清空子节点
删除全部子节点 function removeAllChild(){ var div = document.getElementById("div1"); while(div.ha ...
springboot中集成memcached
前言 Memcached 是一个高性能的分布式内存对象缓存系统,其存储性能在某些方面不比redis差,甚至在文本类型数据的存储上性能略优于redis,本文将介绍如何在springboot中集成memc ...
pytorch怎么入门学习
pytorch怎么入门学习 https://www.zhihu.com/question/55720139
2分钟就能学会的【Google/百度搜索大法】了解一下？
之前我在知乎回答了「日常生活中有哪些十分钟就能学会并可以终生受用的技能」,现在也整理分享给公众号的朋友们. 作为一个入坑8年国际贸易的老阿姨,真心推荐[google搜索大法](同样适用于百度). 2分 ...
js判断有无属性及新添属性
1.Object 判断有无新属性 obj.hasOwnProperty(propertyName) //有无指定属性propertyName Object.keys(obj) ...
C++如何输入单行和多行带空格的字符串并将字符串倒序输出
首先,我们知道在C++中,空格或者回车被认为是字符串的分割标志,使用cin输入string类的字符串时遇到会自动停止接收输入例如,当如下程序时: #include <bits/stdc++.h ...
chrome firefox浏览器屏蔽百度热搜
我是原文操作点击拦截元素,然后选择页面元素,添加

吴裕雄 python 机器学习——数据预处理嵌入式特征选择

吴裕雄 python 机器学习——数据预处理嵌入式特征选择的更多相关文章

随机推荐

热门专题