import numpy as np
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn import datasets
from sklearn.semi_supervised import LabelSpreading

def load_data():
    ''' Load the dataset '''
    digits = datasets.load_digits()
    ###### shuffle the samples …
import numpy as np
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn import datasets
from sklearn.semi_supervised import LabelPropagation

def load_data():
    ''' Load the dataset '''
    digits = datasets.load_digits()
    ###### shuffle the samples ########
    rng = …
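Both semi-supervised previews stop before any labels are hidden or any model is fitted. Below is a minimal sketch of how the LabelSpreading case might continue, assuming the usual pattern of marking unlabeled points with -1 and scoring on them; the kernel choice and the labeled fraction are illustrative, and the LabelPropagation file presumably follows the same shape.

import numpy as np
from sklearn import datasets, metrics
from sklearn.semi_supervised import LabelSpreading

digits = datasets.load_digits()
rng = np.random.RandomState(0)
indices = rng.permutation(len(digits.data))          # shuffle the samples
X, y = digits.data[indices], digits.target[indices]

# keep the labels of only 10% of the samples; -1 marks "unlabeled"
n_labeled = len(y) // 10
y_train = np.copy(y)
y_train[n_labeled:] = -1

clf = LabelSpreading(kernel='knn', n_neighbors=7, max_iter=100)
clf.fit(X, y_train)
# evaluate only on the points whose labels were hidden
predicted = clf.transduction_[n_labeled:]
print(metrics.classification_report(y[n_labeled:], predicted))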
import numpy as np
import matplotlib.pyplot as plt
from sklearn import cluster
from sklearn.metrics import adjusted_rand_score
from sklearn.datasets import make_blobs

def create_data(centers, num=100, std=0.7):
    X, labels_true = make_b…
import numpy as np
import matplotlib.pyplot as plt
from sklearn import cluster
from sklearn.metrics import adjusted_rand_score
from sklearn.datasets import make_blobs

def create_data(centers, num=100, std=0.7):
    X, labels_true = make_b…
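The clustering previews end at the data generator, so the estimator under test is not visible. The sketch below shows one plausible continuation, assuming a KMeans run scored with the adjusted Rand index against the true blob labels; the centers and sample counts are illustrative.

from sklearn import cluster
from sklearn.metrics import adjusted_rand_score
from sklearn.datasets import make_blobs

def create_data(centers, num=100, std=0.7):
    X, labels_true = make_blobs(n_samples=num, centers=centers, cluster_std=std)
    return X, labels_true

centers = [[1, 1], [2, 2], [1, 2], [10, 20]]   # hypothetical centers; the originals are truncated
X, labels_true = create_data(centers, num=1000, std=0.5)

clst = cluster.KMeans(n_clusters=4)
predicted = clst.fit_predict(X)
print("ARI: %s" % adjusted_rand_score(labels_true, predicted))
print("inertia: %s" % clst.inertia_)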
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets, linear_model, svm
from sklearn.model_selection import train_test_split

def load_data_regression():
    ''' Load the dataset for regression '''
    diabetes = datasets.load_diabetes()
    # use scikit-lea…
import numpy as np
import matplotlib.pyplot as plt
from sklearn import neighbors, datasets
from sklearn.model_selection import train_test_split

def create_regression_data(n):
    ''' Create the dataset used for regression '''
    X = 5 * np.random.rand(n, 1)
    y = np.sin(X).ravel()
    # every …
import numpy as np
import matplotlib.pyplot as plt
from sklearn import neighbors, datasets
from sklearn.model_selection import train_test_split

def load_classification_data():
    # use the handwritten Digit Dataset bundled with scikit-learn
    digits = datasets.load_digits()
    X…
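A sketch of how the classification half of the k-NN examples might continue, assuming a KNeighborsClassifier on a digits train/test split; the split ratio and the value of k are illustrative.

from sklearn import neighbors, datasets
from sklearn.model_selection import train_test_split

digits = datasets.load_digits()
X_train, X_test, y_train, y_test = train_test_split(
    digits.data, digits.target, test_size=0.25, random_state=0, stratify=digits.target)

clf = neighbors.KNeighborsClassifier(n_neighbors=5, weights='uniform')
clf.fit(X_train, y_train)
print("Training score: %f" % clf.score(X_train, y_train))
print("Testing score: %f" % clf.score(X_test, y_test))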
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets, linear_model, svm
from sklearn.model_selection import train_test_split

def load_data_regression():
    ''' Load the dataset for regression '''
    diabetes = datasets.load_diabetes()
    # use scikit-lea…
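This SVM preview (and the identical one earlier) is cut off before any estimator appears. Below is a minimal sketch assuming a linear support vector regressor on the diabetes split; the choice of LinearSVR and its parameters are assumptions, not the original code.

from sklearn import datasets, svm
from sklearn.model_selection import train_test_split

diabetes = datasets.load_diabetes()
X_train, X_test, y_train, y_test = train_test_split(
    diabetes.data, diabetes.target, test_size=0.25, random_state=0)

regr = svm.LinearSVR(epsilon=0.1, C=1.0, max_iter=10000)
regr.fit(X_train, y_train)
print("Coefficients: %s, intercept: %s" % (regr.coef_, regr.intercept_))
print("Score: %.2f" % regr.score(X_test, y_test))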
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor

def load_data():
    ''' Load the dataset for classification. The dataset uses the scikit-…
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor

def create_data(n):
    np.random.seed(0)
    X = 5 * np…
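The decision-tree previews never reach the estimators they import. The sketch below assumes the regression file builds the usual noisy-sine toy data (a guess based on the sin-based generator in the k-NN file) and fits a DecisionTreeRegressor.

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor

def create_data(n):
    ''' noisy-sine toy data; this generator is an assumption, the original is truncated '''
    np.random.seed(0)
    X = 5 * np.random.rand(n, 1)
    y = np.sin(X).ravel()
    y[::5] += 3 * (0.5 - np.random.rand(n // 5))   # perturb every 5th target
    return train_test_split(X, y, test_size=0.25, random_state=1)

X_train, X_test, y_train, y_test = create_data(100)
regr = DecisionTreeRegressor(max_depth=3)
regr.fit(X_train, y_train)
print("Training score: %f" % regr.score(X_train, y_train))
print("Testing score: %f" % regr.score(X_test, y_test))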
import numpy as np
from sklearn import datasets, linear_model
from sklearn.model_selection import train_test_split

def load_data():
    diabetes = datasets.load_diabetes()
    return train_test_split(diabetes.data, diabetes.target, test_size=0.25, random_state=0…
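The return statement above is truncated; here is a sketch of the complete flow, assuming an ordinary least-squares fit on that split.

import numpy as np
from sklearn import datasets, linear_model
from sklearn.model_selection import train_test_split

def load_data():
    diabetes = datasets.load_diabetes()
    return train_test_split(diabetes.data, diabetes.target,
                            test_size=0.25, random_state=0)

X_train, X_test, y_train, y_test = load_data()
regr = linear_model.LinearRegression()
regr.fit(X_train, y_train)
print("Coefficients: %s, intercept: %.2f" % (regr.coef_, regr.intercept_))
print("Residual sum of squares: %.2f" % np.mean((regr.predict(X_test) - y_test) ** 2))
print("Score: %.2f" % regr.score(X_test, y_test))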
import numpy as np
import matplotlib.pyplot as plt
from sklearn import cluster
from sklearn.metrics import adjusted_rand_score
from sklearn.datasets import make_blobs

def create_data(centers, num=100, std=0.7):
    X, labels_true = make_b…
import numpy as np
import matplotlib.pyplot as plt
from sklearn import mixture
from sklearn.metrics import adjusted_rand_score
from sklearn.datasets import make_blobs

def create_data(centers, num=100, std=0.7):
    X, labels_true = make_b…
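Like the clustering previews, the mixture-model preview stops at the data generator. A sketch assuming a GaussianMixture scored with the adjusted Rand index; the centers, component count, and covariance type are illustrative.

from sklearn import mixture
from sklearn.metrics import adjusted_rand_score
from sklearn.datasets import make_blobs

def create_data(centers, num=100, std=0.7):
    X, labels_true = make_blobs(n_samples=num, centers=centers, cluster_std=std)
    return X, labels_true

centers = [[1, 1], [2, 2], [1, 2], [10, 20]]   # hypothetical centers
X, labels_true = create_data(centers, num=1000, std=0.5)

gmm = mixture.GaussianMixture(n_components=4, covariance_type='full')
gmm.fit(X)
predicted = gmm.predict(X)
print("ARI: %s" % adjusted_rand_score(labels_true, predicted))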
# -*- coding: utf-8 -*-
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets, manifold

def load_data():
    ''' Load the data for dimensionality reduction '''
    # use the iris dataset bundled with scikit-learn
    iris = datasets.load_iris()
    return iris.data, iris.target

# Isomap (isometric mapping) dimensionality-reduction model…
# -*- coding: utf-8 -*-
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets, manifold

def load_data():
    ''' Load the data for dimensionality reduction '''
    # use the iris dataset bundled with scikit-learn
    iris = datasets.load_iris()
    return iris.data, iris.target

# MDS (multidimensional scaling) dimensionality-reduction model
def…
# -*- coding: utf-8 -*-
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets, decomposition

def load_data():
    ''' Load the data for dimensionality reduction '''
    # use the iris dataset bundled with scikit-learn
    iris = datasets.load_iris()
    return iris.data, iris.target

# Kernelized PCA: Kerne…
# -*- coding: utf-8 -*-
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets, decomposition

def load_data():
    ''' Load the data for dimensionality reduction '''
    # use the iris dataset bundled with scikit-learn
    iris = datasets.load_iris()
    return iris.data, iris.target

# Dimensionality reduction for very large datasets: I…
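All four dimensionality-reduction previews (Isomap, MDS, kernelized PCA, and the large-dataset variant) break off at the model comment. Below is a sketch of the last case, assuming the iris data is fed to IncrementalPCA in mini-batches via partial_fit; the component count and batch layout are illustrative.

import numpy as np
from sklearn import datasets, decomposition

iris = datasets.load_iris()
X, y = iris.data, iris.target

# IncrementalPCA consumes the data in mini-batches, which is what makes it
# usable for datasets too large to fit in memory
ipca = decomposition.IncrementalPCA(n_components=2, batch_size=50)
for batch in np.array_split(X, 3):
    ipca.partial_fit(batch)

X_r = ipca.transform(X)
print("explained variance ratio: %s" % str(ipca.explained_variance_ratio_))
print("reduced shape:", X_r.shape)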
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets, naive_bayes
from sklearn.model_selection import train_test_split

# Load the digits dataset bundled with scikit-learn
def load_data():
    ''' Load the dataset for classification. Here we use the digits dataset bundled with scikit-learn '''
    d…
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets, naive_bayes
from sklearn.model_selection import train_test_split

# Load the digits dataset bundled with scikit-learn
def load_data():
    ''' Load the dataset for classification. Here we use the digits dataset bundled with scikit-learn '''
    d…
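A sketch of how either naive Bayes file might continue, assuming a GaussianNB fit on a digits split; the split parameters are illustrative, and the original files may also exercise the multinomial and Bernoulli variants.

from sklearn import datasets, naive_bayes
from sklearn.model_selection import train_test_split

digits = datasets.load_digits()
X_train, X_test, y_train, y_test = train_test_split(
    digits.data, digits.target, test_size=0.25, random_state=0, stratify=digits.target)

cls = naive_bayes.GaussianNB()
cls.fit(X_train, y_train)
print("Training score: %.2f" % cls.score(X_train, y_train))
print("Testing score: %.2f" % cls.score(X_test, y_test))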
from sklearn.feature_selection import SelectPercentile, f_classif

# Data preprocessing: filter-style feature selection with the SelectPercentile model
def test_SelectPercentile():
    X = [[1, 2, 3, 4, 5],
         [5, 4, 3, 2, 1],
         [3, 3, 3, 3, 3],
         [1, 1, 1, 1, 1]]
    y = [0, 1, 0, 1]
    print("before transform:", X)
    selector = SelectPercentile(s…
from sklearn.feature_selection import VarianceThreshold

# Data preprocessing: filter-style feature selection with the VarianceThreshold model
def test_VarianceThreshold():
    X = [[100, 1, 2, 3],
         [100, 4, 5, 6],
         [100, 7, 8, 9],
         [101, 11, 12, 13]]
    selector = VarianceThreshold(1)
    selector.fit(X)
    print("Variances is %s"…
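Both feature-selection previews are cut before transform is called. A sketch completing the SelectPercentile case with the same toy matrix; the percentile value is an assumption.

from sklearn.feature_selection import SelectPercentile, f_classif

X = [[1, 2, 3, 4, 5],
     [5, 4, 3, 2, 1],
     [3, 3, 3, 3, 3],
     [1, 1, 1, 1, 1]]
y = [0, 1, 0, 1]

selector = SelectPercentile(score_func=f_classif, percentile=10)
selector.fit(X, y)
print("scores_: %s" % selector.scores_)
print("pvalues_: %s" % selector.pvalues_)
print("selected index: %s" % selector.get_support(True))
print("after transform: %s" % selector.transform(X))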
from sklearn.preprocessing import Normalizer

# Data preprocessing: normalization with the Normalizer model
def test_Normalizer():
    X = [[1, 2, 3, 4, 5],
         [5, 4, 3, 2, 1],
         [1, 3, 5, 2, 4],
         [2, 4, 1, 3, 5]]
    print("before transform:", X)
    normalizer = Normalizer(norm='l2')
    print("after transform:", no…
from sklearn.preprocessing import MaxAbsScaler

# Data preprocessing: scaling with the MaxAbsScaler model
def test_MaxAbsScaler():
    X = [[1, 5, 1, 2, 10],
         [2, 6, 3, 2, 7],
         [3, 7, 5, 6, 4],
         [4, 8, 7, 8, 1]]
    print("before transform:", X)
    scaler = MaxAbsScaler()
    scaler.fit(X)
    print("scale_ is :"…
from sklearn.preprocessing import StandardScaler

# Data preprocessing: standardization with the StandardScaler model
def test_StandardScaler():
    X = [[1, 5, 1, 2, 10],
         [2, 6, 3, 2, 7],
         [3, 7, 5, 6, 4],
         [4, 8, 7, 8, 1]]
    print("before transform:", X)
    scaler = StandardScaler()
    scaler.fit(X)
    print("scale_…
from sklearn.preprocessing import MinMaxScaler

# Data preprocessing: scaling with the MinMaxScaler model
def test_MinMaxScaler():
    X = [[1, 5, 1, 2, 10],
         [2, 6, 3, 2, 7],
         [3, 7, 5, 6, 4],
         [4, 8, 7, 8, 1]]
    print("before transform:", X)
    scaler = MinMaxScaler(feature_range=(0, 2))
    scaler.fit(X)
    print("…
from sklearn.preprocessing import OneHotEncoder

# Data preprocessing: one-hot encoding with the OneHotEncoder model
def test_OneHotEncoder():
    X = [[1, 2, 3, 4, 5],
         [5, 4, 3, 2, 1],
         [3, 3, 3, 3, 3],
         [1, 1, 1, 1, 1]]
    print("before transform:", X)
    encoder = OneHotEncoder(sparse=False)
    encoder.fit(X)
    print("…
from sklearn.preprocessing import Binarizer

# Data preprocessing: binarization with the Binarizer model
def test_Binarizer():
    X = [[1, 2, 3, 4, 5],
         [5, 4, 3, 2, 1],
         [3, 3, 3, 3, 3],
         [1, 1, 1, 1, 1]]
    print("before transform:", X)
    binarizer = Binarizer(threshold=2.5)
    print("after transform:", bin…
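Several of the scaler previews stop right at the attribute printouts. A sketch completing the StandardScaler case with the same toy matrix, including the transform call the truncation hides.

from sklearn.preprocessing import StandardScaler

X = [[1, 5, 1, 2, 10],
     [2, 6, 3, 2, 7],
     [3, 7, 5, 6, 4],
     [4, 8, 7, 8, 1]]

scaler = StandardScaler()
scaler.fit(X)
print("scale_ is: %s" % scaler.scale_)
print("mean_ is: %s" % scaler.mean_)
print("var_ is: %s" % scaler.var_)
print("after transform:\n%s" % scaler.transform(X))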
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets, ensemble
from sklearn.model_selection import train_test_split

def load_data_regression():
    ''' Load the dataset for regression '''
    # use the diabetes dataset bundled with scikit-learn
    diabetes = datasets.load_di…
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets, ensemble
from sklearn.model_selection import train_test_split

def load_data_classification():
    ''' Load the dataset for classification '''
    # use the digits dataset bundled with scikit-learn
    digits = datasets.load_d…
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets, ensemble
from sklearn.model_selection import train_test_split

def load_data_regression():
    ''' Load the dataset for regression '''
    # use the diabetes dataset bundled with scikit-learn
    diabetes = datasets.load_di…
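None of the ensemble previews reaches an estimator, and sklearn.ensemble offers several (AdaBoost, random forests, gradient boosting). The sketch below assumes gradient boosting for the regression files and a random forest for the classification one; the originals may test different ensembles.

from sklearn import datasets, ensemble
from sklearn.model_selection import train_test_split

# regression: diabetes data with a gradient-boosted ensemble (an assumed choice)
diabetes = datasets.load_diabetes()
X_train, X_test, y_train, y_test = train_test_split(
    diabetes.data, diabetes.target, test_size=0.25, random_state=0)
regr = ensemble.GradientBoostingRegressor(n_estimators=100)
regr.fit(X_train, y_train)
print("Regression training score: %f" % regr.score(X_train, y_train))
print("Regression testing score: %f" % regr.score(X_test, y_test))

# classification: digits data with a random forest (an assumed choice)
digits = datasets.load_digits()
X_train, X_test, y_train, y_test = train_test_split(
    digits.data, digits.target, test_size=0.25, random_state=0, stratify=digits.target)
clf = ensemble.RandomForestClassifier(n_estimators=100)
clf.fit(X_train, y_train)
print("Classification testing score: %f" % clf.score(X_test, y_test))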