# Import third-party modules.
# NOTE: in the collapsed original everything after the first '#' was a single
# comment, so none of the statements below actually ran.
import pandas as pd
from sklearn import svm
from sklearn import model_selection
from sklearn import metrics

# Read the external data set.
letters = pd.read_csv(r'F:\\python_Data_analysis_and_mining\\13\\letterdata.csv')
print(letters.shape)

# First 5 rows of the data.
# NOTE(review): the source excerpt is cut off inside the next statement, so it
# is kept as a comment instead of being guessed at:
#   print(letters.head(…
# Import third-party modules.
# NOTE: in the collapsed original the whole line was one comment, so nothing
# here was executed.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import cluster
# `sklearn.datasets.samples_generator` was deprecated in scikit-learn 0.22 and
# removed in 0.24; `make_blobs` is exported from `sklearn.datasets` in every
# version, so import it from there.
from sklearn.datasets import make_blobs

# Simulated data set.
# NOTE(review): the source excerpt is cut off inside this call (further
# keyword arguments may follow), so it is kept as a comment:
#   X,y = make_blobs(n_samples = 2000, centers = [[-1,-2],[1,3]]…
# Import third-party packages.
# NOTE: in the collapsed original the whole line was one comment, so nothing
# here was executed.
import pandas as pd
from sklearn import model_selection

# Load the data.
Knowledge = pd.read_excel(r'F:\\python_Data_analysis_and_mining\\11\\Knowledge.xlsx')
print(Knowledge.shape)
# Show the first 5 rows.
print(Knowledge.head())

# Build the training and test sets: split the data set into a training set
# and a test set.
# NOTE(review): the source excerpt is cut off at the start of the next
# statement, so it is kept as a comment:
#   predi…
# Drawing a pie chart.
# Import third-party modules.
# NOTE: in the collapsed original the whole line was one comment, so nothing
# here was executed.
import matplotlib
import matplotlib.pyplot as plt

# Use a font that contains CJK glyphs and keep the minus sign renderable.
plt.rcParams['font.sans-serif'] = ['Simhei']
plt.rcParams['axes.unicode_minus'] = False
# Raw string: the original non-raw literal relied on '\W', '\F' and '\s' being
# unrecognised escapes, which raises a warning on current Python versions.
# The resulting path value is unchanged.
ziti = matplotlib.font_manager.FontProperties(fname=r'C:\Windows\Fonts\simsun.ttc')

# Build the data.
# NOTE(review): the source excerpt is cut off inside this list literal, so it
# is kept as a comment:
#   edu = [0.2515,…
# Import the module under the alias np.
# NOTE: in the collapsed original the whole line was one comment, so nothing
# here was executed.
import numpy as np

# Create a one-dimensional array from a single list.
arr1 = np.array([3, 10, 8, 7, 34, 11, 28, 72])
print('一维数组:\n', arr1)
# Fetch several elements of the one-dimensional array by position.
print(arr1[[2, 3, 5, 7]])

# Create a two-dimensional array from nested tuples.
arr2 = np.array((
    (8.5, 6, 4.1, 2, 0.7),
    (1.5, 3, 5.4, 7.3, 9),
    (3.2, 3, 3.8, 3, 3),
    (11.2, 13.4, 15.6, 17.8, 19),
))
# NOTE(review): the source excerpt is cut off at the start of the next
# statement, so it is kept as a comment:
#   print…
# NOTE: in the collapsed original only the `list1` assignment ran; every
# print below was swallowed by the first '#' comment on the line.
list1 = ['张三', '男', 33, '江苏', '硕士', '已婚', ['身高178', '体重72']]

# Fetch the first element.
print(list1[0])
# Fetch the fourth element.
print(list1[3])
# Fetch the last element (itself a nested list).
print(list1[6])
# Fetch the value '体重72' from the nested list.
print(list1[6][1])

# Fetch the last element using a negative index.
print(list1[-1])
# Fetch the value '身高178' from the nested list.
print(list1[-1][0])
# Fetch the third element from the end.
print(list1[-3])

# NOTE(review): the source excerpt is cut off at the start of the next
# statement, so it is kept as a comment:
#   list2…
# Import third-party packages.
# NOTE: in the collapsed original the whole line was one comment, so nothing
# here was executed.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn import metrics

# Randomly generate three groups of bivariate normal random numbers.
# Fix the seed so the generated samples are reproducible.
np.random.seed(1234)
mean1 = [0.5, 0.5]
cov1 = [[0.3, 0], [0, 0.3]]
# NOTE(review): the source excerpt is cut off inside the next statement, so
# it is kept as a comment:
#   x1, y1 = np.random.multivari…
# Import third-party packages.
# NOTE: in the collapsed original the whole line was one comment, so nothing
# here was executed.
import pandas as pd
import matplotlib.pyplot as plt

# Read in the data.
default = pd.read_excel(r'F:\\python_Data_analysis_and_mining\\14\\default of credit card clients.xls')
print(default.shape)
print(default.head())
print(default.columns)

# Proportion of customers in the data set who defaulted.
# NOTE(review): the next comment is cut off in the source excerpt
# (it begins "为确…", i.e. "To ensure…"); the code that followed is not visible.
import numpy as npimport pandas as pdimport matplotlib.pyplot as plt # 自定义绘制ks曲线的函数def plot_ks(y_test, y_score, positive_flag): # 对y_test,y_score重新设置索引 y_test.index = np.arange(len(y_test)) #y_score.index = np.arange(len(y_score)) # 构建目标数据集 target_da…
# Import third-party modules.
# NOTE: in the collapsed original the whole line was one comment, so nothing
# here was executed.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import model_selection
from sklearn.linear_model import Ridge, RidgeCV

# Read the diabetes data set.
# NOTE(review): the source excerpt is cut off inside the string literal of
# this statement, so it is kept as a comment:
#   diabetes = pd.read_excel(r'F:\\python_Data_analysis_and_mining\\08\\diabetes.xlsx…