# 导入第三方包import pandas as pdimport numpy as np import matplotlib.pyplot as pltfrom sklearn.cluster import KMeansfrom sklearn import metrics # 随机生成三组二元正态分布随机数 np.random.seed(1234)mean1 = [0.5, 0.5]cov1 = [[0.3, 0], [0, 0.3]]x1, y1 = np.random.multivari…
# 导入第三方模块import pandas as pdimport numpy as npimport matplotlib.pyplot as pltimport seaborn as snsfrom sklearn import clusterfrom sklearn.datasets.samples_generator import make_blobs # 模拟数据集X,y = make_blobs(n_samples = 2000, centers = [[-1,-2],[1,3]]…
# 导入模块,并重命名为npimport numpy as np# 单个列表创建一维数组arr1 = np.array([3,10,8,7,34,11,28,72])print('一维数组:\n',arr1)# 一维数组元素的获取print(arr1[[2,3,5,7]]) # 嵌套元组创建二维数组arr2 = np.array(((8.5,6,4.1,2,0.7),(1.5,3,5.4,7.3,9),(3.2,3,3.8,3,3),(11.2,13.4,15.6,17.8,19)))print…
# 导入第三方包import pandas as pdimport matplotlib.pyplot as plt # 读入数据default = pd.read_excel(r'F:\\python_Data_analysis_and_mining\\14\\default of credit card clients.xls')print(default.shape)print(default.head())print(default.columns)# 数据集中是否违约的客户比例# 为确…
import pandas as pd # 导入第三方模块from sklearn import svmfrom sklearn import model_selectionfrom sklearn import metrics # 读取外部数据letters = pd.read_csv(r'F:\\python_Data_analysis_and_mining\\13\\letterdata.csv')print(letters.shape)# 数据前5行print(letters.head(…
# 导入第三方包import pandas as pd # 导入数据Knowledge = pd.read_excel(r'F:\\python_Data_analysis_and_mining\\11\\Knowledge.xlsx')print(Knowledge.shape)# 返回前5行数据print(Knowledge.head()) # 构造训练集和测试集# 导入第三方模块from sklearn import model_selection# 将数据集拆分为训练集和测试集predi…
import numpy as npimport pandas as pdimport matplotlib.pyplot as plt # 自定义绘制ks曲线的函数def plot_ks(y_test, y_score, positive_flag): # 对y_test,y_score重新设置索引 y_test.index = np.arange(len(y_test)) #y_score.index = np.arange(len(y_score)) # 构建目标数据集 target_da…
# 导入第三方模块import pandas as pdimport numpy as npimport matplotlib.pyplot as pltfrom sklearn import model_selectionfrom sklearn.linear_model import Ridge,RidgeCV # 读取糖尿病数据集diabetes = pd.read_excel(r'F:\\python_Data_analysis_and_mining\\08\\diabetes.xlsx…
# 饼图的绘制# 导入第三方模块import matplotlibimport matplotlib.pyplot as plt plt.rcParams['font.sans-serif']=['Simhei']plt.rcParams['axes.unicode_minus']=Falseziti = matplotlib.font_manager.FontProperties(fname='C:\Windows\Fonts\simsun.ttc') # 构造数据edu = [0.2515,…
# 导入模块import pandas as pdimport numpy as np # 构造序列gdp1 = pd.Series([2.8,3.01,8.99,8.59,5.18])print(gdp1)# 取出gdp1中的第一.第四和第五个元素print('行号风格的序列:\n',gdp1[[0,3,4]])# 数学函数--取对数print('通过numpy函数:\n',np.log(gdp1))# 平均gdpprint('通过numpy函数:\n',np.mean(gdp1))print…