# 导入第三方包import pandas as pdimport numpy as np import matplotlib.pyplot as pltfrom sklearn.cluster import KMeansfrom sklearn import metrics # 随机生成三组二元正态分布随机数 np.random.seed(1234)mean1 = [0.5, 0.5]cov1 = [[0.3, 0], [0, 0.3]]x1, y1 = np.random.multivari…
# 导入第三方包import pandas as pdimport matplotlib.pyplot as plt # 读入数据default = pd.read_excel(r'F:\\python_Data_analysis_and_mining\\14\\default of credit card clients.xls')print(default.shape)print(default.head())print(default.columns)# 数据集中是否违约的客户比例# 为确…
# 导入模块import pandas as pdimport numpy as np # 构造序列gdp1 = pd.Series([2.8,3.01,8.99,8.59,5.18])print(gdp1)# 取出gdp1中的第一.第四和第五个元素print('行号风格的序列:\n',gdp1[[0,3,4]])# 数学函数--取对数print('通过numpy函数:\n',np.log(gdp1))# 平均gdpprint('通过numpy函数:\n',np.mean(gdp1))print…
list1 = ['张三','男',33,'江苏','硕士','已婚',['身高178','体重72']]# 取出第一个元素print(list1[0])# 取出第四个元素print(list1[3])# 取出最后一个元素print(list1[6])# 取出“体重72”这个值print(list1[6][1]) # 取出最后一个元素print(list1[-1])# 取出“身高178”这个值print(list1[-1][0])# 取出倒数第三个元素print(list1[-3]) list2…
# 导入第三方模块import pandas as pdimport numpy as npimport matplotlib.pyplot as pltimport seaborn as snsfrom sklearn import clusterfrom sklearn.datasets.samples_generator import make_blobs # 模拟数据集X,y = make_blobs(n_samples = 2000, centers = [[-1,-2],[1,3]]…
import pandas as pd # 导入第三方模块from sklearn import svmfrom sklearn import model_selectionfrom sklearn import metrics # 读取外部数据letters = pd.read_csv(r'F:\\python_Data_analysis_and_mining\\13\\letterdata.csv')print(letters.shape)# 数据前5行print(letters.head(…
# 导入第三方包import pandas as pd # 导入数据Knowledge = pd.read_excel(r'F:\\python_Data_analysis_and_mining\\11\\Knowledge.xlsx')print(Knowledge.shape)# 返回前5行数据print(Knowledge.head()) # 构造训练集和测试集# 导入第三方模块from sklearn import model_selection# 将数据集拆分为训练集和测试集predi…
import numpy as npimport pandas as pdimport matplotlib.pyplot as plt # 自定义绘制ks曲线的函数def plot_ks(y_test, y_score, positive_flag): # 对y_test,y_score重新设置索引 y_test.index = np.arange(len(y_test)) #y_score.index = np.arange(len(y_score)) # 构建目标数据集 target_da…
# 导入第三方模块import pandas as pdimport numpy as npimport matplotlib.pyplot as pltfrom sklearn import model_selectionfrom sklearn.linear_model import Ridge,RidgeCV # 读取糖尿病数据集diabetes = pd.read_excel(r'F:\\python_Data_analysis_and_mining\\08\\diabetes.xlsx…
# 饼图的绘制# 导入第三方模块import matplotlibimport matplotlib.pyplot as plt plt.rcParams['font.sans-serif']=['Simhei']plt.rcParams['axes.unicode_minus']=Falseziti = matplotlib.font_manager.FontProperties(fname='C:\Windows\Fonts\simsun.ttc') # 构造数据edu = [0.2515,…