Machine Learning Techniques Notes: Homework #6 AdaBoost & Kernel Ridge Regression Exercises

Original article: http://www.jianshu.com/p/9bf9e2add795

AdaBoost

Problem Description
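In brief (as implemented in the next section), the homework runs AdaBoost with decision stumps for T = 300 rounds on hw2_adaboost_train.dat and tracks Ein(g_t), Ein(G_t), the total weight U_t, and the weighted error epsilon_t, plus the corresponding out-of-sample errors on hw2_adaboost_test.dat. Each round t picks the stump

    g_t(x) = s \cdot \mathrm{sign}(x_d - \theta)

that minimizes the u-weighted 0/1 error, then re-weights the examples:

    \epsilon_t = \frac{\sum_n u_n^{(t)} [\![ g_t(x_n) \neq y_n ]\!]}{\sum_n u_n^{(t)}}, \qquad
    \diamond_t = \sqrt{\frac{1-\epsilon_t}{\epsilon_t}} \ (\text{the variable } para \text{ in the code}), \qquad
    \alpha_t = \ln \diamond_t,

    u_n \leftarrow u_n \cdot \diamond_t \ \text{if } g_t(x_n) \neq y_n, \qquad u_n \leftarrow u_n / \diamond_t \ \text{otherwise}, \qquad
    G(x) = \mathrm{sign}\!\left(\sum_{t=1}^{T} \alpha_t\, g_t(x)\right).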



Implementation

# coding:utf-8

import math
import numpy as np
import matplotlib.pyplot as plt


def ReadData(dataFile):
    with open(dataFile, 'r') as f:
        lines = f.readlines()
        data_list = []
        for line in lines:
            line = line.strip().split()
            data_list.append([float(l) for l in line])
        dataArray = np.array(data_list)
        return dataArray


def sign(n):
    if n >= 0:
        return 1
    else:
        return -1


def GetSortedArray(dataArray, i):
    # sort dataArray in ascending order by the values in column i
    data_list = dataArray.tolist()
    sorted_data_list = sorted(data_list, key=lambda x: x[i], reverse=False)
    sortedDataArray = np.array(sorted_data_list)
    return sortedDataArray


def GetUZeroOneError(pred, dataY, u):
    # u-weighted 0/1 error
    return np.sum(u*np.not_equal(pred, dataY))/np.sum(u)


def GetZeroOneError(pred, dataY):
    # plain 0/1 error
    return np.sum(np.not_equal(pred, dataY))/dataY.shape[0]


def decision_stump(dataArray, u):
    # learn the stump g(x) = s*sign(x_d - theta) that minimizes the u-weighted error
    num_data = dataArray.shape[0]
    num_dim = dataArray.shape[1]-1
    min_e = np.inf
    min_s = np.inf
    min_d = np.inf
    min_theta = np.inf
    min_pred = np.zeros((num_data,))
    for d in range(num_dim):
        sortedDataArray = GetSortedArray(dataArray, d)  # sorting by feature d guarantees valid candidate thetas
        d_min_e = np.inf
        d_min_s = np.inf
        d_min_theta = np.inf
        d_min_pred = np.zeros((num_data,))
        for s in [-1.0, 1.0]:
            for i in range(num_data):
                if i == 0:
                    theta = -np.inf
                    pred = s*np.ones((num_data,))
                else:
                    if sortedDataArray[i-1, d] == sortedDataArray[i, d]:
                        continue
                    theta = (sortedDataArray[i-1, d]+sortedDataArray[i, d])/2
                    pred = np.zeros((num_data,))
                    for n in range(num_data):
                        pred[n] = s*sign(dataArray[n, d]-theta)
                d_now_e = GetUZeroOneError(pred, dataArray[:, -1], u)
                if d_now_e < d_min_e:
                    d_min_e = d_now_e
                    d_min_s = s
                    d_min_theta = theta
                    d_min_pred = pred
        if d_min_e < min_e:
            min_e = d_min_e
            min_s = d_min_s
            min_d = d
            min_theta = d_min_theta
            min_pred = d_min_pred
    return min_s, min_d, min_theta, min_pred, min_e


def Pred(paraList, dataX):
    # paraList = [s, d, theta]
    num_data = dataX.shape[0]
    pred = np.zeros((num_data,))
    for i in range(num_data):
        pred[i] = paraList[0]*sign(dataX[i, paraList[1]]-paraList[2])
    return pred


def plot_line_chart(X=np.arange(0, 300, 1).tolist(), Y=np.arange(0, 300, 1).tolist(),
                    nameX="t", nameY="Ein(gt)", saveName="12.png"):
    plt.figure(figsize=(30, 12))
    plt.plot(X, Y, 'b')
    plt.plot(X, Y, 'ro')
    plt.xlim((X[0]-1, X[-1]+1))
    for (x, y) in zip(X, Y):
        if x % 10 == 0:
            plt.text(x+0.1, y, str(round(y, 4)))
    plt.xlabel(nameX)
    plt.ylabel(nameY)
    plt.title(nameY+" versus "+nameX)
    plt.savefig(saveName)
    return


if __name__ == "__main__":
    dataArray = ReadData("hw2_adaboost_train.dat")
    dataY = dataArray[:, -1]
    dataX = dataArray[:, :-1]
    num_data = dataArray.shape[0]
    u = np.full(shape=(num_data,), fill_value=1/num_data)
    ein_g_list = []
    alpha_list = []
    g_list = []
    ein_G_list = []
    u_sum_list = []
    epi_list = []
    min_pred_list = []
    # AdaBoost main loop
    for t in range(300):
        u_sum_list.append(np.sum(u))
        min_s, min_d, min_theta, min_pred, epi = decision_stump(dataArray, u)
        g_list.append([min_s, min_d, min_theta])
        min_pred_list.append(min_pred)
        ein_g = GetZeroOneError(min_pred, dataY)
        ein_g_list.append(ein_g)
        epi_list.append(epi)
        para = math.sqrt((1-epi)/epi)
        alpha_list.append(math.log(para))
        for i in range(num_data):
            if min_pred[i] == dataY[i]:
                u[i] /= para
            else:
                u[i] *= para
        predG = np.zeros((num_data,))
        for ta in range(t+1):  # aggregate the t+1 stumps learned so far, including g_{t+1}
            predG += alpha_list[ta]*min_pred_list[ta]
        for n in range(num_data):
            predG[n] = sign(predG[n])
        ein_G_list.append(GetZeroOneError(predG, dataY))

    # 12
    plot_line_chart(Y=ein_g_list)
    print("Ein(g1):", ein_g_list[0])
    print("alpha1:", alpha_list[0])
    # 14
    plot_line_chart(Y=ein_G_list, nameY="Ein(Gt)", saveName="14.png")
    print("Ein(G):", ein_G_list[-1])
    # 15
    plot_line_chart(Y=u_sum_list, nameY="Ut", saveName="15.png")
    print("U2:", u_sum_list[1])
    print("UT:", u_sum_list[-1])
    # 16
    plot_line_chart(Y=epi_list, nameY="epsilon_t", saveName="16.png")
    print("the minimum value of epsilon_t:", min(epi_list))

    testArray = ReadData("hw2_adaboost_test.dat")
    num_test = testArray.shape[0]
    testX = testArray[:, :-1]
    testY = testArray[:, -1]
    pred_g_list = []
    eout_g_list = []
    eout_G_list = []
    for t in range(300):
        pred_g = Pred(g_list[t], testX)
        pred_g_list.append(pred_g)
        eout_g_list.append(GetZeroOneError(pred_g, testY))
        pred_G = np.zeros((num_test,))
        for ta in range(t+1):  # aggregate the stumps learned so far
            pred_G += alpha_list[ta]*pred_g_list[ta]
        sign_ufunc = np.frompyfunc(sign, 1, 1)
        pred_G = sign_ufunc(pred_G)
        eout_G_list.append(GetZeroOneError(pred_G, testY))

    # 17
    plot_line_chart(Y=eout_g_list, nameY="Eout(gt)", saveName="17.png")
    print("Eout(g1):", eout_g_list[0])
    # 18
    plot_line_chart(Y=eout_G_list, nameY="Eout(Gt)", saveName="18.png")
    print("Eout(G):", eout_G_list[-1])
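A quick way to sanity-check decision_stump is to append a tiny hand-made test to the same file. The toy array below is hypothetical (not one of the homework data sets), but a perfectly separable single feature should come back with zero weighted error:

# Hypothetical smoke test (toy data, not from the homework files); append after the functions above.
toy = np.array([[1.0, -1.0],
                [2.0, -1.0],
                [3.0,  1.0],
                [4.0,  1.0]])          # one feature, label in the last column
u0 = np.full((4,), 0.25)               # uniform example weights
s, d, theta, pred, eps = decision_stump(toy, u0)
print(s, d, theta, eps)                # expected: s=1.0, d=0, theta=2.5, eps=0.0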

Results

Kernel Ridge Regression

Problem Description
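In brief (as implemented in the next section), kernel ridge regression with an RBF kernel is trained on the first 400 examples of hw2_lssvm_all.dat and tested on the rest, for every combination of \gamma \in \{32, 2, 0.125\} and \lambda \in \{0.001, 1, 1000\}. The closed-form solution and the resulting classifier are

    K_{ij} = \exp\!\left(-\gamma \lVert x_i - x_j \rVert^2\right), \qquad
    \beta = (\lambda I_N + K)^{-1} y, \qquad
    g(x) = \mathrm{sign}\!\left(\sum_{n=1}^{N} \beta_n\, K(x_n, x)\right),

and the script reports the parameter combinations achieving the smallest Ein and Eout.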

Implementation

# coding:utf-8

import numpy as np
import math


def ReadData(dataFile):
    with open(dataFile, 'r') as f:
        lines = f.readlines()
        data_list = []
        for line in lines:
            line = line.strip().split()
            data_list.append([1.0]+[float(l) for l in line])  # prepend a constant feature
        dataArray = np.array(data_list)
        return dataArray


def sign(n):
    if n >= 0:
        return 1
    else:
        return -1


def RBFKernel(X1, X2, gamma):
    return math.exp(-gamma*np.sum(np.square(X1-X2)))


def GetKernelMatrix(trainX, dataX, gamma):
    num_train = trainX.shape[0]
    num_data = dataX.shape[0]
    mat = np.zeros((num_train, num_data))
    for i in range(num_train):
        if num_train == num_data and np.equal(trainX, dataX).all():
            # the kernel matrix on the training set itself is symmetric, so only compute the lower triangle
            for j in range(i+1):
                mat[i][j] = RBFKernel(dataX[i, :], dataX[j, :], gamma)
                if i != j:
                    mat[j][i] = mat[i][j]
        else:
            for j in range(num_data):
                mat[i][j] = RBFKernel(trainX[i, :], dataX[j, :], gamma)
    return mat


def GetZeroOneError(pred, dataY):
    return np.sum(np.not_equal(pred, dataY))/dataY.shape[0]


def KernelRidgeRegression(trainArray, lamb, gamma):
    num_train = trainArray.shape[0]
    trainX = trainArray[:, :-1]
    trainY = trainArray[:, -1].reshape((num_train, 1))
    K = GetKernelMatrix(trainX, trainX, gamma)
    beta = np.dot(np.linalg.inv(lamb*np.eye(num_train)+K), trainY)
    return beta


def Predict(trainX, dataX, beta, gamma):
    num_data = dataX.shape[0]
    pred = np.zeros((num_data,))
    K = GetKernelMatrix(trainX, dataX, gamma)
    pred = np.dot(K.transpose(), beta).reshape((num_data,))
    for n in range(num_data):
        pred[n] = sign(pred[n])
    return pred


if __name__ == "__main__":
    dataArray = ReadData("hw2_lssvm_all.dat")
    trainArray = dataArray[:400, :]
    testArray = dataArray[400:, :]
    gammaList = [32, 2, 0.125]
    lambdaList = [0.001, 1, 1000]
    ein_list = []
    eout_list = []
    for l in lambdaList:
        for g in gammaList:
            beta = KernelRidgeRegression(trainArray, l, g)
            ein_list.append(GetZeroOneError(Predict(trainArray[:, :-1], trainArray[:, :-1], beta, g), trainArray[:, -1]))
            eout_list.append(GetZeroOneError(Predict(trainArray[:, :-1], testArray[:, :-1], beta, g), testArray[:, -1]))
    min_ein = min(ein_list)
    min_ein_id = ein_list.index(min_ein)
    min_eout = min(eout_list)
    min_eout_id = eout_list.index(min_eout)
    # 19
    print("the minimum Ein(g):", min_ein, ",the corresponding parameter combinations: gamma=", gammaList[min_ein_id % 3], ",lambda=", lambdaList[min_ein_id // 3])
    # 20
    print("the minimum Eout(g):", min_eout, ",the corresponding parameter combinations: gamma=", gammaList[min_eout_id % 3], ",lambda=", lambdaList[min_eout_id // 3])
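The nested Python loops in GetKernelMatrix dominate the runtime for the 400-by-400 training kernel. A vectorized sketch of the same computation (a hypothetical replacement, not part of the original script; it assumes numpy imported as np and the same RBF definition and array shapes as above) is:

def GetKernelMatrixVectorized(trainX, dataX, gamma):
    # ||a - b||^2 = ||a||^2 + ||b||^2 - 2*a.b, evaluated for all pairs at once
    sq_train = np.sum(np.square(trainX), axis=1).reshape(-1, 1)   # shape (num_train, 1)
    sq_data = np.sum(np.square(dataX), axis=1).reshape(1, -1)     # shape (1, num_data)
    sq_dist = np.maximum(sq_train + sq_data - 2.0*np.dot(trainX, dataX.T), 0.0)  # clip tiny negatives
    return np.exp(-gamma*sq_dist)

Substituting this for GetKernelMatrix should leave Ein and Eout unchanged up to floating-point rounding.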

Results
