import numpy as np
import operator as op
from os import listdir def classify0(inX, dataSet, labels, k):
dataSetSize = dataSet.shape[0]
diffMat = np.tile(inX, (dataSetSize,1)) - dataSet
sqDiffMat = diffMat**2
sqDistances = sqDiffMat.sum(axis=1)
distances = sqDistances**0.5
sortedDistIndicies = distances.argsort()
classCount={}
for i in range(k):
voteIlabel = labels[sortedDistIndicies[i]]
classCount[voteIlabel] = classCount.get(voteIlabel,0) + 1
sortedClassCount = sorted(classCount.items(), key=op.itemgetter(1), reverse=True)
return sortedClassCount[0][0] def createDataSet():
group = np.array([[1.0,1.1],[1.0,1.0],[0,0],[0,0.1]])
labels = ['A','A','B','B']
return group, labels data,labels = createDataSet()
print(data)
print(labels) test = np.array([[0,0.5]])
result = classify0(test,data,labels,3)
print(result)

import numpy as np
import operator as op
from os import listdir def classify0(inX, dataSet, labels, k):
dataSetSize = dataSet.shape[0]
diffMat = np.tile(inX, (dataSetSize,1)) - dataSet
sqDiffMat = diffMat**2
sqDistances = sqDiffMat.sum(axis=1)
distances = sqDistances**0.5
sortedDistIndicies = distances.argsort()
classCount={}
for i in range(k):
voteIlabel = labels[sortedDistIndicies[i]]
classCount[voteIlabel] = classCount.get(voteIlabel,0) + 1
sortedClassCount = sorted(classCount.items(), key=op.itemgetter(1), reverse=True)
return sortedClassCount[0][0] def file2matrix(filename):
fr = open(filename)
returnMat = []
classLabelVector = [] #prepare labels return
for line in fr.readlines():
line = line.strip()
listFromLine = line.split('\t')
returnMat.append([float(listFromLine[0]),float(listFromLine[1]),float(listFromLine[2])])
classLabelVector.append(int(listFromLine[-1]))
return np.array(returnMat),np.array(classLabelVector) trainData,trainLabel = file2matrix("D:\\LearningResource\\machinelearninginaction\\Ch02\\datingTestSet2.txt")
print(trainData[0:4])
print(trainLabel[0:4]) def autoNorm(dataSet):
minVals = dataSet.min(0)
maxVals = dataSet.max(0)
ranges = maxVals - minVals
normDataSet = np.zeros(np.shape(dataSet))
m = dataSet.shape[0]
normDataSet = dataSet - np.tile(minVals, (m,1))
normDataSet = normDataSet/np.tile(ranges, (m,1)) #element wise divide
return normDataSet, ranges, minVals normDataSet, ranges, minVals = autoNorm(trainData)
print(ranges)
print(minVals)
print(normDataSet[0:4])
print(trainLabel[0:4]) testData = np.array([[0.5,0.3,0.5]])
result = classify0(testData, normDataSet, trainLabel, 5)
print(result)

import numpy as np
import operator as op
from os import listdir def classify0(inX, dataSet, labels, k):
dataSetSize = dataSet.shape[0]
diffMat = np.tile(inX, (dataSetSize,1)) - dataSet
sqDiffMat = diffMat**2
sqDistances = sqDiffMat.sum(axis=1)
distances = sqDistances**0.5
sortedDistIndicies = distances.argsort()
classCount={}
for i in range(k):
voteIlabel = labels[sortedDistIndicies[i]]
classCount[voteIlabel] = classCount.get(voteIlabel,0) + 1
sortedClassCount = sorted(classCount.items(), key=op.itemgetter(1), reverse=True)
return sortedClassCount[0][0] def file2matrix(filename):
fr = open(filename)
returnMat = []
classLabelVector = [] #prepare labels return
for line in fr.readlines():
line = line.strip()
listFromLine = line.split('\t')
returnMat.append([float(listFromLine[0]),float(listFromLine[1]),float(listFromLine[2])])
classLabelVector.append(listFromLine[-1])
return np.array(returnMat),np.array(classLabelVector) def autoNorm(dataSet):
minVals = dataSet.min(0)
maxVals = dataSet.max(0)
ranges = maxVals - minVals
normDataSet = np.zeros(np.shape(dataSet))
m = dataSet.shape[0]
normDataSet = dataSet - np.tile(minVals, (m,1))
normDataSet = normDataSet/np.tile(ranges, (m,1)) #element wise divide
return normDataSet, ranges, minVals normDataSet, ranges, minVals = autoNorm(trainData) def datingClassTest():
hoRatio = 0.10 #hold out 10%
datingDataMat,datingLabels = file2matrix("D:\\LearningResource\\machinelearninginaction\\Ch02\\datingTestSet.txt")
normMat, ranges, minVals = autoNorm(datingDataMat)
m = normMat.shape[0]
numTestVecs = int(m*hoRatio)
errorCount = 0.0
for i in range(numTestVecs):
classifierResult = classify0(normMat[i,:],normMat[numTestVecs:m,:],datingLabels[numTestVecs:m],3)
print(('the classifier came back with: %s, the real answer is: %s') % (classifierResult, datingLabels[i]))
if (classifierResult != datingLabels[i]):
errorCount += 1.0
print(('the total error rate is: %f') % (errorCount/float(numTestVecs)))
print(errorCount) datingClassTest()
import numpy as np
import operator as op
from os import listdir def classify0(inX, dataSet, labels, k):
dataSetSize = dataSet.shape[0]
diffMat = np.tile(inX, (dataSetSize,1)) - dataSet
sqDiffMat = diffMat**2
sqDistances = sqDiffMat.sum(axis=1)
distances = sqDistances**0.5
sortedDistIndicies = distances.argsort()
classCount={}
for i in range(k):
voteIlabel = labels[sortedDistIndicies[i]]
classCount[voteIlabel] = classCount.get(voteIlabel,0) + 1
sortedClassCount = sorted(classCount.items(), key=op.itemgetter(1), reverse=True)
return sortedClassCount[0][0] def file2matrix(filename):
fr = open(filename)
returnMat = []
classLabelVector = [] #prepare labels return
for line in fr.readlines():
line = line.strip()
listFromLine = line.split('\t')
returnMat.append([float(listFromLine[0]),float(listFromLine[1]),float(listFromLine[2])])
classLabelVector.append(listFromLine[-1])
return np.array(returnMat),np.array(classLabelVector) def autoNorm(dataSet):
minVals = dataSet.min(0)
maxVals = dataSet.max(0)
ranges = maxVals - minVals
normDataSet = np.zeros(np.shape(dataSet))
m = dataSet.shape[0]
normDataSet = dataSet - np.tile(minVals, (m,1))
normDataSet = normDataSet/np.tile(ranges, (m,1)) #element wise divide
return normDataSet, ranges, minVals normDataSet, ranges, minVals = autoNorm(trainData) def datingClassTest():
hoRatio = 0.10 #hold out 10%
datingDataMat,datingLabels = file2matrix("D:\\LearningResource\\machinelearninginaction\\Ch02\\datingTestSet.txt")
normMat, ranges, minVals = autoNorm(datingDataMat)
m = normMat.shape[0]
numTestVecs = int(m*hoRatio)
errorCount = 0.0
for i in range(numTestVecs):
classifierResult = classify0(normMat[i,:],normMat[numTestVecs:m,:],datingLabels[numTestVecs:m],3)
print(('the classifier came back with: %s, the real answer is: %s') % (classifierResult, datingLabels[i]))
if (classifierResult != datingLabels[i]):
errorCount += 1.0
print(('the total error rate is: %f') % (errorCount/float(numTestVecs)))
print(errorCount) datingClassTest()

................................................

import numpy as np
import operator as op
from os import listdir def classify0(inX, dataSet, labels, k):
dataSetSize = dataSet.shape[0]
diffMat = np.tile(inX, (dataSetSize,1)) - dataSet
sqDiffMat = diffMat**2
sqDistances = sqDiffMat.sum(axis=1)
distances = sqDistances**0.5
sortedDistIndicies = distances.argsort()
classCount={}
for i in range(k):
voteIlabel = labels[sortedDistIndicies[i]]
classCount[voteIlabel] = classCount.get(voteIlabel,0) + 1
sortedClassCount = sorted(classCount.items(), key=op.itemgetter(1), reverse=True)
return sortedClassCount[0][0] def file2matrix(filename):
fr = open(filename)
returnMat = []
classLabelVector = [] #prepare labels return
for line in fr.readlines():
line = line.strip()
listFromLine = line.split('\t')
returnMat.append([float(listFromLine[0]),float(listFromLine[1]),float(listFromLine[2])])
classLabelVector.append(int(listFromLine[-1]))
return np.array(returnMat),np.array(classLabelVector) def autoNorm(dataSet):
minVals = dataSet.min(0)
maxVals = dataSet.max(0)
ranges = maxVals - minVals
normDataSet = np.zeros(np.shape(dataSet))
m = dataSet.shape[0]
normDataSet = dataSet - np.tile(minVals, (m,1))
normDataSet = normDataSet/np.tile(ranges, (m,1)) #element wise divide
return normDataSet, ranges, minVals def classifyPerson():
resultList = ["not at all", "in samll doses", "in large doses"]
percentTats = float(input("percentage of time spent playing video game?"))
ffMiles = float(input("frequent flier miles earned per year?"))
iceCream = float(input("liters of ice cream consumed per year?"))
testData = np.array([percentTats,ffMiles,iceCream])
trainData,trainLabel = file2matrix("D:\\LearningResource\\machinelearninginaction\\Ch02\\datingTestSet2.txt")
normDataSet, ranges, minVals = autoNorm(trainData)
result = classify0((testData-minVals)/ranges, normDataSet, trainLabel, 3)
print("You will probably like this person: ",resultList[result-1]) classifyPerson()
import numpy as np
import operator as op
from os import listdir def classify0(inX, dataSet, labels, k):
dataSetSize = dataSet.shape[0]
diffMat = np.tile(inX, (dataSetSize,1)) - dataSet
sqDiffMat = diffMat**2
sqDistances = sqDiffMat.sum(axis=1)
distances = sqDistances**0.5
sortedDistIndicies = distances.argsort()
classCount={}
for i in range(k):
voteIlabel = labels[sortedDistIndicies[i]]
classCount[voteIlabel] = classCount.get(voteIlabel,0) + 1
sortedClassCount = sorted(classCount.items(), key=op.itemgetter(1), reverse=True)
return sortedClassCount[0][0] def file2matrix(filename):
fr = open(filename)
returnMat = []
classLabelVector = [] #prepare labels return
for line in fr.readlines():
line = line.strip()
listFromLine = line.split('\t')
returnMat.append([float(listFromLine[0]),float(listFromLine[1]),float(listFromLine[2])])
classLabelVector.append(int(listFromLine[-1]))
return np.array(returnMat),np.array(classLabelVector) def autoNorm(dataSet):
minVals = dataSet.min(0)
maxVals = dataSet.max(0)
ranges = maxVals - minVals
normDataSet = np.zeros(np.shape(dataSet))
m = dataSet.shape[0]
normDataSet = dataSet - np.tile(minVals, (m,1))
normDataSet = normDataSet/np.tile(ranges, (m,1)) #element wise divide
return normDataSet, ranges, minVals def classifyPerson():
resultList = ["not at all", "in samll doses", "in large doses"]
percentTats = float(input("percentage of time spent playing video game?"))
ffMiles = float(input("frequent flier miles earned per year?"))
iceCream = float(input("liters of ice cream consumed per year?"))
testData = np.array([percentTats,ffMiles,iceCream])
trainData,trainLabel = file2matrix("D:\\LearningResource\\machinelearninginaction\\Ch02\\datingTestSet2.txt")
normDataSet, ranges, minVals = autoNorm(trainData)
result = classify0((testData-minVals)/ranges, normDataSet, trainLabel, 3)
print("You will probably like this person: ",resultList[result-1]) classifyPerson()

import numpy as np
import operator as op
from os import listdir def classify0(inX, dataSet, labels, k):
dataSetSize = dataSet.shape[0]
diffMat = np.tile(inX, (dataSetSize,1)) - dataSet
sqDiffMat = diffMat**2
sqDistances = sqDiffMat.sum(axis=1)
distances = sqDistances**0.5
sortedDistIndicies = distances.argsort()
classCount={}
for i in range(k):
voteIlabel = labels[sortedDistIndicies[i]]
classCount[voteIlabel] = classCount.get(voteIlabel,0) + 1
sortedClassCount = sorted(classCount.items(), key=op.itemgetter(1), reverse=True)
return sortedClassCount[0][0] def img2vector(filename):
returnVect = []
fr = open(filename)
for i in range(32):
lineStr = fr.readline()
for j in range(32):
returnVect.append(int(lineStr[j]))
return np.array([returnVect]) def handwritingClassTest():
hwLabels = []
trainingFileList = listdir('D:\\LearningResource\\machinelearninginaction\\Ch02\\trainingDigits') #load the training set
m = len(trainingFileList)
trainingMat = np.zeros((m,1024))
for i in range(m):
fileNameStr = trainingFileList[i]
fileStr = fileNameStr.split('.')[0] #take off .txt
classNumStr = int(fileStr.split('_')[0])
hwLabels.append(classNumStr)
trainingMat[i,:] = img2vector('D:\\LearningResource\\machinelearninginaction\\Ch02\\trainingDigits\\%s' % fileNameStr)
testFileList = listdir('D:\\LearningResource\\machinelearninginaction\\Ch02\\testDigits') #iterate through the test set
mTest = len(testFileList)
errorCount = 0.0
for i in range(mTest):
fileNameStr = testFileList[i]
fileStr = fileNameStr.split('.')[0] #take off .txt
classNumStr = int(fileStr.split('_')[0])
vectorUnderTest = img2vector('D:\\LearningResource\\machinelearninginaction\\Ch02\\testDigits\\%s' % fileNameStr)
classifierResult = classify0(vectorUnderTest, trainingMat, hwLabels, 3)
print("the classifier came back with: %d, the real answer is: %d" % (classifierResult, classNumStr))
if (classifierResult != classNumStr):
errorCount += 1.0
print("\nthe total number of errors is: %d" % errorCount)
print("\nthe total error rate is: %f" % (errorCount/float(mTest))) handwritingClassTest()

.......................................

吴裕雄 python 机器学习-KNN算法(1)的更多相关文章

  1. 吴裕雄 python 机器学习——KNN回归KNeighborsRegressor模型

    import numpy as np import matplotlib.pyplot as plt from sklearn import neighbors, datasets from skle ...

  2. 吴裕雄 python 机器学习——KNN分类KNeighborsClassifier模型

    import numpy as np import matplotlib.pyplot as plt from sklearn import neighbors, datasets from skle ...

  3. 吴裕雄 python 机器学习-KNN(2)

    import matplotlib import numpy as np import matplotlib.pyplot as plt from matplotlib.patches import ...

  4. 吴裕雄 python 机器学习——半监督学习标准迭代式标记传播算法LabelPropagation模型

    import numpy as np import matplotlib.pyplot as plt from sklearn import metrics from sklearn import d ...

  5. 吴裕雄 python 机器学习——集成学习AdaBoost算法回归模型

    import numpy as np import matplotlib.pyplot as plt from sklearn import datasets,ensemble from sklear ...

  6. 吴裕雄 python 机器学习——集成学习AdaBoost算法分类模型

    import numpy as np import matplotlib.pyplot as plt from sklearn import datasets,ensemble from sklear ...

  7. 吴裕雄 python 机器学习——人工神经网络感知机学习算法的应用

    import numpy as np from matplotlib import pyplot as plt from sklearn import neighbors, datasets from ...

  8. 吴裕雄 python 机器学习——半监督学习LabelSpreading模型

    import numpy as np import matplotlib.pyplot as plt from sklearn import metrics from sklearn import d ...

  9. 吴裕雄 python 机器学习——人工神经网络与原始感知机模型

    import numpy as np from matplotlib import pyplot as plt from mpl_toolkits.mplot3d import Axes3D from ...

随机推荐

  1. hbase启动后子节点的regionserver不能启动

    启动hbase后,主节点的进程正常,但是子节点的regionserver进程会自动挂掉 然后我们看看子节点的情况 可以看到挂掉了 我们这样解决问题,先把hadoop目录下的这个两个文件放到hbase的 ...

  2. RecyclerView中设置match_parent无效;

    在RecyclerView中宽度设置了match_parent,但是效果和wrap_content一样: 说下解决方法: 1.这样子写,match_parent没有效果: View v = View. ...

  3. SQL with(unlock)与with(readpast)

    所有Select加 With (NoLock)解决阻塞死锁,在查询语句中使用 NOLOCK 和 READPAST 处理一个数据库死锁的异常时候,其中一个建议就是使用 NOLOCK 或者 READPAS ...

  4. three.js学习:点光源+动画的实现

    与前几个教程类似,场景和相机等设置就不再重复声明了.这里只列出新学的内容. 1.圆柱体(圆锥体)的初始化 function initObject() { var geometry = new THRE ...

  5. springmvc接收前台(如ajax)传来的数组list,set等图文详解

        ref:https://blog.csdn.net/wabiaozia/article/details/50803581 前言: 相信很人都被springmvc接收数组问题折磨过,查过几个解决 ...

  6. jQuery插件——下拉选择框

    其实,之前也写过jQuery插件,今天写的是一个模拟select选择的下拉插件. 既然是jQuery插件,那么必然是依赖jQuery的了. 老规矩,直接上代码吧! ;(function () { $. ...

  7. Redis实现分布式锁原理与实现分析

    一.关于分布式锁 关于分布式锁,可能绝大部分人都会或多或少涉及到. 我举二个例子: 场景一:从前端界面发起一笔支付请求,如果前端没有做防重处理,那么可能在某一个时刻会有二笔一样的单子同时到达系统后台. ...

  8. Maintenance Planner calculate SPs by manual

    note Are you unable to view your system or updated system information? Apply the latest version of t ...

  9. <转载> MySQL 性能优化的最佳20多条经验分享 http://www.jb51.net/article/24392.htm

    当我们去设计数据库表结构,对操作数据库时(尤其是查表时的SQL语句),我们都需要注意数据操作的性能.这里,我们不会讲过多的SQL语句的优化,而只是针对MySQL这一Web应用最多的数据库.希望下面的这 ...

  10. tomcat中项目后有括号

    引入他人项目时,由于报错,copy本地workspace下其他项目的 .settings和.project到该项目路径下 结果Eclipse 的 Server 中出现了  aaa(bbb)的情况 并且 ...