吴裕雄 python 机器学习-DMT(1)
import numpy as np
import operator as op
from math import log


def createDataSet():
    """Build the toy data set used throughout this script.

    Returns:
        A ``(dataSet, labels)`` tuple. Each row of ``dataSet`` holds two
        binary feature values followed by the class label (``'yes'`` or
        ``'no'``); ``labels`` names the two feature columns in order.
        A fresh pair of lists is created on every call.
    """
    dataSet = [
        [1, 1, 'yes'],
        [1, 1, 'yes'],
        [1, 0, 'no'],
        [0, 1, 'no'],
        [0, 1, 'no'],
    ]
    labels = ['no surfacing', 'flippers']
    return dataSet, labels


dataSet, labels = createDataSet()
print(dataSet)
print(labels)


def calcShannonEnt(dataSet):
    """Return the Shannon entropy (base 2) of the class labels in dataSet.

    Args:
        dataSet: sequence of rows; the last element of each row is taken
            as that row's class label.

    Returns:
        The entropy as a float. An empty ``dataSet`` yields ``0.0`` because
        there are no labels to sum over.
    """
    labelCounts = {}
    for featVec in dataSet:
        currentLabel = featVec[-1]
        labelCounts[currentLabel] = labelCounts.get(currentLabel, 0) + 1

    rowNum = len(dataSet)
    shannonEnt = 0.0
    for count in labelCounts.values():
        prob = float(count) / rowNum
        shannonEnt -= prob * log(prob, 2)
    return shannonEnt


shannonEnt = calcShannonEnt(dataSet)
print(shannonEnt)


def splitDataSet(dataSet, axis, value):
    """Select the rows whose column ``axis`` equals ``value``, dropping that column.

    Args:
        dataSet: sequence of rows (indexable sequences).
        axis: index of the column to test and remove.
        value: value the column must equal for a row to be kept.

    Returns:
        A new list of new lists; the rows of ``dataSet`` are not modified.
    """
    retDataSet = []
    for featVec in dataSet:
        if featVec[axis] == value:
            # Copy into a fresh list so the source row is never mutated and
            # non-list rows (e.g. tuples) are handled as well.
            reducedFeatVec = list(featVec[:axis])
            reducedFeatVec.extend(featVec[axis + 1:])
            retDataSet.append(reducedFeatVec)
    return retDataSet


retDataSet = splitDataSet(dataSet, 1, 1)
print(np.array(retDataSet))
retDataSet = splitDataSet(dataSet, 1, 0)
print(retDataSet)


def chooseBestFeatureToSplit(dataSet):
    """Return the index of the feature column with the highest information gain.

    Information gain is the entropy of ``dataSet`` minus the weighted entropy
    of the subsets produced by splitting on each distinct value of a column.

    Args:
        dataSet: non-empty sequence of rows; the last element of each row is
            the class label, all preceding elements are features.

    Returns:
        The best column index, or ``-1`` when no column yields a strictly
        positive gain.
    """
    # The last column is the class label, not a feature.
    numFeatures = len(dataSet[0]) - 1
    baseEntropy = calcShannonEnt(dataSet)
    totalRows = float(len(dataSet))

    bestInfoGain = 0.0
    bestFeature = -1
    for i in range(numFeatures):
        uniqueVals = {example[i] for example in dataSet}
        newEntropy = 0.0
        for value in uniqueVals:
            subDataSet = splitDataSet(dataSet, i, value)
            prob = len(subDataSet) / totalRows
            newEntropy += prob * calcShannonEnt(subDataSet)
        infoGain = baseEntropy - newEntropy
        if infoGain > bestInfoGain:
            bestInfoGain = infoGain
            bestFeature = i
    return bestFeature


bestFeature = chooseBestFeatureToSplit(dataSet)
print(bestFeature)


def majorityCnt(classList):
    """Return the most frequent label in classList.

    Ties go to the label that appears first in ``classList`` (the sort is
    stable and dicts keep insertion order).
    """
    classCount = {}
    for vote in classList:
        classCount[vote] = classCount.get(vote, 0) + 1
    sortedClassCount = sorted(classCount.items(), key=op.itemgetter(1), reverse=True)
    return sortedClassCount[0][0]


def createTree(dataSet, labels):
    """Recursively build a decision tree as nested dicts.

    Args:
        dataSet: non-empty sequence of rows; the last element of each row is
            the class label.
        labels: list of feature names, one per feature column of ``dataSet``.
            It is not modified.

    Returns:
        A class label when the node is a leaf, otherwise a dict of the form
        ``{feature_name: {feature_value: subtree_or_label, ...}}``.
    """
    classList = [example[-1] for example in dataSet]
    # Pure node: every row carries the same class.
    if classList.count(classList[0]) == len(classList):
        return classList[0]
    # Only the class column is left: no features remain to split on.
    if len(dataSet[0]) == 1:
        return majorityCnt(classList)

    bestFeat = chooseBestFeatureToSplit(dataSet)
    # -1 means no feature gives positive information gain. Indexing with it
    # would select the class-label column, so fall back to a majority vote.
    if bestFeat == -1:
        return majorityCnt(classList)

    bestFeatLabel = labels[bestFeat]
    # Build the reduced label list without touching the caller's list.
    subLabels = labels[:bestFeat] + labels[bestFeat + 1:]

    myTree = {bestFeatLabel: {}}
    uniqueVals = {example[bestFeat] for example in dataSet}
    for value in uniqueVals:
        myTree[bestFeatLabel][value] = createTree(
            splitDataSet(dataSet, bestFeat, value), subLabels
        )
    return myTree


myTree = createTree(dataSet, labels)
print(myTree)


def classify(inputTree, featLabels, testVec):
    """Classify testVec by walking the nested-dict decision tree.

    Args:
        inputTree: tree of the form
            ``{feature_name: {feature_value: subtree_or_label, ...}}``.
        featLabels: list of feature names; the position of a name gives the
            index of that feature inside ``testVec``.
        testVec: feature values of the sample to classify.

    Returns:
        The class label stored at the leaf that is reached.

    Raises:
        ValueError: if the tree's feature name is not in ``featLabels``.
        KeyError: if ``testVec`` holds a value with no branch in the tree.
    """
    # Each tree node is a dict whose first key is the feature it tests.
    firstStr = next(iter(inputTree))
    secondDict = inputTree[firstStr]
    featIndex = featLabels.index(firstStr)
    valueOfFeat = secondDict[testVec[featIndex]]
    if isinstance(valueOfFeat, dict):
        return classify(valueOfFeat, featLabels, testVec)
    return valueOfFeat


featLabels = ['no surfacing', 'flippers']
classLabel = classify(myTree, featLabels, [1, 1])
print(classLabel)

import pickle


def storeTree(inputTree, filename):
    """Serialize inputTree to ``filename`` with pickle."""
    # The context manager closes the file even if pickling raises.
    with open(filename, 'wb') as fw:
        pickle.dump(inputTree, fw)


def grabTree(filename):
    """Load and return the object pickled in ``filename``.

    NOTE: unpickling can execute arbitrary code; only load files that this
    program (or another trusted source) wrote.
    """
    with open(filename, 'rb') as fr:
        return pickle.load(fr)


filename = "D:\\mytree.txt"
storeTree(myTree, filename)
mySecTree = grabTree(filename)
print(mySecTree)
featLabels = ['no surfacing', 'flippers']
classLabel = classify(mySecTree, featLabels, [0, 0])
print(classLabel)

吴裕雄 python 机器学习-DMT(1)的更多相关文章
- 吴裕雄 python 机器学习-DMT(2)
import matplotlib.pyplot as plt decisionNode = dict(boxstyle="sawtooth", fc="0.8" ...
- 吴裕雄 python 机器学习——分类决策树模型
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets from sklearn.model_s ...
- 吴裕雄 python 机器学习——回归决策树模型
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets from sklearn.model_s ...
- 吴裕雄 python 机器学习——线性判断分析LinearDiscriminantAnalysis
import numpy as np import matplotlib.pyplot as plt from matplotlib import cm from mpl_toolkits.mplot ...
- 吴裕雄 python 机器学习——逻辑回归
import numpy as np import matplotlib.pyplot as plt from matplotlib import cm from mpl_toolkits.mplot ...
- 吴裕雄 python 机器学习——ElasticNet回归
import numpy as np import matplotlib.pyplot as plt from matplotlib import cm from mpl_toolkits.mplot ...
- 吴裕雄 python 机器学习——Lasso回归
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets, linear_model from s ...
- 吴裕雄 python 机器学习——岭回归
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets, linear_model from s ...
- 吴裕雄 python 机器学习——线性回归模型
import numpy as np from sklearn import datasets,linear_model from sklearn.model_selection import tra ...
随机推荐
- python数字
#=====>part1:数字类型#掌握:int,float#了解:Long(在python2中才有),complex# num=10# num=int(10)# print(type(num) ...
- asp.net网站中增删文件夹会导致Session或cache等等丢失
因为这会导致网站资源本身重新加载. 如果要改变文件和文件夹,一般应该是对 app_data 下进行操作.
- SQL server 数据库的数据完整性
存储在数据库中的所有数据值均正确的状态.如果数据库中存储有不正确的数据值,则该数据库称为已丧失数据完整性. 详细释义 数据库中的数据是从外界输入的,而数据的输入由于种种原因,会发生输入无效或错误信息 ...
- glob获取指定目录下的东西+更改工作目录
一:不更改工作目录 import glob path = 'image/imgs/*.jpg' # 正则匹配 指定路径 file_path = glob.glob(path) # 即可获取所有jpg的 ...
- java se 随机数。生成
public class test { public static void main(String[] args) { getRandomNum1(); getRandomNum2(); getRa ...
- IIS w3wp对应的应用程序
IIS7以前我們用IISApp查看IIS哪些服務已啟動,但在IIS7已經不適用了,新語法是appcmd.exe list wp.你可以在%windir%\system32\inetsrv\底下找到ap ...
- 23.纯 CSS 创作一个菜单反色填充特效
原文地址:https://segmentfault.com/a/1190000014876348 HTML代码: <nav> <ul> <li><span> ...
- Android Studio 3.0 新特性
最新Android Studio版本是Android Studio 3.0,本文提供了所有新功能和更改的摘要. 所有这些功能都可以在最新的金丝雀版本中发布,但beta测试版本可能尚未提供. 核心IDE ...
- PropertiesUtils
package com.icil.elsa.subscribe.milestone.common.utils; import java.io.BufferedInputStream; import j ...
- idea 安装三方插件的方法
<一>在线安装 1,File -> Setting -> Plugins, 大红框内是已经安装的插件,可以搜索 2, 点击上图小红框内的按钮, 如下,搜索自己想要的插件,选中, ...