吴裕雄 python 机器学习-DMT(2)
- import matplotlib.pyplot as plt
# Box and arrow styles shared by the tree-drawing helpers below.
decisionNode = {"boxstyle": "sawtooth", "fc": "0.8"}  # box for decision nodes
leafNode = {"boxstyle": "round4", "fc": "0.8"}  # box for leaf nodes
arrow_args = {"arrowstyle": "<-"}  # handed to annotate() as arrowprops
def getNumLeafs(myTree):
    """Count the leaf nodes of a nested-dict decision tree.

    The tree has the form ``{feature: {value: subtree_or_leaf, ...}}``: one
    root key whose value maps each branch either to another tree (a dict)
    or to a leaf (any non-dict value).

    Args:
        myTree: Non-empty nested-dict tree.

    Returns:
        The number of non-dict values reachable from the root.
    """
    # Only the first key is treated as the root; its value holds the branches.
    secondDict = myTree[next(iter(myTree))]
    numLeafs = 0
    for child in secondDict.values():
        # isinstance also recognises dict subclasses (e.g. OrderedDict),
        # which a comparison on the type name would count as leaves.
        if isinstance(child, dict):
            numLeafs += getNumLeafs(child)
        else:
            numLeafs += 1
    return numLeafs
def getTreeDepth(myTree):
    """Return the number of decision levels in a nested-dict decision tree.

    A tree whose branches are all leaves has depth 1; every nested subtree
    along a branch adds one level.

    Args:
        myTree: Non-empty tree of the form
            ``{feature: {value: subtree_or_leaf, ...}}``.

    Returns:
        The depth of the deepest branch (0 if the root has no branches).
    """
    # Only the first key is treated as the root; its value holds the branches.
    secondDict = myTree[next(iter(myTree))]
    maxDepth = 0
    for child in secondDict.values():
        # isinstance also recognises dict subclasses (e.g. OrderedDict),
        # which a comparison on the type name would treat as leaves.
        if isinstance(child, dict):
            thisDepth = 1 + getTreeDepth(child)
        else:
            thisDepth = 1
        if thisDepth > maxDepth:
            maxDepth = thisDepth
    return maxDepth
def plotNode(nodeTxt, centerPt, parentPt, nodeType):
    """Draw one boxed node label with an arrow linking it to its parent.

    ``nodeTxt`` is placed at ``centerPt`` inside a box styled by
    ``nodeType``; the arrow is styled by the module-level ``arrow_args``
    and connects ``parentPt`` with ``centerPt``. Both points are given in
    axes-fraction coordinates and the drawing goes to ``createPlot.ax1``.
    """
    annotation_options = dict(
        xy=parentPt,
        xycoords='axes fraction',
        xytext=centerPt,
        textcoords='axes fraction',
        va="center",
        ha="center",
        bbox=nodeType,
        arrowprops=arrow_args,
    )
    createPlot.ax1.annotate(nodeTxt, **annotation_options)
def plotMidText(cntrPt, parentPt, txtString):
    """Write ``txtString`` halfway between ``cntrPt`` and ``parentPt``.

    The text is drawn on ``createPlot.ax1``, centred on the midpoint and
    rotated by 30 degrees.
    """
    child_x, child_y = cntrPt[0], cntrPt[1]
    parent_x, parent_y = parentPt[0], parentPt[1]
    mid_x = (parent_x - child_x) / 2.0 + child_x
    mid_y = (parent_y - child_y) / 2.0 + child_y
    createPlot.ax1.text(
        mid_x, mid_y, txtString, va="center", ha="center", rotation=30
    )
def plotTree(myTree, parentPt, nodeTxt):
    """Recursively draw ``myTree`` below the point ``parentPt``.

    Layout state lives in function attributes that ``createPlot`` sets
    before the first call: ``plotTree.totalW`` (total leaf count),
    ``plotTree.totalD`` (total depth), and the running cursor
    ``plotTree.xOff`` / ``plotTree.yOff``.

    Args:
        myTree: Tree of the form ``{feature: {value: subtree_or_leaf}}``.
        parentPt: ``(x, y)`` of the parent node in axes-fraction coordinates.
        nodeTxt: Label written on the edge from the parent to this node.
    """
    numLeafs = getNumLeafs(myTree)
    firstStr = next(iter(myTree))
    # Centre this decision node above the span of leaves it owns.
    cntrPt = (
        plotTree.xOff + (1.0 + float(numLeafs)) / 2.0 / plotTree.totalW,
        plotTree.yOff,
    )
    plotMidText(cntrPt, parentPt, nodeTxt)
    plotNode(firstStr, cntrPt, parentPt, decisionNode)
    secondDict = myTree[firstStr]
    # Step one level down for the children ...
    plotTree.yOff = plotTree.yOff - 1.0 / plotTree.totalD
    for key, child in secondDict.items():
        if isinstance(child, dict):
            plotTree(child, cntrPt, str(key))
        else:
            # Each leaf takes the next horizontal slot.
            plotTree.xOff = plotTree.xOff + 1.0 / plotTree.totalW
            leafPt = (plotTree.xOff, plotTree.yOff)
            plotNode(child, leafPt, cntrPt, leafNode)
            plotMidText(leafPt, cntrPt, str(key))
    # ... and restore the level so the caller's siblings are drawn correctly.
    plotTree.yOff = plotTree.yOff + 1.0 / plotTree.totalD
def createPlot(inTree):
    """Render ``inTree`` in matplotlib figure 1 and display it.

    Clears figure 1, creates a frameless, tick-free axes stored on
    ``createPlot.ax1``, initialises the layout state that ``plotTree`` keeps
    as function attributes (``totalW``, ``totalD``, ``xOff``, ``yOff``),
    draws the tree from the top centre and finally calls ``plt.show()``.
    """
    figure = plt.figure(1, facecolor='white')
    figure.clf()
    createPlot.ax1 = plt.subplot(111, frameon=False, xticks=[], yticks=[])
    leaf_total = float(getNumLeafs(inTree))
    depth_total = float(getTreeDepth(inTree))
    plotTree.totalW = leaf_total
    plotTree.totalD = depth_total
    # Start half a leaf slot left of the axes so the first leaf is centred
    # in its slot.
    plotTree.xOff = -0.5 / leaf_total
    plotTree.yOff = 1.0
    plotTree(inTree, (0.5, 1.0), '')
    plt.show()
def retrieveTree(i):
    """Return the ``i``-th of two hard-coded sample decision trees."""
    two_level = {'no surfacing': {0: 'no', 1: {'flippers': {0: 'no', 1: 'yes'}}}}
    three_level = {
        'no surfacing': {
            0: 'no',
            1: {'flippers': {0: {'head': {0: 'no', 1: 'yes'}}, 1: 'no'}},
        }
    }
    samples = [two_level, three_level]
    return samples[i]
# Draw each of the two built-in sample trees in turn.
for sample_index in (0, 1):
    thisTree = retrieveTree(sample_index)
    createPlot(thisTree)
- import numpy as np
- import operator as op
- from math import log
def calcShannonEnt(dataSet):
    """Return the base-2 Shannon entropy of the class labels in ``dataSet``.

    Args:
        dataSet: Sequence of rows; the last element of each row is taken as
            its class label.

    Returns:
        The entropy as a float; ``0.0`` when ``dataSet`` is empty.
    """
    labelCounts = {}
    for featVec in dataSet:
        currentLabel = featVec[-1]
        labelCounts[currentLabel] = labelCounts.get(currentLabel, 0) + 1
    rowNum = len(dataSet)
    shannonEnt = 0.0
    for count in labelCounts.values():
        prob = float(count) / rowNum
        shannonEnt -= prob * log(prob, 2)
    return shannonEnt
def splitDataSet(dataSet, axis, value):
    """Select the rows whose column ``axis`` equals ``value``, minus that column.

    Args:
        dataSet: Sequence of rows (lists, tuples or other sliceable sequences).
        axis: Index of the column to test and remove.
        value: Value the column must equal for a row to be kept.

    Returns:
        A new list of new lists; the input rows are not modified.
    """
    # Building each row from two list() copies keeps the result a list of
    # lists even when the input rows are tuples (which have no extend()).
    return [
        list(featVec[:axis]) + list(featVec[axis + 1:])
        for featVec in dataSet
        if featVec[axis] == value
    ]
def chooseBestFeatureToSplit(dataSet):
    """Return the index of the feature whose split gives the largest information gain.

    Args:
        dataSet: Non-empty sequence of rows; every column except the last is
            a feature and the last column is the class label.

    Returns:
        The column index of the best feature, or ``-1`` when no feature
        yields a strictly positive information gain.
    """
    # Read the column count from the first row instead of converting the
    # whole data set to a NumPy array just to inspect its shape.
    numFeatures = len(dataSet[0]) - 1
    baseEntropy = calcShannonEnt(dataSet)
    totalRows = float(len(dataSet))
    bestInfoGain = 0.0
    bestFeature = -1
    for i in range(numFeatures):
        uniqueVals = {example[i] for example in dataSet}
        # Weighted average of the entropy of each partition.
        newEntropy = 0.0
        for value in uniqueVals:
            subDataSet = splitDataSet(dataSet, i, value)
            prob = len(subDataSet) / totalRows
            newEntropy += prob * calcShannonEnt(subDataSet)
        infoGain = baseEntropy - newEntropy
        if infoGain > bestInfoGain:
            bestInfoGain = infoGain
            bestFeature = i
    return bestFeature
def majorityCnt(classList):
    """Return the most frequent label in ``classList``.

    Ties are resolved in favour of the label that appears first.

    Args:
        classList: Non-empty sequence of hashable class labels.

    Returns:
        The label with the highest count.

    Raises:
        IndexError: If ``classList`` is empty.
    """
    if not classList:
        raise IndexError("majorityCnt() requires at least one class label")
    classCount = {}
    for vote in classList:
        classCount[vote] = classCount.get(vote, 0) + 1
    # max() returns the first maximal key in insertion order, which matches
    # the result of a stable descending sort without sorting everything.
    return max(classCount, key=classCount.get)
def createTree(dataSet, labels):
    """Build a decision tree as nested dicts by recursive best-feature splits.

    Args:
        dataSet: Non-empty sequence of rows; the last column is the class
            label and the other columns are features.
        labels: Feature names, one per feature column. The list is not
            modified.

    Returns:
        Either a class label (leaf) or a dict of the form
        ``{feature_name: {feature_value: subtree_or_leaf, ...}}``.
    """
    classList = [example[-1] for example in dataSet]
    # All rows share one class: nothing left to decide.
    if classList.count(classList[0]) == len(classList):
        return classList[0]
    # Only the label column remains: fall back to a majority vote.
    if len(dataSet[0]) == 1:
        return majorityCnt(classList)
    bestFeat = chooseBestFeatureToSplit(dataSet)
    # -1 means no feature separates the classes; using it as an index would
    # split on the class-label column, so vote instead.
    if bestFeat < 0:
        return majorityCnt(classList)
    bestFeatLabel = labels[bestFeat]
    # Build a new label list rather than deleting from the caller's list.
    subLabels = labels[:bestFeat] + labels[bestFeat + 1:]
    myTree = {bestFeatLabel: {}}
    for value in {example[bestFeat] for example in dataSet}:
        myTree[bestFeatLabel][value] = createTree(
            splitDataSet(dataSet, bestFeat, value), subLabels
        )
    return myTree
def classify(inputTree,featLabels,testVec):
    """Walk ``inputTree`` with ``testVec`` and return the class label reached.

    At each decision node the node's feature name is looked up in
    ``featLabels`` to find which position of ``testVec`` to read; that value
    selects the branch to follow. The walk stops at the first non-dict value.
    """
    node = inputTree
    while True:
        feature_name = next(iter(node.keys()))
        branches = node[feature_name]
        column = featLabels.index(feature_name)
        outcome = branches[testVec[column]]
        if not isinstance(outcome, dict):
            return outcome
        node = outcome
# Load the tab-separated lenses data, build a decision tree from it and
# print the intermediate results. The context manager closes the file as
# soon as its lines have been read.
with open("D:\\LearningResource\\machinelearninginaction\\Ch03\\lenses.txt") as data:
    dataSet = [inst.strip().split("\t") for inst in data.readlines()]
print(dataSet)
print(np.shape(dataSet))
labels = ["age","prescript","astigmatic","tearRate"]
tree = createTree(dataSet,labels)
print(tree)
- import matplotlib.pyplot as plt
# Box and arrow styles shared by the tree-drawing helpers below.
decisionNode = {"boxstyle": "sawtooth", "fc": "0.8"}  # box for decision nodes
leafNode = {"boxstyle": "round4", "fc": "0.8"}  # box for leaf nodes
arrow_args = {"arrowstyle": "<-"}  # handed to annotate() as arrowprops
def getNumLeafs(myTree):
    """Count the leaf nodes of a nested-dict decision tree.

    The tree has the form ``{feature: {value: subtree_or_leaf, ...}}``: one
    root key whose value maps each branch either to another tree (a dict)
    or to a leaf (any non-dict value).

    Args:
        myTree: Non-empty nested-dict tree.

    Returns:
        The number of non-dict values reachable from the root.
    """
    # Only the first key is treated as the root; its value holds the branches.
    secondDict = myTree[next(iter(myTree))]
    numLeafs = 0
    for child in secondDict.values():
        # isinstance also recognises dict subclasses (e.g. OrderedDict),
        # which a comparison on the type name would count as leaves.
        if isinstance(child, dict):
            numLeafs += getNumLeafs(child)
        else:
            numLeafs += 1
    return numLeafs
def getTreeDepth(myTree):
    """Return the number of decision levels in a nested-dict decision tree.

    A tree whose branches are all leaves has depth 1; every nested subtree
    along a branch adds one level.

    Args:
        myTree: Non-empty tree of the form
            ``{feature: {value: subtree_or_leaf, ...}}``.

    Returns:
        The depth of the deepest branch (0 if the root has no branches).
    """
    # Only the first key is treated as the root; its value holds the branches.
    secondDict = myTree[next(iter(myTree))]
    maxDepth = 0
    for child in secondDict.values():
        # isinstance also recognises dict subclasses (e.g. OrderedDict),
        # which a comparison on the type name would treat as leaves.
        if isinstance(child, dict):
            thisDepth = 1 + getTreeDepth(child)
        else:
            thisDepth = 1
        if thisDepth > maxDepth:
            maxDepth = thisDepth
    return maxDepth
def plotNode(nodeTxt, centerPt, parentPt, nodeType):
    """Draw one boxed node label with an arrow linking it to its parent.

    ``nodeTxt`` is placed at ``centerPt`` inside a box styled by
    ``nodeType``; the arrow is styled by the module-level ``arrow_args``
    and connects ``parentPt`` with ``centerPt``. Both points are given in
    axes-fraction coordinates and the drawing goes to ``createPlot.ax1``.
    """
    annotation_options = dict(
        xy=parentPt,
        xycoords='axes fraction',
        xytext=centerPt,
        textcoords='axes fraction',
        va="center",
        ha="center",
        bbox=nodeType,
        arrowprops=arrow_args,
    )
    createPlot.ax1.annotate(nodeTxt, **annotation_options)
def plotMidText(cntrPt, parentPt, txtString):
    """Write ``txtString`` halfway between ``cntrPt`` and ``parentPt``.

    The text is drawn on ``createPlot.ax1``, centred on the midpoint and
    rotated by 30 degrees.
    """
    child_x, child_y = cntrPt[0], cntrPt[1]
    parent_x, parent_y = parentPt[0], parentPt[1]
    mid_x = (parent_x - child_x) / 2.0 + child_x
    mid_y = (parent_y - child_y) / 2.0 + child_y
    createPlot.ax1.text(
        mid_x, mid_y, txtString, va="center", ha="center", rotation=30
    )
def plotTree(myTree, parentPt, nodeTxt):
    """Recursively draw ``myTree`` below the point ``parentPt``.

    Layout state lives in function attributes that ``createPlot`` sets
    before the first call: ``plotTree.totalW`` (total leaf count),
    ``plotTree.totalD`` (total depth), and the running cursor
    ``plotTree.xOff`` / ``plotTree.yOff``.

    Args:
        myTree: Tree of the form ``{feature: {value: subtree_or_leaf}}``.
        parentPt: ``(x, y)`` of the parent node in axes-fraction coordinates.
        nodeTxt: Label written on the edge from the parent to this node.
    """
    numLeafs = getNumLeafs(myTree)
    firstStr = next(iter(myTree))
    # Centre this decision node above the span of leaves it owns.
    cntrPt = (
        plotTree.xOff + (1.0 + float(numLeafs)) / 2.0 / plotTree.totalW,
        plotTree.yOff,
    )
    plotMidText(cntrPt, parentPt, nodeTxt)
    plotNode(firstStr, cntrPt, parentPt, decisionNode)
    secondDict = myTree[firstStr]
    # Step one level down for the children ...
    plotTree.yOff = plotTree.yOff - 1.0 / plotTree.totalD
    for key, child in secondDict.items():
        if isinstance(child, dict):
            plotTree(child, cntrPt, str(key))
        else:
            # Each leaf takes the next horizontal slot.
            plotTree.xOff = plotTree.xOff + 1.0 / plotTree.totalW
            leafPt = (plotTree.xOff, plotTree.yOff)
            plotNode(child, leafPt, cntrPt, leafNode)
            plotMidText(leafPt, cntrPt, str(key))
    # ... and restore the level so the caller's siblings are drawn correctly.
    plotTree.yOff = plotTree.yOff + 1.0 / plotTree.totalD
def createPlot(inTree):
    """Render ``inTree`` in matplotlib figure 1 and display it.

    Clears figure 1, creates a frameless, tick-free axes stored on
    ``createPlot.ax1``, initialises the layout state that ``plotTree`` keeps
    as function attributes (``totalW``, ``totalD``, ``xOff``, ``yOff``),
    draws the tree from the top centre and finally calls ``plt.show()``.
    """
    figure = plt.figure(1, facecolor='white')
    figure.clf()
    createPlot.ax1 = plt.subplot(111, frameon=False, xticks=[], yticks=[])
    leaf_total = float(getNumLeafs(inTree))
    depth_total = float(getTreeDepth(inTree))
    plotTree.totalW = leaf_total
    plotTree.totalD = depth_total
    # Start half a leaf slot left of the axes so the first leaf is centred
    # in its slot.
    plotTree.xOff = -0.5 / leaf_total
    plotTree.yOff = 1.0
    plotTree(inTree, (0.5, 1.0), '')
    plt.show()
# Visualise the decision tree built from the lenses data.
createPlot(tree)
吴裕雄 python 机器学习-DMT(2)的更多相关文章
- 吴裕雄 python 机器学习-DMT(1)
import numpy as np import operator as op from math import log def createDataSet(): dataSet = [[1, 1, ...
- 吴裕雄 python 机器学习——分类决策树模型
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets from sklearn.model_s ...
- 吴裕雄 python 机器学习——回归决策树模型
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets from sklearn.model_s ...
- 吴裕雄 python 机器学习——线性判断分析LinearDiscriminantAnalysis
import numpy as np import matplotlib.pyplot as plt from matplotlib import cm from mpl_toolkits.mplot ...
- 吴裕雄 python 机器学习——逻辑回归
import numpy as np import matplotlib.pyplot as plt from matplotlib import cm from mpl_toolkits.mplot ...
- 吴裕雄 python 机器学习——ElasticNet回归
import numpy as np import matplotlib.pyplot as plt from matplotlib import cm from mpl_toolkits.mplot ...
- 吴裕雄 python 机器学习——Lasso回归
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets, linear_model from s ...
- 吴裕雄 python 机器学习——岭回归
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets, linear_model from s ...
- 吴裕雄 python 机器学习——线性回归模型
import numpy as np from sklearn import datasets,linear_model from sklearn.model_selection import tra ...
随机推荐
- 怎样找到微信小程序功能呢?
怎样找到微信小程序功能呢? 17年1月9号这个功能刚推出时,非常火,但我在微信中迟迟找不到微信小程序功能,微信明明更新到了最新版也没有:后来经过以下步骤才找到这个功能! 1.微信版本不能过低,这个一般 ...
- Django-models的字段类型
model的field类型 1.models.AutoField ---自增列 = int(11) 如果没有的话,默认会生成一个名称为 id 的列,如果要显式地自定义一个自增列,必须将该列设 ...
- useful tools and website
1.https://www.processon.com/ 在线流程图制作网站 2.http://www.easyicon.net/ 专门下载图标的网站 3.https://www.lfd.uci ...
- CPU Rings, Privilege, and Protection.CPU的运行环, 特权级与保护
原文标题:CPU Rings, Privilege, and Protection 原文地址:http://duartes.org/gustavo/blog/ [注:本人水平有限,只好挑一些国外高手的 ...
- Spring之jdbcTemplate:查询的三种方式(单个值、单个对象、对象集合)
JdbcTemplateDemo2.java package helloworld.jdbcTemplate; import org.springframework.jdbc.core.JdbcTem ...
- WPF开发ArcGis系统时的异常信息: ArcGIS product not specified. You must first bind to an ArcGIS version prior to using any ArcGIS components.
“System.Runtime.InteropServices.COMException”类型的未经处理的异常在 Arcgis_Test.exe 中发生 其他信息: ArcGIS product no ...
- [Unity动画]05.Entry & Exit & Any State
0.状态机如下: Any State->Dying:isDying为true Dying->Reviving:isDying为false Reviving->Exit:isDying ...
- 【每日一学】pandas_透视表函数&交叉表函数
每日一悟 [分开工作内外8小时] 前一个月,我经常把工作内的问题带到路上.地铁上.睡觉前,甚至是周末. 然而很快发现,我工作外的成就几乎没有,而工作内的进展也并不理想. 仔细想想,工作外是需要学新东西 ...
- 【Social listening实操】从社交媒体传播和文本挖掘角度解读《欢乐颂2》
本文转自知乎 作者:苏格兰折耳喵 ----------------------------------------------------- 作为数据分析爱好者,本文作者将想从数据的角度去解读< ...
- Android自定义View学习笔记(一)
绘制基础 参考:HenCoder Android 开发进阶: 自定义 View 1-1 绘制基础 Paint详解 参考:HenCoder Android 开发进阶: 自定义 View 1-2 Pain ...