1. import matplotlib.pyplot as plt
  2.  
  3. decisionNode = dict(boxstyle="sawtooth", fc="0.8")
  4. leafNode = dict(boxstyle="round4", fc="0.8")
  5. arrow_args = dict(arrowstyle="<-")
  6.  
  7. def getNumLeafs(myTree):
  8. numLeafs = 0
  9. for i in myTree.keys():
  10. firstStr = i
  11. break
  12. secondDict = myTree[firstStr]
  13. for key in secondDict.keys():
  14. if type(secondDict[key]).__name__=='dict':
  15. numLeafs += getNumLeafs(secondDict[key])
  16. else: numLeafs +=1
  17. return numLeafs
  18.  
  19. def getTreeDepth(myTree):
  20. maxDepth = 0
  21. for i in myTree.keys():
  22. firstStr = i
  23. break
  24. secondDict = myTree[firstStr]
  25. for key in secondDict.keys():
  26. if type(secondDict[key]).__name__=='dict':
  27. thisDepth = 1 + getTreeDepth(secondDict[key])
  28. else: thisDepth = 1
  29. if thisDepth > maxDepth: maxDepth = thisDepth
  30. return maxDepth
  31.  
  32. def plotNode(nodeTxt, centerPt, parentPt, nodeType):
  33. createPlot.ax1.annotate(nodeTxt, xy=parentPt, xycoords='axes fraction',xytext=centerPt, textcoords='axes fraction',va="center", ha="center", bbox=nodeType, arrowprops=arrow_args )
  34.  
  35. def plotMidText(cntrPt, parentPt, txtString):
  36. xMid = (parentPt[0]-cntrPt[0])/2.0 + cntrPt[0]
  37. yMid = (parentPt[1]-cntrPt[1])/2.0 + cntrPt[1]
  38. createPlot.ax1.text(xMid, yMid, txtString, va="center", ha="center", rotation=30)
  39.  
  40. def plotTree(myTree, parentPt, nodeTxt):
  41. numLeafs = getNumLeafs(myTree)
  42. depth = getTreeDepth(myTree)
  43. for i in myTree.keys():
  44. firstStr = i
  45. break
  46. cntrPt = (plotTree.xOff + (1.0 + float(numLeafs))/2.0/plotTree.totalW, plotTree.yOff)
  47. plotMidText(cntrPt, parentPt, nodeTxt)
  48. plotNode(firstStr, cntrPt, parentPt, decisionNode)
  49. secondDict = myTree[firstStr]
  50. plotTree.yOff = plotTree.yOff - 1.0/plotTree.totalD
  51. for key in secondDict.keys():
  52. if type(secondDict[key]).__name__=='dict':
  53. plotTree(secondDict[key],cntrPt,str(key))
  54. else:
  55. plotTree.xOff = plotTree.xOff + 1.0/plotTree.totalW
  56. plotNode(secondDict[key], (plotTree.xOff, plotTree.yOff), cntrPt, leafNode)
  57. plotMidText((plotTree.xOff, plotTree.yOff), cntrPt, str(key))
  58. plotTree.yOff = plotTree.yOff + 1.0/plotTree.totalD
  59.  
  60. def createPlot(inTree):
  61. fig = plt.figure(1, facecolor='white')
  62. fig.clf()
  63. axprops = dict(xticks=[], yticks=[])
  64. createPlot.ax1 = plt.subplot(111, frameon=False, **axprops)
  65. #createPlot.ax1 = plt.subplot(111, frameon=False) #ticks for demo puropses
  66. plotTree.totalW = float(getNumLeafs(inTree))
  67. plotTree.totalD = float(getTreeDepth(inTree))
  68. plotTree.xOff = -0.5/plotTree.totalW; plotTree.yOff = 1.0;
  69. plotTree(inTree, (0.5,1.0), '')
  70. plt.show()
  71.  
  72. def retrieveTree(i):
  73. listOfTrees =[{'no surfacing': {0: 'no', 1: {'flippers': {0: 'no', 1: 'yes'}}}},
  74. {'no surfacing': {0: 'no', 1: {'flippers': {0: {'head': {0: 'no', 1: 'yes'}}, 1: 'no'}}}}
  75. ]
  76. return listOfTrees[i]
  77.  
  78. thisTree = retrieveTree(0)
  79. createPlot(thisTree)
  80. thisTree = retrieveTree(1)
  81. createPlot(thisTree)

  1. import numpy as np
  2. import operator as op
  3. from math import log
  4.  
  5. def calcShannonEnt(dataSet):
  6. labelCounts = {}
  7. for featVec in dataSet:
  8. currentLabel = featVec[-1]
  9. if(currentLabel not in labelCounts.keys()):
  10. labelCounts[currentLabel] = 0
  11. labelCounts[currentLabel] += 1
  12. shannonEnt = 0.0
  13. rowNum = len(dataSet)
  14. for key in labelCounts:
  15. prob = float(labelCounts[key])/rowNum
  16. shannonEnt -= prob * log(prob,2)
  17. return shannonEnt
  18.  
  19. def splitDataSet(dataSet, axis, value):
  20. retDataSet = []
  21. for featVec in dataSet:
  22. if(featVec[axis] == value):
  23. reducedFeatVec = featVec[:axis]
  24. reducedFeatVec.extend(featVec[axis+1:])
  25. retDataSet.append(reducedFeatVec)
  26. return retDataSet
  27.  
  28. def chooseBestFeatureToSplit(dataSet):
  29. numFeatures = np.shape(dataSet)[1]-1
  30. baseEntropy = calcShannonEnt(dataSet)
  31. bestInfoGain = 0.0
  32. bestFeature = -1
  33. for i in range(numFeatures):
  34. featList = [example[i] for example in dataSet]
  35. uniqueVals = set(featList)
  36. newEntropy = 0.0
  37. for value in uniqueVals:
  38. subDataSet = splitDataSet(dataSet, i, value)
  39. prob = len(subDataSet)/float(len(dataSet))
  40. newEntropy += prob * calcShannonEnt(subDataSet)
  41. infoGain = baseEntropy - newEntropy
  42. if (infoGain > bestInfoGain):
  43. bestInfoGain = infoGain
  44. bestFeature = i
  45. return bestFeature
  46.  
  47. def majorityCnt(classList):
  48. classCount={}
  49. for vote in classList:
  50. if(vote not in classCount.keys()):
  51. classCount[vote] = 0
  52. classCount[vote] += 1
  53. sortedClassCount = sorted(classCount.items(), key=op.itemgetter(1), reverse=True)
  54. return sortedClassCount[0][0]
  55.  
  56. def createTree(dataSet,labels):
  57. classList = [example[-1] for example in dataSet]
  58. if(classList.count(classList[0]) == len(classList)):
  59. return classList[0]
  60. if len(dataSet[0]) == 1:
  61. return majorityCnt(classList)
  62. bestFeat = chooseBestFeatureToSplit(dataSet)
  63. bestFeatLabel = labels[bestFeat]
  64. myTree = {bestFeatLabel:{}}
  65. del(labels[bestFeat])
  66. featValues = [example[bestFeat] for example in dataSet]
  67. uniqueVals = set(featValues)
  68. for value in uniqueVals:
  69. subLabels = labels[:]
  70. myTree[bestFeatLabel][value] = createTree(splitDataSet(dataSet, bestFeat, value),subLabels)
  71. return myTree
  72.  
  73. def classify(inputTree,featLabels,testVec):
  74. for i in inputTree.keys():
  75. firstStr = i
  76. break
  77. secondDict = inputTree[firstStr]
  78. featIndex = featLabels.index(firstStr)
  79. key = testVec[featIndex]
  80. valueOfFeat = secondDict[key]
  81. if isinstance(valueOfFeat, dict):
  82. classLabel = classify(valueOfFeat, featLabels, testVec)
  83. else:
  84. classLabel = valueOfFeat
  85. return classLabel
  86.  
  87. data = open("D:\\LearningResource\\machinelearninginaction\\Ch03\\lenses.txt")
  88. dataSet = [inst.strip().split("\t") for inst in data.readlines()]
  89. print(dataSet)
  90. print(np.shape(dataSet))
  91. labels = ["age","prescript","astigmatic","tearRate"]
  92. tree = createTree(dataSet,labels)
  93. print(tree)
  94.  
  95. import matplotlib.pyplot as plt
  96.  
  97. decisionNode = dict(boxstyle="sawtooth", fc="0.8")
  98. leafNode = dict(boxstyle="round4", fc="0.8")
  99. arrow_args = dict(arrowstyle="<-")
  100.  
  101. def getNumLeafs(myTree):
  102. numLeafs = 0
  103. for i in myTree.keys():
  104. firstStr = i
  105. break
  106. secondDict = myTree[firstStr]
  107. for key in secondDict.keys():
  108. if type(secondDict[key]).__name__=='dict':
  109. numLeafs += getNumLeafs(secondDict[key])
  110. else: numLeafs +=1
  111. return numLeafs
  112.  
  113. def getTreeDepth(myTree):
  114. maxDepth = 0
  115. for i in myTree.keys():
  116. firstStr = i
  117. break
  118. secondDict = myTree[firstStr]
  119. for key in secondDict.keys():
  120. if type(secondDict[key]).__name__=='dict':
  121. thisDepth = 1 + getTreeDepth(secondDict[key])
  122. else: thisDepth = 1
  123. if thisDepth > maxDepth: maxDepth = thisDepth
  124. return maxDepth
  125.  
  126. def plotNode(nodeTxt, centerPt, parentPt, nodeType):
  127. createPlot.ax1.annotate(nodeTxt, xy=parentPt, xycoords='axes fraction',xytext=centerPt, textcoords='axes fraction',va="center", ha="center", bbox=nodeType, arrowprops=arrow_args )
  128.  
  129. def plotMidText(cntrPt, parentPt, txtString):
  130. xMid = (parentPt[0]-cntrPt[0])/2.0 + cntrPt[0]
  131. yMid = (parentPt[1]-cntrPt[1])/2.0 + cntrPt[1]
  132. createPlot.ax1.text(xMid, yMid, txtString, va="center", ha="center", rotation=30)
  133.  
  134. def plotTree(myTree, parentPt, nodeTxt):
  135. numLeafs = getNumLeafs(myTree)
  136. depth = getTreeDepth(myTree)
  137. for i in myTree.keys():
  138. firstStr = i
  139. break
  140. cntrPt = (plotTree.xOff + (1.0 + float(numLeafs))/2.0/plotTree.totalW, plotTree.yOff)
  141. plotMidText(cntrPt, parentPt, nodeTxt)
  142. plotNode(firstStr, cntrPt, parentPt, decisionNode)
  143. secondDict = myTree[firstStr]
  144. plotTree.yOff = plotTree.yOff - 1.0/plotTree.totalD
  145. for key in secondDict.keys():
  146. if type(secondDict[key]).__name__=='dict':
  147. plotTree(secondDict[key],cntrPt,str(key))
  148. else:
  149. plotTree.xOff = plotTree.xOff + 1.0/plotTree.totalW
  150. plotNode(secondDict[key], (plotTree.xOff, plotTree.yOff), cntrPt, leafNode)
  151. plotMidText((plotTree.xOff, plotTree.yOff), cntrPt, str(key))
  152. plotTree.yOff = plotTree.yOff + 1.0/plotTree.totalD
  153.  
  154. def createPlot(inTree):
  155. fig = plt.figure(1, facecolor='white')
  156. fig.clf()
  157. axprops = dict(xticks=[], yticks=[])
  158. createPlot.ax1 = plt.subplot(111, frameon=False, **axprops)
  159. #createPlot.ax1 = plt.subplot(111, frameon=False) #ticks for demo puropses
  160. plotTree.totalW = float(getNumLeafs(inTree))
  161. plotTree.totalD = float(getTreeDepth(inTree))
  162. plotTree.xOff = -0.5/plotTree.totalW; plotTree.yOff = 1.0;
  163. plotTree(inTree, (0.5,1.0), '')
  164. plt.show()
  165.  
  166. createPlot(tree)

吴裕雄 python 机器学习-DMT(2)的更多相关文章

  1. 吴裕雄 python 机器学习-DMT(1)

    import numpy as np import operator as op from math import log def createDataSet(): dataSet = [[1, 1, ...

  2. 吴裕雄 python 机器学习——分类决策树模型

    import numpy as np import matplotlib.pyplot as plt from sklearn import datasets from sklearn.model_s ...

  3. 吴裕雄 python 机器学习——回归决策树模型

    import numpy as np import matplotlib.pyplot as plt from sklearn import datasets from sklearn.model_s ...

  4. 吴裕雄 python 机器学习——线性判断分析LinearDiscriminantAnalysis

    import numpy as np import matplotlib.pyplot as plt from matplotlib import cm from mpl_toolkits.mplot ...

  5. 吴裕雄 python 机器学习——逻辑回归

    import numpy as np import matplotlib.pyplot as plt from matplotlib import cm from mpl_toolkits.mplot ...

  6. 吴裕雄 python 机器学习——ElasticNet回归

    import numpy as np import matplotlib.pyplot as plt from matplotlib import cm from mpl_toolkits.mplot ...

  7. 吴裕雄 python 机器学习——Lasso回归

    import numpy as np import matplotlib.pyplot as plt from sklearn import datasets, linear_model from s ...

  8. 吴裕雄 python 机器学习——岭回归

    import numpy as np import matplotlib.pyplot as plt from sklearn import datasets, linear_model from s ...

  9. 吴裕雄 python 机器学习——线性回归模型

    import numpy as np from sklearn import datasets,linear_model from sklearn.model_selection import tra ...

随机推荐

  1. 怎样找到微信小程序功能呢?

    怎样找到微信小程序功能呢? 17年1月9号这个功能刚推出时,非常火,但我在微信中迟迟找不到微信小程序功能,微信明明更新到了最新版也没有:后来经过以下步骤才找到这个功能! 1.微信版本不能过低,这个一般 ...

  2. Django-models的字段类型

    model的field类型 1.models.AutoField   ---自增列 = int(11)    如果没有的话,默认会生成一个名称为 id 的列,如果要显式地自定义一个自增列,必须将该列设 ...

  3. useful tools and website

    1.https://www.processon.com/   在线流程图制作网站 2.http://www.easyicon.net/  专门下载图标的网站 3.https://www.lfd.uci ...

  4. CPU Rings, Privilege, and Protection.CPU的运行环, 特权级与保护

    原文标题:CPU Rings, Privilege, and Protection 原文地址:http://duartes.org/gustavo/blog/ [注:本人水平有限,只好挑一些国外高手的 ...

  5. Spring之jdbcTemplate:查询的三种方式(单个值、单个对象、对象集合)

    JdbcTemplateDemo2.java package helloworld.jdbcTemplate; import org.springframework.jdbc.core.JdbcTem ...

  6. WPF开发ArcGis系统时的异常信息: ArcGIS product not specified. You must first bind to an ArcGIS version prior to using any ArcGIS components.

    “System.Runtime.InteropServices.COMException”类型的未经处理的异常在 Arcgis_Test.exe 中发生 其他信息: ArcGIS product no ...

  7. [Unity动画]05.Entry & Exit & Any State

    0.状态机如下: Any State->Dying:isDying为true Dying->Reviving:isDying为false Reviving->Exit:isDying ...

  8. 【每日一学】pandas_透视表函数&交叉表函数

    每日一悟 [分开工作内外8小时] 前一个月,我经常把工作内的问题带到路上.地铁上.睡觉前,甚至是周末. 然而很快发现,我工作外的成就几乎没有,而工作内的进展也并不理想. 仔细想想,工作外是需要学新东西 ...

  9. 【Social listening实操】从社交媒体传播和文本挖掘角度解读《欢乐颂2》

    本文转自知乎 作者:苏格兰折耳喵 ----------------------------------------------------- 作为数据分析爱好者,本文作者将想从数据的角度去解读< ...

  10. Android自定义View学习笔记(一)

    绘制基础 参考:HenCoder Android 开发进阶: 自定义 View 1-1 绘制基础 Paint详解 参考:HenCoder Android 开发进阶: 自定义 View 1-2 Pain ...