1. # file: dt_cls_dense_batch.py
  2. #===============================================================================
  3. # Copyright 2014-2018 Intel Corporation.
  4. #
  5. # This software and the related documents are Intel copyrighted materials, and
  6. # your use of them is governed by the express license under which they were
  7. # provided to you (License). Unless the License provides otherwise, you may not
  8. # use, modify, copy, publish, distribute, disclose or transmit this software or
  9. # the related documents without Intel's prior written permission.
  10. #
  11. # This software and the related documents are provided as is, with no express
  12. # or implied warranties, other than those that are expressly stated in the
  13. # License.
  14. #===============================================================================
  15.  
  16. ## <a name="DAAL-EXAMPLE-PY-DT_CLS_DENSE_BATCH"></a>
  17. ## \example dt_cls_dense_batch.py
  18.  
  19. import os
  20. import sys
  21.  
  22. from daal.algorithms.decision_tree.classification import prediction, training
  23. from daal.algorithms import classifier
  24. from daal.data_management import (
  25. FileDataSource, DataSourceIface, NumericTableIface, HomogenNumericTable, MergedNumericTable
  26. )
  27. utils_folder = os.path.realpath(os.path.abspath(os.path.dirname(os.path.dirname(__file__))))
  28. if utils_folder not in sys.path:
  29. sys.path.insert(0, utils_folder)
  30. from utils import printNumericTables
  31.  
  # Directory holding the example data sets (a relative path)
  DAAL_PREFIX = os.path.join('..', 'data')

  # Input data set parameters: CSV files used for training, pruning and testing
  trainDatasetFileName, pruneDatasetFileName, testDatasetFileName = (
      os.path.join(DAAL_PREFIX, 'batch', csvName)
      for csvName in (
          'decision_tree_train.csv',
          'decision_tree_prune.csv',
          'decision_tree_test.csv',
      )
  )

  # Column count of the feature tables and the class count given to the trainer
  nFeatures = 5
  nClasses = 5

  # Shared state: trainModel() stores the model, testModel() stores the
  # ground truth and the prediction result, printResults() reads them
  model = None
  predictionResult = None
  testGroundTruth = None
  46.  
  def trainModel():
      """Train the decision tree classifier and publish it as the global ``model``.

      Loads the training and the pruning data sets from their CSV files,
      feeds both to ``training.Batch`` and stores the resulting model in the
      module-level ``model`` variable.
      """
      global model

      def loadFeaturesAndLabels(csvFileName):
          # Initialize FileDataSource<CSVFeatureManager> to read the .csv file
          source = FileDataSource(
              csvFileName,
              DataSourceIface.notAllocateNumericTable,
              DataSourceIface.doDictionaryFromContext
          )

          # Unallocated tables: nFeatures columns of data plus one label column
          featureTable = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
          labelTable = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)

          # Loading through the merged view fills both underlying tables
          source.loadDataBlock(MergedNumericTable(featureTable, labelTable))
          return featureTable, labelTable

      # Training set first, then the pruning set (same order as the file reads)
      trainData, trainGroundTruth = loadFeaturesAndLabels(trainDatasetFileName)
      pruneData, pruneGroundTruth = loadFeaturesAndLabels(pruneDatasetFileName)

      # Create an algorithm object to train the decision tree classification model
      trainer = training.Batch(nClasses)

      # Pass the training data, its labels and the pruning data to the algorithm
      trainer.input.set(classifier.training.data, trainData)
      trainer.input.set(classifier.training.labels, trainGroundTruth)
      trainer.input.setTable(training.dataForPruning, pruneData)
      trainer.input.setTable(training.labelsForPruning, pruneGroundTruth)

      # Run the training and keep the resulting model for testModel()
      model = trainer.compute().get(classifier.training.model)
  92.  
  def testModel():
      """Run prediction on the test data set with the trained global ``model``.

      Stores the test labels in the module-level ``testGroundTruth`` and the
      algorithm result in ``predictionResult`` so printResults() can show them.
      """
      global testGroundTruth, predictionResult

      # Initialize FileDataSource<CSVFeatureManager> to read the test .csv file
      csvSource = FileDataSource(
          testDatasetFileName,
          DataSourceIface.notAllocateNumericTable,
          DataSourceIface.doDictionaryFromContext
      )

      # Unallocated tables: nFeatures columns of data plus one label column
      testFeatures = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
      testGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)

      # Loading through the merged view fills both underlying tables
      csvSource.loadDataBlock(MergedNumericTable(testFeatures, testGroundTruth))

      # Decision tree classification prediction with the default method
      predictor = prediction.Batch()

      # Pass the testing data set and the trained model to the algorithm
      predictor.input.setTable(classifier.prediction.data, testFeatures)
      predictor.input.setModel(classifier.prediction.model, model)

      # Compute and keep the prediction result
      # (Result class from classifier.prediction)
      predictionResult = predictor.compute()
  122.  
  def printResults():
      """Print ground truth next to predicted labels for the first 20 observations.

      Reads the module-level ``testGroundTruth`` and ``predictionResult``,
      which testModel() assigns.
      """
      predictedLabels = predictionResult.get(classifier.prediction.prediction)

      printNumericTables(
          testGroundTruth, predictedLabels,
          "Ground truth", "Classification results",
          "Decision tree classification results (first 20 observations):",
          20, flt64=False
      )
  132.  
  if __name__ == "__main__":
      # Run the example end to end: train, predict, then report
      for stage in (trainModel, testModel, printResults):
          stage()

  

随机森林的示例代码:

  1. # file: df_cls_dense_batch.py
  2. #===============================================================================
  3. # Copyright 2014-2018 Intel Corporation.
  4. #
  5. # This software and the related documents are Intel copyrighted materials, and
  6. # your use of them is governed by the express license under which they were
  7. # provided to you (License). Unless the License provides otherwise, you may not
  8. # use, modify, copy, publish, distribute, disclose or transmit this software or
  9. # the related documents without Intel's prior written permission.
  10. #
  11. # This software and the related documents are provided as is, with no express
  12. # or implied warranties, other than those that are expressly stated in the
  13. # License.
  14. #===============================================================================
  15.  
  16. ## <a name="DAAL-EXAMPLE-PY-DF_CLS_DENSE_BATCH"></a>
  17. ## \example df_cls_dense_batch.py
  18.  
  19. import os
  20. import sys
  21.  
  22. from daal.algorithms import decision_forest
  23. from daal.algorithms.decision_forest.classification import prediction, training
  24. from daal.algorithms import classifier
  25. from daal.data_management import (
  26. FileDataSource, DataSourceIface, NumericTableIface, HomogenNumericTable,
  27. MergedNumericTable, features
  28. )
  29.  
  30. utils_folder = os.path.realpath(os.path.abspath(os.path.dirname(os.path.dirname(__file__))))
  31. if utils_folder not in sys.path:
  32. sys.path.insert(0, utils_folder)
  33. from utils import printNumericTable, printNumericTables
  34.  
  # Directory holding the example data sets (a relative path)
  DAAL_PREFIX = os.path.join('..', 'data')

  # Input data set parameters: CSV files used for training and testing
  trainDatasetFileName, testDatasetFileName = (
      os.path.join(DAAL_PREFIX, 'batch', csvName)
      for csvName in (
          'df_classification_train.csv',
          'df_classification_test.csv',
      )
  )

  # Column count of the feature tables and the class count given to the algorithms
  nFeatures = 3
  nClasses = 5

  # Decision forest parameters
  nTrees = 10
  minObservationsInLeafNode = 8

  # Shared state: trainModel() stores the model, testModel() stores the
  # ground truth and the prediction result, printResults() reads them
  model = None
  predictionResult = None
  testGroundTruth = None
  52.  
  def trainModel():
      """Train the decision forest classifier and publish it as the global ``model``.

      Loads the training CSV file, marks the feature types in the data
      dictionary, trains ``training.Batch`` with the module-level forest
      parameters, stores the model in ``model`` and prints the variable
      importance and out-of-bag error tables.
      """
      global model

      # Initialize FileDataSource<CSVFeatureManager> to retrieve the input data from a .csv file
      trainDataSource = FileDataSource(
          trainDatasetFileName,
          DataSourceIface.notAllocateNumericTable,
          DataSourceIface.doDictionaryFromContext
      )

      # Create Numeric Tables for training data and labels
      trainData = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
      trainGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
      mergedData = MergedNumericTable(trainData, trainGroundTruth)

      # Retrieve the data from the input file
      trainDataSource.loadDataBlock(mergedData)

      # Get the dictionary and update it with additional information about data.
      # Named featureDictionary so the builtin ``dict`` is not shadowed.
      featureDictionary = trainData.getDictionary()

      # Add a feature type to the dictionary: two continuous columns, one categorical
      featureDictionary[0].featureType = features.DAAL_CONTINUOUS
      featureDictionary[1].featureType = features.DAAL_CONTINUOUS
      featureDictionary[2].featureType = features.DAAL_CATEGORICAL

      # Create an algorithm object to train the decision forest classification model
      algorithm = training.Batch(nClasses)
      algorithm.parameter.nTrees = nTrees
      algorithm.parameter.minObservationsInLeafNode = minObservationsInLeafNode
      algorithm.parameter.featuresPerNode = nFeatures
      algorithm.parameter.varImportance = decision_forest.training.MDI
      algorithm.parameter.resultsToCompute = decision_forest.training.computeOutOfBagError

      # Pass the training data set and dependent values to the algorithm
      algorithm.input.set(classifier.training.data, trainData)
      algorithm.input.set(classifier.training.labels, trainGroundTruth)

      # Train the decision forest classification model and retrieve the results of the training algorithm
      trainingResult = algorithm.compute()
      model = trainingResult.get(classifier.training.model)
      printNumericTable(trainingResult.getTable(training.variableImportance), "Variable importance results: ")
      printNumericTable(trainingResult.getTable(training.outOfBagError), "OOB error: ")
  96.  
  def testModel():
      """Run prediction on the test data set with the trained global ``model``.

      Loads the test CSV file, marks the feature types in the data dictionary
      and stores the test labels in ``testGroundTruth`` and the algorithm
      result in ``predictionResult`` so printResults() can show them.
      """
      global testGroundTruth, predictionResult

      # Initialize FileDataSource<CSVFeatureManager> to retrieve the test data from a .csv file
      testDataSource = FileDataSource(
          testDatasetFileName,
          DataSourceIface.notAllocateNumericTable,
          DataSourceIface.doDictionaryFromContext
      )

      # Create Numeric Tables for testing data and labels
      testData = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
      testGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
      mergedData = MergedNumericTable(testData, testGroundTruth)

      # Retrieve the data from input file
      testDataSource.loadDataBlock(mergedData)

      # Get the dictionary and update it with additional information about data.
      # Named featureDictionary so the builtin ``dict`` is not shadowed.
      featureDictionary = testData.getDictionary()

      # Add a feature type to the dictionary: two continuous columns, one categorical
      featureDictionary[0].featureType = features.DAAL_CONTINUOUS
      featureDictionary[1].featureType = features.DAAL_CONTINUOUS
      featureDictionary[2].featureType = features.DAAL_CATEGORICAL

      # Create algorithm objects for decision forest classification prediction with the default method
      algorithm = prediction.Batch(nClasses)

      # Pass the testing data set and trained model to the algorithm
      algorithm.input.setTable(classifier.prediction.data, testData)
      algorithm.input.setModel(classifier.prediction.model, model)

      # Compute prediction results and retrieve algorithm results
      # (Result class from classifier.prediction)
      predictionResult = algorithm.compute()
  133.  
  def printResults():
      """Print the first 10 predicted labels, then the first 10 ground-truth labels.

      Reads the module-level ``predictionResult`` and ``testGroundTruth``,
      which testModel() assigns.
      """
      printNumericTable(
          predictionResult.get(classifier.prediction.prediction),
          "Decision forest prediction results (first 10 rows):",
          10
      )
      printNumericTable(testGroundTruth, "Ground truth (first 10 rows):", 10)
  137.  
  if __name__ == "__main__":
      # Run the example end to end: train, predict, then report
      for stage in (trainModel, testModel, printResults):
          stage()

  

Intel DAAL AI加速 ——传统决策树和随机森林的更多相关文章

  1. Intel DAAL AI加速——支持从数据预处理到模型预测,数据源必须使用DAAL的底层封装库

    数据源加速见官方文档(必须使用DAAL自己的库): Data Management Numeric Tables Tensors Data Sources Data Dictionaries Data ...

  2. Intel DAAL AI加速——神经网络

    # file: neural_net_dense_batch.py #================================================================= ...

  3. R语言︱决策树族——随机森林算法

    每每以为攀得众山小,可每每又切实来到起点,大牛们,缓缓脚步来俺笔记葩分享一下吧,please~ --------------------------- 笔者寄语:有一篇《有监督学习选择深度学习 ...

  4. [ML学习笔记] 决策树与随机森林(Decision Tree&Random Forest)

    [ML学习笔记] 决策树与随机森林(Decision Tree&Random Forest) 决策树 决策树算法以树状结构表示数据分类的结果.每个决策点实现一个具有离散输出的测试函数,记为分支 ...

  5. web安全之机器学习入门——3.2 决策树与随机森林

    目录 简介 决策树简单用法 决策树检测POP3爆破 决策树检测FTP爆破 随机森林检测FTP爆破 简介 决策树和随机森林算法是最常见的分类算法: 决策树,判断的逻辑很多时候和人的思维非常接近。 随机森 ...

  6. 逻辑斯蒂回归VS决策树VS随机森林

    LR 与SVM 不同 1.logistic regression适合需要得到一个分类概率的场景,SVM则没有分类概率 2.LR其实同样可以使用kernel,但是LR没有support vector在计 ...

  7. Machine Learning笔记整理 ------ (五)决策树、随机森林

    1. 决策树 一般的,一棵决策树包含一个根结点.若干内部结点和若干叶子结点,叶子节点对应决策结果,其他每个结点对应一个属性测试,每个结点包含的样本集合根据属性测试结果被划分到子结点中,而根结点包含样本 ...

  8. 美团店铺评价语言处理以及分类(tfidf,SVM,决策树,随机森林,Knn,ensemble)

    第一篇 数据清洗与分析部分 第二篇 可视化部分, 第三篇 朴素贝叶斯文本分类 支持向量机分类 支持向量机 网格搜索 临近法 决策树 随机森林 bagging方法 import pandas as pd ...

  9. chapter02 三种决策树模型:单一决策树、随机森林、GBDT(梯度提升决策树) 预测泰坦尼克号乘客生还情况

    单一标准的决策树:会根据每维特征对预测结果的影响程度进行排序,进而决定不同特征从上至下构建分类节点的顺序。Random Forest Classifier:使用相同的训练样本同时搭建多个独立的分类模型, ...

随机推荐

  1. P1852 [国家集训队]跳跳棋

    P1852 [国家集训队]跳跳棋 lca+二分 详细解析见题解 对于每组跳棋,我们可以用一个三元组(x,y,z)表示 我们发现,这个三元组的转移具有唯一性,收束性 也就是说,把每个三元组当成点,以转移 ...

  2. system.data.sqlite的源代码下载

    帮助文档 http://system.data.sqlite.org/index.html/doc/trunk/www/index.wiki 历史版本https://system.data.sqlit ...

  3. 精巧好用的DelayQueue 转

    我们谈一下实际的场景吧.我们在开发中,有如下场景 a) 关闭空闲连接.服务器中,有很多客户端的连接,空闲一段时间之后需要关闭之.b) 缓存.缓存中的对象,超过了空闲时间,需要从缓存中移出.c) 任务超 ...

  4. IntelliJ IDEA问题总结

    在使用Idea的过程中,会遇到各种各样的问题,下面我将在这里持续总结: 1.Unable to import maven project: See logs for details 在遇到这个问题时, ...

  5. python 获取当前时间戳

    #!/usr/bin/python # -*- coding: UTF-8 -*- import time; # 引入time模块 ticks = time.time() print("当前时 ...

  6. c++ 对符合条件的元素进行计数(count_if)

    #include <iostream> // cout #include <algorithm> // count_if #include <vector> // ...

  7. md5 32位 加密原理 Java实现md5加密

    md5 32位 加密原理 简单概括起来,MD5 算法的过程分为四步:处理原文,设置初始值,循环加工,拼接结果. 第一步:处理原文 首先,我们计算出原文长度(bit)对 512 求余的结果,如果不等于 ...

  8. Selenium库的使用

    一.什么是Selenium selenium 是一套完整的web应用程序测试系统,包含了测试的录制(selenium IDE),编写及运行(Selenium Remote Control)和测试的并行 ...

  9. Qt加载OSg视图例子

    //QT += core gui opengl //LIBS += -losgViewer -losgDB -losgUtil -losg -lOpenThreads -losgGA -losgQt ...

  10. [ios][swift]swift中如何做基本类型的转换

    在swift中如何做基本类型的转换?比如 Int -> Float(Double)、Double -> 保留两位小数、String -> Int、Double -> String 有 ...