PythonOpenCV--Rtrees随机森林

360确实很个性，哈哈，你个貔貅，只吃不吐！

Rtrees介绍！参考链接：http://docs.opencv.org/modules/ml/doc/random_trees.html

原文链接：Python opencv实现的手写字符串识别--SVM、神经网络、K近邻、Boosting、Rtrees

代码copy：

#http://www.360doc.com/content/14/0811/10/13997450_400977867.shtml

#code from!

#dataset:http://yann.lecun.com/exdb/mnist/

from cv2.cv import *

import cv2

import os

import struct

import numpy

# Number of distinct digit classes; used to size one-hot / unrolled encodings.
class_n = 10

# Sample limits handed to the readers: 0 means "read every sample in the file".
# The training file holds at most 60,000 samples, the test file at most 10,000.
# NOTE: the *_Pridict functions in this file assign their own local variables
# with these same names, so these module-level values act only as defaults.
number_of_training_set = 2000
number_of_test_set = 0

# Absolute locations of the four MNIST idx files on the author's machine.
# A checkout-relative layout would keep the same file names under '.\data\'
# (train-images.idx3-ubyte, train-labels.idx1-ubyte, t10k-images.idx3-ubyte,
# t10k-labels.idx1-ubyte).
trainimagepath = r'E:\Develope\EclipseWorks\MeachinLearning\Ch16_RandomTrees\data\train-images.idx3-ubyte'
trainlabelpath = r'E:\Develope\EclipseWorks\MeachinLearning\Ch16_RandomTrees\data\train-labels.idx1-ubyte'
testimagepath = r'E:\Develope\EclipseWorks\MeachinLearning\Ch16_RandomTrees\data\t10k-images.idx3-ubyte'
testlabelpath = r'E:\Develope\EclipseWorks\MeachinLearning\Ch16_RandomTrees\data\t10k-labels.idx1-ubyte'

def evalfun(method, y_val, test_labels, test_number_of_images):
    """Print (and return) the fraction of correct predictions.

    Args:
        method: Short label for the classifier; it prefixes the printed line.
        y_val: Indexable sequence of predicted labels.
        test_labels: Indexable sequence of ground-truth labels.
        test_number_of_images: How many leading entries of ``y_val`` and
            ``test_labels`` are compared.

    Returns:
        The accuracy as a float. It is ``0.0`` when ``test_number_of_images``
        is not positive, instead of failing with ZeroDivisionError.

    Output:
        Writes ``"<method>:<accuracy>"`` to standard output.
    """
    if test_number_of_images > 0:
        correct = sum(
            1
            for idx in range(test_number_of_images)
            if y_val[idx] == test_labels[idx]
        )
        accuracy = float(correct) / test_number_of_images
    else:
        # Nothing to score: report zero rather than dividing by zero.
        accuracy = 0.0

    # Single parenthesised argument: prints the same text whether ``print``
    # is the Python 2 statement or the Python 3 function.
    print(method + ':' + str(accuracy))
    return accuracy

def unroll_samples(samples, num_classes=None):
    """Repeat every sample once per class and append the class index.

    Each input row is duplicated ``num_classes`` times; the extra last column
    of the k-th copy holds ``k``. This turns an N-class problem into a
    two-class one over (sample, candidate class) pairs.

    Args:
        samples: 2-D array of shape ``(sample_n, var_n)``.
        num_classes: Number of classes to unroll over. Defaults to the
            module-level ``class_n`` when omitted.

    Returns:
        A float32 array of shape ``(sample_n * num_classes, var_n + 1)``.
    """
    if num_classes is None:
        num_classes = class_n

    sample_n, var_n = samples.shape
    new_samples = numpy.zeros((sample_n * num_classes, var_n + 1), numpy.float32)
    # Rows 0..num_classes-1 are copies of sample 0, the next group is sample 1, ...
    new_samples[:, :-1] = numpy.repeat(samples, num_classes, axis=0)
    # Last column cycles 0, 1, ..., num_classes-1 within each group.
    new_samples[:, -1] = numpy.tile(numpy.arange(num_classes), sample_n)
    return new_samples

def unroll_responses(responses, num_classes=None):
    """One-hot encode class labels into a flat 0/1 vector.

    For sample ``i`` with label ``c`` the element at ``i * num_classes + c``
    is set to 1; everything else is 0. Reshaping the result to
    ``(-1, num_classes)`` yields one one-hot row per sample.

    Args:
        responses: 1-D sequence of class labels (values are truncated to
            integers when used as indices).
        num_classes: Number of classes. Defaults to the module-level
            ``class_n`` when omitted.

    Returns:
        An int32 array of length ``len(responses) * num_classes``.

    Raises:
        ValueError: If a label lies outside ``[0, num_classes)``. Such a label
            would otherwise set a bit in another sample's slot.
    """
    if num_classes is None:
        num_classes = class_n

    responses = numpy.asarray(responses)
    sample_n = len(responses)
    if sample_n and (responses.min() < 0 or responses.max() >= num_classes):
        raise ValueError(
            'labels must lie in [0, %d), got range [%s, %s]'
            % (num_classes, responses.min(), responses.max())
        )

    new_responses = numpy.zeros(sample_n * num_classes, numpy.int32)
    # Flat position of the "hot" element for each sample.
    resp_idx = numpy.int32(responses + numpy.arange(sample_n) * num_classes)
    new_responses[resp_idx] = 1
    return new_responses

def _crop_to_square_bounding_box(pixels):
    """Return a square-ish view of ``pixels`` around its non-zero region."""
    number_of_rows, number_of_cols = pixels.shape
    ink_rows, ink_cols = numpy.nonzero(pixels > 0)
    if ink_rows.size == 0:
        # Blank image: there is no bounding box, keep the whole frame so the
        # caller never receives an empty array.
        return pixels

    top, bottom = int(ink_rows.min()), int(ink_rows.max())
    left, right = int(ink_cols.min()), int(ink_cols.max())

    # Side of the square is the longer side of the tight bounding box; the
    # square is centred on the bounding box centre.
    half = max(bottom - top, right - left) / 2.0
    midx = (right + left) / 2.0
    midy = (bottom + top) / 2.0

    # Clamp to the image so the slice below never runs out of bounds.
    left = max(int(midx - half), 0)
    right = min(int(midx + half) + 1, number_of_cols)
    top = max(int(midy - half), 0)
    bottom = min(int(midy + half) + 1, number_of_rows)

    # Axis 0 is rows (top/bottom), axis 1 is columns (left/right).
    return pixels[top:bottom, left:right]


def readImage(filepath, re_size, number_of_images_set):
    """Load images from an MNIST idx3-ubyte file as flattened feature rows.

    Every image is scaled to [0, 1], cropped to a square around its non-zero
    pixels, resized with ``cv2.resize`` and flattened into one row.

    Args:
        filepath: Path of the idx3-ubyte image file.
        re_size: Two-element size handed to ``cv2.resize`` as its target size.
        number_of_images_set: How many images to read from the start of the
            file; 0 means "use the count stored in the file header".

    Returns:
        ``(number_of_images, images)`` where ``images`` is a float32 array
        with one row of ``re_size[0] * re_size[1]`` values per image.

    Raises:
        ValueError: If the header is truncated, the magic number is not the
            idx3-ubyte image magic (2051), or the file ends before the
            requested number of images has been read.
    """
    with open(filepath, 'rb') as f:
        header = f.read(16)
        if len(header) != 16:
            raise ValueError('truncated image file header: %r' % (filepath,))
        magic_number, number_of_images, number_of_rows, number_of_cols = \
            struct.unpack('>iiii', header)
        if magic_number != 2051:
            raise ValueError(
                'not an idx3-ubyte image file (magic %d): %r'
                % (magic_number, filepath)
            )

        if number_of_images_set != 0:
            number_of_images = number_of_images_set

        bytes_per_image = number_of_rows * number_of_cols
        # Allocate the result once; growing it with numpy.append per image
        # copies the whole array every iteration.
        images = numpy.zeros(
            (max(number_of_images, 0), re_size[0] * re_size[1]), numpy.float32
        )

        for index in range(number_of_images):
            raw = f.read(bytes_per_image)
            if len(raw) != bytes_per_image:
                raise ValueError(
                    'image file ended after %d of %d images: %r'
                    % (index, number_of_images, filepath)
                )
            pixels = numpy.frombuffer(raw, dtype=numpy.uint8).astype(numpy.float32)
            pixels = (pixels / numpy.float32(255)).reshape(number_of_rows, number_of_cols)
            cropped = _crop_to_square_bounding_box(pixels)
            images[index] = cv2.resize(cropped, re_size).reshape(-1)

    return number_of_images, images

def readLabel(filepath, number_of_images_set):
    """Load labels from an MNIST idx1-ubyte file.

    Args:
        filepath: Path of the idx1-ubyte label file.
        number_of_images_set: How many labels to read from the start of the
            file; 0 means "use the count stored in the file header".

    Returns:
        ``(number_of_images, labels)`` where ``labels`` is a 1-D float32
        array holding one label per entry.

    Raises:
        ValueError: If the header is truncated, the magic number is not the
            idx1-ubyte label magic (2049), or the file holds fewer labels
            than requested.
    """
    with open(filepath, 'rb') as f:
        header = f.read(8)
        if len(header) != 8:
            raise ValueError('truncated label file header: %r' % (filepath,))
        magic_number, number_of_images = struct.unpack('>ii', header)
        if magic_number != 2049:
            raise ValueError(
                'not an idx1-ubyte label file (magic %d): %r'
                % (magic_number, filepath)
            )

        if number_of_images_set != 0:
            number_of_images = number_of_images_set

        # One bulk read instead of one read() + numpy.append per label.
        wanted = max(number_of_images, 0)
        raw = f.read(wanted) if wanted else b''

    if len(raw) != wanted:
        raise ValueError(
            'label file holds %d labels, %d requested: %r'
            % (len(raw), wanted, filepath)
        )

    labels = numpy.frombuffer(raw, dtype=numpy.uint8).astype(numpy.float32)
    return number_of_images, labels

##[self.model.predict(s) for s in samples]

## params = dict( kernel_type = cv2.SVM_LINEAR,

##                       svm_type = cv2.SVM_C_SVC,

##                       C = 1 )

##

##samples = numpy.array(numpy.random.random((4,2)), dtype = numpy.float32)

##y_train = numpy.array([1.,0.,0.,1.], dtype = numpy.float32)

##

##clf = SVM()

##clf.train(samples, y_train)

##y_val = clf.predict(samples)

def ANN_Pridict():
    """Train an OpenCV multi-layer perceptron on the digit data and report accuracy.

    Loads 8x8 features through readImage/readLabel (2000 training samples,
    every test sample), trains a network with one hidden layer of 100 units
    using back-propagation, and prints the test accuracy under the tag 'nn'.

    Returns:
        An empty list.
    """
    image_size = (8, 8)

    # Training split: only the first 2000 samples are requested.
    train_limit = 2000
    _, train_images = readImage(trainimagepath, image_size, train_limit)
    _, train_labels = readLabel(trainlabelpath, train_limit)

    # Test split: a limit of 0 asks the readers for everything in the file.
    test_limit = 0
    _, test_images = readImage(testimagepath, image_size, test_limit)
    test_count, test_labels = readLabel(testlabelpath, test_limit)
    print('loaded images and labels.')

    network = cv2.ANN_MLP()
    sample_count, feature_count = train_images.shape

    # One row per sample, one column per class.
    one_hot_labels = unroll_responses(train_labels).reshape(-1, class_n)

    network.create(numpy.int32([feature_count, 100, class_n]))
    train_params = {
        'term_crit': (cv2.TERM_CRITERIA_COUNT, 300, 0.01),
        'train_method': cv2.ANN_MLP_TRAIN_PARAMS_BACKPROP,
        'bp_dw_scale': 0.001,
        'bp_moment_scale': 0.0,
    }
    network.train(train_images, numpy.float32(one_hot_labels), None, params=train_params)

    # The predicted class is the output unit with the largest response.
    _, outputs = network.predict(test_images)
    evalfun('nn', outputs.argmax(-1), test_labels, test_count)
    return []

def KNearestPridict():
    """Classify the test digits with OpenCV k-nearest-neighbours (k=3).

    Loads 8x8 features through readImage/readLabel (2000 training samples,
    every test sample), fits cv2.KNearest and prints the test accuracy
    prefixed with 'knn:'.

    Returns:
        An empty list.
    """
    image_size = (8, 8)

    # Training split: only the first 2000 samples are requested.
    train_limit = 2000
    _, train_images = readImage(trainimagepath, image_size, train_limit)
    _, train_labels = readLabel(trainlabelpath, train_limit)

    # Test split: a limit of 0 asks the readers for everything in the file.
    test_limit = 0
    _, test_images = readImage(testimagepath, image_size, test_limit)
    test_count, test_labels = readLabel(testlabelpath, test_limit)
    print('loaded images and labels.')

    knn = cv2.KNearest()
    knn.train(train_images, train_labels)
    knn_output = knn.find_nearest(test_images, k=3)

    # Element 1 of the find_nearest result is indexed per sample; its first
    # entry is compared with the true label (presumably the predicted label).
    hits = sum(
        1
        for idx in range(test_count)
        if knn_output[1][idx][0] == test_labels[idx]
    )
    print('knn:' + str(float(hits) / test_count))
    return []

def SVM_Pridict():
    """Classify the test digits with an OpenCV SVM and report accuracy.

    Loads 8x8 features through readImage/readLabel (2000 training samples,
    every test sample), trains cv2.SVM without explicit parameters, predicts
    each test row on its own and prints the accuracy under the tag 'svm'.

    Returns:
        An empty list.
    """
    image_size = (8, 8)

    # Training split: only the first 2000 samples are requested.
    train_limit = 2000
    _, train_images = readImage(trainimagepath, image_size, train_limit)
    _, train_labels = readLabel(trainlabelpath, train_limit)

    # Test split: a limit of 0 asks the readers for everything in the file.
    test_limit = 0
    _, test_images = readImage(testimagepath, image_size, test_limit)
    test_count, test_labels = readLabel(testlabelpath, test_limit)
    print('loaded images and labels.')

    # No params argument is passed, so the library defaults apply.
    classifier = cv2.SVM()
    classifier.train(train_images, train_labels)

    predictions = [classifier.predict(row) for row in test_images]
    evalfun('svm', predictions, test_labels, test_count)
    return []

def RTrees_Pridict():
    """Classify the test digits with OpenCV random trees and report accuracy.

    Loads 8x8 features through readImage/readLabel (2000 training samples,
    every test sample), trains cv2.RTrees with max_depth=10, predicts each
    test row on its own and prints the accuracy under the tag 'RTtree'.

    Returns:
        An empty list.
    """
    image_size = (8, 8)

    # Training split: only the first 2000 samples are requested.
    train_limit = 2000
    _, train_images = readImage(trainimagepath, image_size, train_limit)
    _, train_labels = readLabel(trainlabelpath, train_limit)

    # Test split: a limit of 0 asks the readers for everything in the file.
    test_limit = 0
    _, test_images = readImage(testimagepath, image_size, test_limit)
    test_count, test_labels = readLabel(testlabelpath, test_limit)
    print('loaded images and labels.')

    forest = cv2.RTrees()
    sample_count, feature_count = train_images.shape

    # Every feature is numerical; the trailing entry (the response) is categorical.
    numerical = [cv2.CV_VAR_NUMERICAL] * feature_count
    var_types = numpy.array(numerical + [cv2.CV_VAR_CATEGORICAL], numpy.uint8)

    forest.train(
        train_images,
        cv2.CV_ROW_SAMPLE,
        train_labels,
        varType=var_types,
        params={'max_depth': 10},
    )

    predictions = numpy.float32([forest.predict(row) for row in test_images])
    evalfun('RTtree', predictions, test_labels, test_count)
    return []

def Boost_Pridict():
    """Classify the test digits with OpenCV boosting and report accuracy.

    Loads 8x8 features through readImage/readLabel (2000 training samples,
    every test sample). Samples and labels are unrolled with unroll_samples /
    unroll_responses so that the booster is trained on 0/1 responses; at test
    time the class with the largest summed response wins. The accuracy is
    printed under the tag 'Boost'.

    Returns:
        An empty list.
    """
    image_size = (8, 8)

    # Training split: only the first 2000 samples are requested.
    train_limit = 2000
    _, train_images = readImage(trainimagepath, image_size, train_limit)
    _, train_labels = readLabel(trainlabelpath, train_limit)

    # Test split: a limit of 0 asks the readers for everything in the file.
    test_limit = 0
    _, test_images = readImage(testimagepath, image_size, test_limit)
    test_count, test_labels = readLabel(testlabelpath, test_limit)
    print('loaded images and labels.')

    booster = cv2.Boost()
    sample_count, feature_count = train_images.shape

    unrolled_train = unroll_samples(train_images)
    binary_labels = unroll_responses(train_labels)

    # Original features are numerical; the appended class-index column and
    # the response are both categorical.
    numerical = [cv2.CV_VAR_NUMERICAL] * feature_count
    categorical = [cv2.CV_VAR_CATEGORICAL, cv2.CV_VAR_CATEGORICAL]
    var_types = numpy.array(numerical + categorical, numpy.uint8)

    booster.train(
        unrolled_train,
        cv2.CV_ROW_SAMPLE,
        binary_labels,
        varType=var_types,
        params={'max_depth': 5},
    )

    unrolled_test = unroll_samples(test_images)
    scores = numpy.array(
        [booster.predict(row, returnSum=True) for row in unrolled_test]
    )
    # One row of class_n scores per test image; pick the best-scoring class.
    predictions = scores.reshape(-1, class_n).argmax(1)

    evalfun('Boost', predictions, test_labels, test_count)
    return []

def ML_Pridict():
    """Run all five classifiers on one shared load of the digit data.

    Loads 8x8 features through readImage/readLabel once (2000 training
    samples, every test sample) and then, in order, trains and scores an
    ANN, k-nearest-neighbours, an SVM, random trees and boosting, printing
    one accuracy line per model.

    Returns:
        An empty list.
    """
    image_size = (8, 8)

    # Training split: only the first 2000 samples are requested.
    train_limit = 2000
    _, train_images = readImage(trainimagepath, image_size, train_limit)
    _, train_labels = readLabel(trainlabelpath, train_limit)

    # Test split: a limit of 0 asks the readers for everything in the file.
    test_limit = 0
    _, test_images = readImage(testimagepath, image_size, test_limit)
    test_count, test_labels = readLabel(testlabelpath, test_limit)
    print('loaded images and labels.')

    # ---- ANN: one hidden layer of 100 units, back-propagation ----
    network = cv2.ANN_MLP()
    sample_count, feature_count = train_images.shape
    one_hot_labels = unroll_responses(train_labels).reshape(-1, class_n)
    network.create(numpy.int32([feature_count, 100, class_n]))
    ann_params = {
        'term_crit': (cv2.TERM_CRITERIA_COUNT, 300, 0.01),
        'train_method': cv2.ANN_MLP_TRAIN_PARAMS_BACKPROP,
        'bp_dw_scale': 0.001,
        'bp_moment_scale': 0.0,
    }
    network.train(train_images, numpy.float32(one_hot_labels), None, params=ann_params)
    _, outputs = network.predict(test_images)
    evalfun('nn', outputs.argmax(-1), test_labels, test_count)

    # ---- KNearest: k = 3 ----
    knn = cv2.KNearest()
    knn.train(train_images, train_labels)
    knn_output = knn.find_nearest(test_images, k=3)
    # Element 1 of the result is indexed per sample; its first entry is
    # compared with the true label (presumably the predicted label).
    knn_hits = sum(
        1
        for idx in range(test_count)
        if knn_output[1][idx][0] == test_labels[idx]
    )
    print('knn:' + str(float(knn_hits) / test_count))

    # ---- SVM: no explicit parameters ----
    svm = cv2.SVM()
    svm.train(train_images, train_labels)
    svm_predictions = [svm.predict(row) for row in test_images]
    evalfun('svm', svm_predictions, test_labels, test_count)

    # ---- RTrees: max depth 10 ----
    forest = cv2.RTrees()
    numerical = [cv2.CV_VAR_NUMERICAL] * feature_count
    forest_var_types = numpy.array(numerical + [cv2.CV_VAR_CATEGORICAL], numpy.uint8)
    forest.train(
        train_images,
        cv2.CV_ROW_SAMPLE,
        train_labels,
        varType=forest_var_types,
        params={'max_depth': 10},
    )
    forest_predictions = numpy.float32([forest.predict(row) for row in test_images])
    evalfun('RTtree', forest_predictions, test_labels, test_count)

    # ---- Boost: unrolled 0/1 problem, max depth 5 ----
    booster = cv2.Boost()
    unrolled_train = unroll_samples(train_images)
    binary_labels = unroll_responses(train_labels)
    boost_var_types = numpy.array(
        numerical + [cv2.CV_VAR_CATEGORICAL, cv2.CV_VAR_CATEGORICAL], numpy.uint8
    )
    booster.train(
        unrolled_train,
        cv2.CV_ROW_SAMPLE,
        binary_labels,
        varType=boost_var_types,
        params={'max_depth': 5},
    )
    unrolled_test = unroll_samples(test_images)
    scores = numpy.array(
        [booster.predict(row, returnSum=True) for row in unrolled_test]
    )
    # One row of class_n scores per test image; pick the best-scoring class.
    boost_predictions = scores.reshape(-1, class_n).argmax(1)
    evalfun('Boost', boost_predictions, test_labels, test_count)

    return []

运行结果：

tree.SVM_Pridict()

loaded images and labels.

svm:0.6251

PythonOpenCV--Rtrees随机森林的更多相关文章

OpenCV：使用OpenCV3随机森林进行统计特征多类分析
原文链接:在opencv3中的机器学习算法练习:对OCR进行分类本文贴出的代码为自己的训练集所用,作为参考.可运行demo程序请拜访原作者. CNN作为图像识别和检测器,在分析物体结构分布的多类识别 ...
OpenCV：使用随机森林与GBDT
随机森林顾名思义,是用随机的方式建立一个森林.简单来说,随机森林就是由多棵CART(Classification And Regression Tree)构成的.对于每棵树,它们使用的训练集是从总的训 ...
scikit-learn随机森林调参小结
在Bagging与随机森林算法原理小结中,我们对随机森林(Random Forest, 以下简称RF)的原理做了总结.本文就从实践的角度对RF做一个总结.重点讲述scikit-learn中RF的调参注 ...
Bagging与随机森林算法原理小结
在集成学习原理小结中,我们讲到了集成学习有两个流派,一个是boosting派系,它的特点是各个弱学习器之间有依赖关系.另一种是bagging流派,它的特点是各个弱学习器之间没有依赖关系,可以并行拟合. ...
[Machine Learning & Algorithm] 随机森林（Random Forest）
1 什么是随机森林? 作为新兴起的.高度灵活的一种机器学习算法,随机森林(Random Forest,简称RF)拥有广泛的应用前景,从市场营销到医疗保健保险,既可以用来做市场营销模拟的建模,统计客户来 ...
使用脚本自动配置matlab安装libsvm和随机森林工具箱
前言支持向量机(SVM)和随机森林都是用于分类的机器学习算法. 这里我需要对网上的工具箱在matlab中进行配置. 效果演示: 1.双击运行“自动配置.bat” 2.matlab会自动启动,手动配 ...
paper 130：MatLab分类器大全(svm,knn,随机森林等)
train_data是训练特征数据, train_label是分类标签.Predict_label是预测的标签.MatLab训练数据, 得到语义标签向量 Scores(概率输出).1.逻辑回归(多项式 ...
kaggle数据挖掘竞赛初步--Titanic<随机森林&特征重要性>
完整代码: https://github.com/cindycindyhi/kaggle-Titanic 特征工程系列: Titanic系列之原始数据分析和数据处理 Titanic系列之数据变换 Ti ...
机器学习中的算法(1)-决策树模型组合之随机森林与GBDT
版权声明: 本文由LeftNotEasy发布于http://leftnoteasy.cnblogs.com, 本文可以被全部的转载或者部分使用,但请注明出处,如果有问题,请联系wheeleast@gm ...
ML—随机森林·1
Introduction to Random forest(Simplified) With increase in computational power, we can now choose al ...

随机推荐

eas之怎么设置单据保存或者提交完不跳到下个新增页面
this.chkMenuItemSubmitAndAddNew.setSelected(false);
Linux浅谈磁盘管理及案例
磁盘管理 MBR原理图从该图可理解到为什么主分区只能是四个. 可以不分区,但为了统一管理,提高访问效率设备不同,生成设备名称不同管理分区命令: lsblk查看块设备 fdisk创建MBR分区 f ...
Linux 中，基础命令 command not found...问题
linux中出现基本命令找不到的问题,经过查找发现是/etc/profile文件出现了问题,解决方案如下: 问题详细: [root@master ~]# ll bash: ls: command no ...
SCVMM-VMWARE ACE虚拟机管理工具
SCVMM是微软的虚拟化管理工具 VMWARE ACE是另一套虚拟化的工具这两套工具都是用来管理虚拟机的,可以直接将物理机虚拟化为虚拟机企业一旦发展到了一定阶段,并然需要自己的服务器和虚拟化环境, ...
SVN提示被锁定的解决方法（转）
1.(常用)出现这个问题后使用“清理”即"Clean up"功能,如果还不行,就直接到上一级目录,再执行“清理”,然后再“更新”. 2.(没试过)有时候如果看到某个包里面的文件夹没 ...
TensorFlow 入门之手写识别(MNIST) softmax算法二
TensorFlow 入门之手写识别(MNIST) softmax算法二 MNIST Fly softmax回归 softmax回归算法 TensorFlow实现softmax softmax回归算 ...
在AIX下面查询上一次命令
在AIX下面查询上一次命令输入 r 或者 set -o vi 用vi的操作找上一次命令: 学习了: http://blog.itpub.net/66634/viewspace-1000843/ ht ...
oracle 数据库开发面试题
近期參加了数场面试,总结一下竞聘oracle 开发岗位最常问到哪些问题: 1.delete 与 truncate 差别? 1)truncate 是DDL语句.delete 是DML语句: 2)trun ...
打破传统天价SAP培训，开创SAP师徒之路，经验丰富的老顾问带徒弟 qq群150104068
SAP领航社区,开设了一个导师性质的师徒圈子,类似大学导师带研究生,导师给学生安排课题.分配任务.分享资料,让学生自学提高.我们的教学方法是以自学为主.辅导为辅助,在实践中积累经验掌握原理.主要方向A ...
IOS总结_实现UIButton的图文混排(二)
非常久没有写博客了,之前写过一篇关于UIButton图文混排的,可是有点复杂,今天来一个比較简单地.相信大家回用得着 UIButton *button=[[UIButton alloc, , )]; ...

PythonOpenCV--Rtrees随机森林

PythonOpenCV--Rtrees随机森林的更多相关文章

随机推荐

热门专题