汽车检测SIFT+BOW+SVM

项目来源于《opencv 3计算机视觉 python语言实现》

整个执行过程如下：

1）获取一个训练数据集。

2）创建BOW训练器并获得视觉词汇。

3）采用词汇训练SVM。

4）尝试对测试图像的图像金字塔采用滑动窗口进行检测。

5）对重叠的矩形使用非极大抑制。

6）输出结果。

该项目的结构如下：

样本与代码见链接。

主程序是car_sliding_windows.py，所有的工具都包含在car_detector文件夹中。由于使用的是Python2.7，该文件夹中必须包含一个__init__.py文件，才能作为包（模块）被导入。

car_detector模块的四个文件如下：

SVM训练的模型
非极大抑制函数
图像金字塔
滑动窗口函数

图像金字塔 pyramid.py

#coding= utf-8

import cv2

"""

功能：缩放图像

输入：图片、尺度

输出：缩放后图像

"""

def resize(img, scaleFactor):
  """Return ``img`` resized by a factor of ``1 / scaleFactor``.

  Args:
    img: image array; ``img.shape[0]`` is read as the height and
      ``img.shape[1]`` as the width.
    scaleFactor: divisor applied to both dimensions (e.g. 1.5 shrinks the
      image to two thirds of its size).

  Returns:
    The result of ``cv2.resize`` using ``cv2.INTER_AREA`` interpolation.
  """
  # Use a float literal: under Python 2, ``1 / scaleFactor`` is floor
  # division for an integer scaleFactor (1 / 2 == 0), which would request a
  # 0x0 image.
  ratio = 1.0 / scaleFactor
  new_width = int(img.shape[1] * ratio)
  new_height = int(img.shape[0] * ratio)
  return cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_AREA)

"""

功能：建立图像金字塔

输入：图片、尺度、最小尺寸

输出：图像金字塔

"""

def pyramid(image, scale=1.5, minSize=(200, 80)):
  """Generate an image pyramid, largest level first.

  Because the body contains ``yield``, calling ``pyramid()`` does not run any
  of it; it returns a generator. Each iteration runs the body up to the next
  ``yield``, hands that image to the caller, and later resumes right after
  the ``yield`` with all local variables intact.

  Args:
    image: base image; it is yielded unchanged as the first level.
    scale: factor passed to ``resize`` between consecutive levels. It must
      be greater than 1 for the levels to shrink.
    minSize: ``(min_width, min_height)``; generation stops before the first
      level that is narrower or shorter than this.

  Yields:
    The original image, then each successively resized image.

  Raises:
    ValueError: during iteration, if ``scale`` is not greater than 1 and a
      resized level still satisfies ``minSize`` (the pyramid would otherwise
      never end).
  """
  # The base level is always produced, even if it is already below minSize.
  yield image

  while True:
    image = resize(image, scale)
    if image.shape[0] < minSize[1] or image.shape[1] < minSize[0]:
      break
    if not scale > 1:
      # resize() divides both dimensions by scale, so with scale <= 1 the
      # levels never drop below minSize and this loop would never finish.
      raise ValueError("pyramid scale must be greater than 1, got %r" % (scale,))
    yield image

滑动窗口函数 sliding_window.py

#coding= utf-8

"""

功能：滑动窗口

输入：图像、步长、滑动窗大小

输出：图像窗口

"""

def sliding_window(image, step, window_size):
  """Slide a window over ``image`` and yield every position.

  Args:
    image: array indexed as ``image[row, column]``; ``image.shape[0]`` is the
      number of rows and ``image.shape[1]`` the number of columns.
    step: stride, in pixels, used for both the x and y directions.
    window_size: ``(width, height)`` of the window.

  Yields:
    ``(x, y, window)`` tuples where ``(x, y)`` is the top-left corner and
    ``window`` is ``image[y:y + height, x:x + width]``. Windows that reach
    past the right or bottom edge are truncated by the slice, so callers
    that need full-size windows must check the window's shape.
  """
  win_w, win_h = window_size[0], window_size[1]
  # range() (instead of the Python 2 only xrange) keeps this importable on
  # both Python 2 and Python 3.
  for y in range(0, image.shape[0], step):
    for x in range(0, image.shape[1], step):
      yield (x, y, image[y:y + win_h, x:x + win_w])

非极大抑制 non_maximum.py

这个函数对于一系列的目标候选框矩阵，按照分类器得分排序。每次将评分最高的矩形框保存，消除掉剩下的矩形框中重叠超过阈值的矩形。

#coding= utf-8

# import the necessary packages

import numpy as np

# Malisiewicz et al.

# Python port by Adrian Rosebrock

"""

功能：非极大抑制

输入：目标框、重合率

输出：最后目标框

"""

def non_max_suppression_fast(boxes, overlapThresh):
  """Greedy non-maximum suppression that keeps the highest-scoring boxes.

  Repeatedly keeps the remaining box with the highest score and discards
  every other remaining box whose intersection with it covers more than
  ``overlapThresh`` of that other box's own area.

  Args:
    boxes: array (or nested sequence) with one row per box and the columns
      ``x1, y1, x2, y2, score``.
    overlapThresh: overlap ratio above which a box is suppressed.

  Returns:
    ``[]`` when ``boxes`` is empty; otherwise the kept rows, ordered from
    highest to lowest score and cast to ``int`` (the score column is
    truncated by that cast as well).
  """
  # Nothing to suppress.
  if len(boxes) == 0:
    return []

  boxes = np.asarray(boxes)

  # Integer coordinates must become floats: the overlap ratio below is a
  # division, and unsigned differences could wrap around.
  if boxes.dtype.kind in "iu":
    boxes = boxes.astype("float")

  pick = []

  x1 = boxes[:, 0]
  y1 = boxes[:, 1]
  x2 = boxes[:, 2]
  y2 = boxes[:, 3]
  scores = boxes[:, 4]

  # Box areas (coordinates are treated as inclusive pixel indices).
  area = (x2 - x1 + 1) * (y2 - y1 + 1)

  # Ascending sort, so the best remaining candidate is always the LAST
  # entry. (Sorting in descending order while still taking the last entry
  # would keep the lowest-scoring box and suppress the better ones.)
  order = np.argsort(scores)

  while len(order) > 0:
    best = order[-1]
    rest = order[:-1]
    pick.append(best)

    # Intersection rectangle between the kept box and each remaining box.
    xx1 = np.maximum(x1[best], x1[rest])
    yy1 = np.maximum(y1[best], y1[rest])
    xx2 = np.minimum(x2[best], x2[rest])
    yy2 = np.minimum(y2[best], y2[rest])

    # Clamp at zero so disjoint boxes get an empty intersection.
    w = np.maximum(0, xx2 - xx1 + 1)
    h = np.maximum(0, yy2 - yy1 + 1)

    # Fraction of each remaining box that is covered by the kept box.
    overlap = (w * h) / area[rest]

    # Drop the kept box (already recorded) and everything it suppresses.
    order = rest[~(overlap > overlapThresh)]

  # Return only the picked boxes, using the integer data type.
  return boxes[pick].astype("int")

检测函数 detector.py

这个代码包含 SIFT特征提取、BOW图像词汇描述符获得、SVM分类器训练

#coding= utf-8

import cv2

import numpy as np

# Directory that holds the training images resolved by path().
datapath = "./CarData/TrainImages/"

# Number of positive/negative image pairs fed to the SVM in car_detector().
SAMPLES = 400

def path(cls,i):
    """Return the file path of a training image.

    Args:
        cls: file-name prefix of the class (the callers in this file pass
            "pos-" or "neg-").
        i: zero-based sample index; the file number used is ``i + 1``.

    Returns:
        ``datapath`` + "/" + ``<cls><i+1>.pgm``.
    """
    filename = "%s%d.pgm" % (cls, i + 1)
    return "/".join((datapath, filename))

# Factory for the FLANN-based descriptor matcher.

def get_flann_matcher():
  """Create a ``cv2.FlannBasedMatcher`` with this project's index settings.

  Returns:
    A matcher built from the index parameters ``algorithm=1, trees=5`` and
    empty search parameters.
  """
  # NOTE(review): algorithm 1 is presumably FLANN_INDEX_KDTREE - confirm
  # against the OpenCV FLANN documentation.
  index_params = {"algorithm": 1, "trees": 5}
  search_params = {}
  return cv2.FlannBasedMatcher(index_params, search_params)

def get_bow_extractor(extract, match):
  """Build a bag-of-words image descriptor extractor.

  Args:
    extract: descriptor extractor handed to ``cv2.BOWImgDescriptorExtractor``.
    match: descriptor matcher handed to ``cv2.BOWImgDescriptorExtractor``.

  Returns:
    The new ``cv2.BOWImgDescriptorExtractor`` instance.
  """
  bow_extractor = cv2.BOWImgDescriptorExtractor(extract, match)
  return bow_extractor

# Factory for the SIFT objects used for keypoint detection and description.

def get_extract_detect():
  """Return a pair of separately created SIFT instances.

  Returns:
    A 2-tuple of objects, each created by ``cv2.xfeatures2d.SIFT_create()``.
  """
  make_sift = cv2.xfeatures2d.SIFT_create
  first = make_sift()
  second = make_sift()
  return first, second

def extract_sift(fn, extractor, detector):
  """Load an image file and compute descriptors at its detected keypoints.

  Args:
    fn: path of the image; it is read with ``cv2.imread(fn, 0)``
      (flag 0, i.e. grayscale in OpenCV's imread flags).
    extractor: object whose ``compute(image, keypoints)`` result is indexed
      with ``[1]`` to obtain the descriptors.
    detector: object whose ``detect(image)`` supplies the keypoints.

  Returns:
    Element 1 of ``extractor.compute(...)``, i.e. the descriptors.
  """
  gray = cv2.imread(fn, 0)
  keypoints = detector.detect(gray)
  computed = extractor.compute(gray, keypoints)
  return computed[1]

# Compute the bag-of-words descriptor of an image.

def bow_features(img, extractor_bow, detector):
  """Describe ``img`` with the BOW extractor at its detected keypoints.

  Args:
    img: image handed to both ``detector.detect`` and
      ``extractor_bow.compute``.
    extractor_bow: object exposing ``compute(image, keypoints)``.
    detector: object exposing ``detect(image)``.

  Returns:
    Whatever ``extractor_bow.compute(img, keypoints)`` returns.
  """
  keypoints = detector.detect(img)
  descriptor = extractor_bow.compute(img, keypoints)
  return descriptor

def car_detector():
  """Train the car / non-car classifier.

  Steps:
    1. Cluster SIFT descriptors from the first 8 positive and 8 negative
       training images into a 12-word visual vocabulary.
    2. Describe ``SAMPLES`` positive and ``SAMPLES`` negative images with
       bag-of-words descriptors over that vocabulary.
    3. Train an RBF-kernel C-SVC on those descriptors (label 1 for
       positives, -1 for negatives).

  Returns:
    ``(svm, extract_bow)``: the trained ``cv2.ml`` SVM and the BOW
    descriptor extractor carrying the vocabulary.
  """
  pos, neg = "pos-", "neg-"

  # Both objects are SIFT instances; names follow the helper's order.
  extract, detect = get_extract_detect()
  matcher = get_flann_matcher()

  # Single-argument print(...) behaves the same on Python 2 and Python 3.
  print("building BOWKMeansTrainer...")
  bow_kmeans_trainer = cv2.BOWKMeansTrainer(12)
  extract_bow = get_bow_extractor(extract, matcher)

  print("adding features to trainer")
  for i in range(8):
    print(i)
    for prefix in (pos, neg):
      bow_kmeans_trainer.add(extract_sift(path(prefix, i), extract, detect))

  vocabulary = bow_kmeans_trainer.cluster()
  extract_bow.setVocabulary(vocabulary)

  traindata, trainlabels = [], []
  print("adding to train data")
  for i in range(SAMPLES):
    print(i)
    # Positive sample first, then the negative one, each with its label.
    for prefix, label in ((pos, 1), (neg, -1)):
      sample = cv2.imread(path(prefix, i), 0)
      traindata.extend(bow_features(sample, extract_bow, detect))
      trainlabels.append(label)

  # Configure the SVM classifier.
  svm = cv2.ml.SVM_create()
  svm.setType(cv2.ml.SVM_C_SVC)
  svm.setGamma(1)
  # C is described by the original author as governing the classifier's
  # training error and prediction error.
  svm.setC(35)
  svm.setKernel(cv2.ml.SVM_RBF)  # kernel function

  # Train with one sample per row.
  svm.train(np.array(traindata), cv2.ml.ROW_SAMPLE, np.array(trainlabels))

  return svm, extract_bow

主函数 car_sliding_windows.py

#coding= utf-8

import cv2

import numpy as np

from car_detector.detector import car_detector, bow_features

from car_detector.pyramid import pyramid

from car_detector.non_maximum import non_max_suppression_fast as nms

from car_detector.sliding_window import sliding_window

import urllib

def in_range(number, test, thresh=0.2):
  """Tell whether ``number`` lies strictly within ``thresh`` of ``test``.

  Args:
    number: value being checked.
    test: reference value.
    thresh: maximum allowed absolute difference (exclusive).

  Returns:
    ``True`` when ``abs(number - test)`` is smaller than ``thresh``.
  """
  distance = abs(number - test)
  return distance < thresh

test_image = "../images/cars.jpg" # location of the test sample

img_path = "../images/test.jpg" # copy of the sample that is actually read

# Fetch the test sample into img_path.
# NOTE(review): relies on Python 2's urllib.urlretrieve accepting a plain
# local path as its source - confirm before porting to Python 3.
urllib.urlretrieve(test_image, img_path)

svm, extractor = car_detector() # extract features and train the classifier

detect = cv2.xfeatures2d.SIFT_create() # keypoint detector for the test windows

w, h = 100, 40 # sliding-window width and height

img = cv2.imread(img_path)

if img is None:
  # cv2.imread signals an unreadable file by returning None; fail here with
  # a clear message instead of an attribute error further down.
  raise IOError("cannot read test image: %s" % img_path)

rectangles = []

counter = 1

scaleFactor = 1.25

scale = 1

font = cv2.FONT_HERSHEY_PLAIN

for resized in pyramid(img, scaleFactor):

  # Factor that maps coordinates in this pyramid level back to the original.
  scale = float(img.shape[1]) / float(resized.shape[1])

  for (x, y, roi) in sliding_window(resized, 20, (w, h)):

    # Windows truncated at the image border are skipped.
    if roi.shape[1] != w or roi.shape[0] != h:

      continue

    counter += 1

    try:

      bf = bow_features(roi, extractor, detect)

      _, result = svm.predict(bf)

      # Second pass asks for the raw decision value instead of the label.
      _, res = svm.predict(bf, flags=cv2.ml.STAT_MODEL_RAW_OUTPUT)

    except Exception:

      # Best effort: a window that cannot be described or classified is
      # skipped. Unlike a bare except, this lets Ctrl-C still stop the run.
      continue

    print("Class: %d, Score: %f, a: %s" % (result[0][0], res[0][0], res))

    score = res[0][0]

    # Keep windows classified as cars whose raw score is below -1.0.
    if result[0][0] == 1 and score < -1.0:

      rx, ry, rx2, ry2 = int(x * scale), int(y * scale), int((x+w) * scale), int((y+h) * scale)

      rectangles.append([rx, ry, rx2, ry2, abs(score)])

windows = np.array(rectangles)

boxes = nms(windows, 0.25)

for (x, y, x2, y2, score) in boxes:

  print("%s %s %s %s %s" % (x, y, x2, y2, score))

  cv2.rectangle(img, (int(x),int(y)),(int(x2), int(y2)),(0, 255, 0), 1)

  cv2.putText(img, "%f" % score, (int(x),int(y)), font, 1, (0, 255, 0))

cv2.imshow("img", img)

cv2.waitKey(0)

汽车检测SIFT+BOW+SVM的更多相关文章

图像识别sift+bow+svm
本文概述利用SIFT特征进行简单的花朵识别 SIFT算法的特点有: SIFT特征是图像的局部特征,其对旋转.尺度缩放.亮度变化保持不变性,对视角变化.仿射变换.噪声也保持一定程度的稳定性: SIFT ...
py4CV例子2汽车检测和svm算法
1.什么是汽车检测数据集: ) pos, neg = , ) matcher = cv2.FlannBasedMatcher(flann_params, {}) bow_kmeans_trainer ...
第十九节、基于传统图像处理的目标检测与识别(词袋模型BOW+SVM附代码)
在上一节.我们已经介绍了使用HOG和SVM实现目标检测和识别,这一节我们将介绍使用词袋模型BOW和SVM实现目标检测和识别. 一词袋介绍词袋模型(Bag-Of-Word)的概念最初不是针对计算机视 ...
BoW(SIFT/SURF/...)+SVM/KNN的OpenCV 实现
本文转载了文章(沈阳的博客),目的在于记录自己重复过程中遇到的问题,和更多的人分享讨论. 程序包:猛戳我物体分类物体分类是计算机视觉中一个很有意思的问题,有一些已经归类好的图片作为输入,对一些未知 ...
SIFT+BOW 实现图像检索
原文地址:https://blog.csdn.net/silence2015/article/details/77374910 本文概述图像检索是图像研究领域中一个重要的话题,广泛应用于医学,电子商 ...
【目标检测】基于传统算法的目标检测方法总结概述 Viola-Jones | HOG+SVM | DPM | NMS
"目标检测"是当前计算机视觉和机器学习领域的研究热点.从Viola-Jones Detector.DPM等冷兵器时代的智慧到当今RCNN.YOLO等深度学习土壤孕育下的GPU暴力美 ...
OpenCV 学习笔记 07 目标检测与识别
目标检测与识别是计算机视觉中最常见的挑战之一.属于高级主题. 本章节将扩展目标检测的概念,首先探讨人脸识别技术,然后将该技术应用到显示生活中的各种目标检测. 1 目标检测与识别技术为了与OpenCV ...
阅读《RobHess的SIFT源码分析：综述》笔记
今天总算是机缘巧合的找到了照样一篇纲要性质的文章. 如是能早一些找到就好了.不过“在你认为为时已晚的时候,其实还为时未晚”倒是也能聊以自慰,不过不能经常这样迷惑自己,毕竟我需要开始跑了! 就照着这个大 ...
在vs环境中跑动sift特征提取（代码部分）
因为在前两天的学习中发现.在opencv环境中跑动sift特征点提取还是比较困难的. 所以在此,进行记述. 遇到的问题分别有,csdn不愿意花费积分.配置gtk困难.教程海量然而能跑者鲜.描述不详尽等 ...

随机推荐

eclipse缓存太重，新手最容易中招
有4种方法,从上到下清理:
div浮停在页面最上或最下
div{ position:fixed; bottom:0px; // top:0px; z-index:999; } bottom:0px 浮停在最下面,top:0px 浮停在最上面:z-index ...
dbutils封装对象，单列，一行一列(用)
基本用法:查找并封装对象与对象集合 public User findUserByNamePassword(String name,String password){ QueryRunner runne ...
如何优化Mysql数据库
1.添加主键ID 2.尽量避免使用select * form table 3.创建索引对于查询占主要的应用来说,索引显得尤为重要.很多时候性能问题很简单的就是因为我们忘了添加索引而造成的,或 ...
cmake-mark_as_advanced
mark_as_advanced: Mark cmake cached variables as advanced. mark_as_advanced([CLEAR|FORCE] VAR VAR2 V ...
Cache Algorithms
1. 平均内存引用时间 T = average memory reference time m = miss ratio = 1 - (hit ratio) Tm = time to make a m ...
解决Error creating bean with name 'huayuanjingguanDaoimp' defined in file [D:\apache-tomcat-7.0.52\webapps\landscapings\WEB-INF\classes\com\itheima\landscaping\dao\imp\huayuanjingguanDaoimp.class]: Invo
问题描述: 10:23:13,585 ERROR ContextLoader:307 - Context initialization failedorg.springframework.beans. ...
(字典树)How many--hdu--2609
http://acm.hdu.edu.cn/showproblem.php?pid=2609 How many Time Limit: 2000/1000 MS (Java/Others) Me ...
TensorFlow入门案例
入门小案例,分别是回归模型建立和mnist数据集的模型建立 1.回归案例: import tensorflow as tf import numpy as np from tensorflow.exa ...
结对编程--四则运算（Java）梅进鹏欧思良
结对编程--四则运算(Java)梅进鹏欧思良 Github项目地址:https://github.com/MeiJinpen/Arithmetic 功能要求题目:实现一个自动生成小学四则运算题目的 ...