Correlation Filter in Visual Tracking
涉及两篇论文:Visual Object Tracking using Adaptive Correlation Filters 和Fast Visual Tracking via Dense Spatio-Temporal Context Learning
可参考这位博主笔记:http://www.cnblogs.com/hanhuili/p/4266990.html
第一篇我说下自己的理解:训练时的输出都认为是高斯形状,因为这种形状符合PSR。
训练得到模板后开始跟踪,由输出继续按照新的规则更新模板,进行跟踪。
第二篇主要用到了上下文的信息,通过背景信息来确定目标的位置。可参考这篇博文:http://blog.csdn.net/zouxy09/article/details/16889905,博主还将其用C++实现了,很有启发性。
STCTracker.h
// Fast object tracking algorithm
// Author : zouxy
// Date : 2013-11-21
// HomePage : http://blog.csdn.net/zouxy09
// Email : zouxy09@qq.com
// Reference: Kaihua Zhang, et al. Fast Tracking via Spatio-Temporal Context Learning
// HomePage : http://www4.comp.polyu.edu.hk/~cskhzhang/
// Email: zhkhua@gmail.com
#pragma once #include <opencv2/opencv.hpp> using namespace cv;
using namespace std; class STCTracker
{
public:
STCTracker();
~STCTracker();
void init(const Mat frame, const Rect box);
void tracking(const Mat frame, Rect &trackBox); private:
void createHammingWin();
void complexOperation(const Mat src1, const Mat src2, Mat &dst, int flag = );
void getCxtPriorPosteriorModel(const Mat image);
void learnSTCModel(const Mat image); private:
double sigma; // scale parameter (variance)
double alpha; // scale parameter
double beta; // shape parameter
double rho; // learning parameter
Point center; // the object position
Rect cxtRegion; // context region Mat cxtPriorPro; // prior probability
Mat cxtPosteriorPro; // posterior probability
Mat STModel; // conditional probability
Mat STCModel; // spatio-temporal context model
Mat hammingWin; // Hamming window
};
STCTracker.cpp
// Fast object tracking algorithm
// Author : zouxy
// Date : 2013-11-21
// HomePage : http://blog.csdn.net/zouxy09
// Email : zouxy09@qq.com
// Reference: Kaihua Zhang, et al. Fast Tracking via Spatio-Temporal Context Learning
// HomePage : http://www4.comp.polyu.edu.hk/~cskhzhang/
// Email: zhkhua@gmail.com #include "STCTracker.h" STCTracker::STCTracker()
{ } STCTracker::~STCTracker()
{ } /************ Create a Hamming window ********************/
// Fill hammingWin with a separable 2-D Hamming window
// w(i,j) = h(i) * h(j), where h(k) = 0.54 - 0.46*cos(2*pi*k/N).
// Used to damp the image borders before the DFT.
void STCTracker::createHammingWin()
{
	for (int i = 0; i < hammingWin.rows; i++)
	{
		for (int j = 0; j < hammingWin.cols; j++)
		{
			hammingWin.at<double>(i, j) = (0.54 - 0.46 * cos( 2 * CV_PI * i / hammingWin.rows ))
										* (0.54 - 0.46 * cos( 2 * CV_PI * j / hammingWin.cols ));
		}
	}
}

/************ Define two complex-value operation *****************/
// Element-wise complex arithmetic on two 2-channel double matrices
// (channel 0 = real part, channel 1 = imaginary part).
// flag == 0 -> dst = src1 * src2 (complex multiplication)
// flag != 0 -> dst = src1 / src2 (complex division)
// dst is (re)allocated as CV_64FC2 with the same size as src1.
void STCTracker::complexOperation(const Mat src1, const Mat src2, Mat &dst, int flag)
{
	CV_Assert(src1.size == src2.size);
	CV_Assert(src1.channels() == 2);

	Mat A_Real, A_Imag, B_Real, B_Imag, R_Real, R_Imag;
	vector<Mat> planes;
	split(src1, planes);
	planes[0].copyTo(A_Real);
	planes[1].copyTo(A_Imag);

	split(src2, planes);
	planes[0].copyTo(B_Real);
	planes[1].copyTo(B_Imag);

	dst.create(src1.rows, src1.cols, CV_64FC2);
	split(dst, planes);
	R_Real = planes[0];
	R_Imag = planes[1];

	for (int i = 0; i < A_Real.rows; i++)
	{
		for (int j = 0; j < A_Real.cols; j++)
		{
			double a = A_Real.at<double>(i, j);
			double b = A_Imag.at<double>(i, j);
			double c = B_Real.at<double>(i, j);
			double d = B_Imag.at<double>(i, j);

			if (flag)
			{
				// division: (a+bj) / (c+dj); small epsilon avoids divide-by-zero
				R_Real.at<double>(i, j) = (a * c + b * d) / (c * c + d * d + 0.000001);
				R_Imag.at<double>(i, j) = (b * c - a * d) / (c * c + d * d + 0.000001);
			}
			else
			{
				// multiplication: (a+bj) * (c+dj)
				R_Real.at<double>(i, j) = a * c - b * d;
				R_Imag.at<double>(i, j) = b * c + a * d;
			}
		}
	}
	merge(planes, dst);
}

/************ Get context prior and posterior probability ***********/
// Compute the context prior probability (eq. 5: Gaussian weight around the
// target center, modulated by the image intensity) and the desired posterior
// confidence map (eq. 6) over the context region. Both are normalized so
// they sum to 1 before the prior is multiplied by the context image.
void STCTracker::getCxtPriorPosteriorModel(const Mat image)
{
	CV_Assert(image.size == cxtPriorPro.size);

	double sum_prior(0.0), sum_post(0.0);
	for (int i = 0; i < cxtRegion.height; i++)
	{
		for (int j = 0; j < cxtRegion.width; j++)
		{
			// (x, y) in full-frame coordinates; dist from target center
			double x = j + cxtRegion.x;
			double y = i + cxtRegion.y;
			double dist = sqrt((center.x - x) * (center.x - x) + (center.y - y) * (center.y - y));

			// equation (5) in the paper: Gaussian weight function
			cxtPriorPro.at<double>(i, j) = exp(- dist * dist / (2 * sigma * sigma));
			sum_prior += cxtPriorPro.at<double>(i, j);

			// equation (6) in the paper: desired confidence map shape
			cxtPosteriorPro.at<double>(i, j) = exp(- pow(dist / sqrt(alpha), beta));
			sum_post += cxtPosteriorPro.at<double>(i, j);
		}
	}
	// normalize (depth -1 keeps CV_64F), then weight the prior by intensity
	cxtPriorPro.convertTo(cxtPriorPro, -1, 1.0/sum_prior);
	cxtPriorPro = cxtPriorPro.mul(image);
	cxtPosteriorPro.convertTo(cxtPosteriorPro, -1, 1.0/sum_post);
}

/************ Learn Spatio-Temporal Context Model ***********/
// Learn the spatial context model from one context image:
// STModel = IDFT( DFT(posterior) / DFT(prior) ), then blend it into the
// spatio-temporal model: STCModel = (1-rho)*STCModel + rho*STModel.
void STCTracker::learnSTCModel(const Mat image)
{
	// step 1: Get context prior and posterior probability
	getCxtPriorPosteriorModel(image);

	// step 2-1: Execute 2D DFT for prior probability (pack as Re+Im planes)
	Mat priorFourier;
	Mat planes1[] = {cxtPriorPro, Mat::zeros(cxtPriorPro.size(), CV_64F)};
	merge(planes1, 2, priorFourier);
	dft(priorFourier, priorFourier);

	// step 2-2: Execute 2D DFT for posterior probability
	Mat postFourier;
	Mat planes2[] = {cxtPosteriorPro, Mat::zeros(cxtPosteriorPro.size(), CV_64F)};
	merge(planes2, 2, postFourier);
	dft(postFourier, postFourier);

	// step 3: Calculate the division in the Fourier domain (flag 1 = divide)
	Mat conditionalFourier;
	complexOperation(postFourier, priorFourier, conditionalFourier, 1);

	// step 4: Inverse DFT of the conditional probability gives STModel
	dft(conditionalFourier, STModel, DFT_INVERSE | DFT_REAL_OUTPUT | DFT_SCALE);

	// step 5: Running average update of the spatio-temporal context model
	addWeighted(STCModel, 1.0 - rho, STModel, rho, 0.0, STCModel);
}

/************ Initialize the hyper parameters and models ***********/
void STCTracker::init(const Mat frame, const Rect box)
{
// initial some parameters
alpha = 2.25;
beta = ;
rho = 0.075;
sigma = 0.5 * (box.width + box.height); // the object position
center.x = box.x + 0.5 * box.width;
center.y = box.y + 0.5 * box.height; // the context region
cxtRegion.width = * box.width;
cxtRegion.height = * box.height;
cxtRegion.x = center.x - cxtRegion.width * 0.5;
cxtRegion.y = center.y - cxtRegion.height * 0.5;
cxtRegion &= Rect(, , frame.cols, frame.rows); // the prior, posterior and conditional probability and spatio-temporal context model
cxtPriorPro = Mat::zeros(cxtRegion.height, cxtRegion.width, CV_64FC1);
cxtPosteriorPro = Mat::zeros(cxtRegion.height, cxtRegion.width, CV_64FC1);
STModel = Mat::zeros(cxtRegion.height, cxtRegion.width, CV_64FC1);
STCModel = Mat::zeros(cxtRegion.height, cxtRegion.width, CV_64FC1); // create a Hamming window
hammingWin = Mat::zeros(cxtRegion.height, cxtRegion.width, CV_64FC1);
createHammingWin(); Mat gray;
cvtColor(frame, gray, CV_RGB2GRAY); // normalized by subtracting the average intensity of that region
Scalar average = mean(gray(cxtRegion));
Mat context;
gray(cxtRegion).convertTo(context, CV_64FC1, 1.0, - average[]); // multiplies a Hamming window to reduce the frequency effect of image boundary
context = context.mul(hammingWin); // learn Spatio-Temporal context model from first frame
learnSTCModel(context);
} /******** STCTracker: calculate the confidence map and find the max position *******/
void STCTracker::tracking(const Mat frame, Rect &trackBox)
{
Mat gray;
cvtColor(frame, gray, CV_RGB2GRAY); // normalized by subtracting the average intensity of that region
Scalar average = mean(gray(cxtRegion));
Mat context;
gray(cxtRegion).convertTo(context, CV_64FC1, 1.0, - average[]); // multiplies a Hamming window to reduce the frequency effect of image boundary
context = context.mul(hammingWin); // step 1: Get context prior probability
getCxtPriorPosteriorModel(context); // step 2-1: Execute 2D DFT for prior probability
Mat priorFourier;
Mat planes1[] = {cxtPriorPro, Mat::zeros(cxtPriorPro.size(), CV_64F)};
merge(planes1, , priorFourier);
dft(priorFourier, priorFourier); // step 2-2: Execute 2D DFT for conditional probability
Mat STCModelFourier;
Mat planes2[] = {STCModel, Mat::zeros(STCModel.size(), CV_64F)};
merge(planes2, , STCModelFourier);
dft(STCModelFourier, STCModelFourier); // step 3: Calculate the multiplication
Mat postFourier;
complexOperation(STCModelFourier, priorFourier, postFourier, ); // step 4: Execute 2D inverse DFT for posterior probability namely confidence map
Mat confidenceMap;
dft(postFourier, confidenceMap, DFT_INVERSE | DFT_REAL_OUTPUT| DFT_SCALE); // step 5: Find the max position
Point point;
minMaxLoc(confidenceMap, , , , &point); // step 6-1: update center, trackBox and context region
center.x = cxtRegion.x + point.x;
center.y = cxtRegion.y + point.y;
trackBox.x = center.x - 0.5 * trackBox.width;
trackBox.y = center.y - 0.5 * trackBox.height;
trackBox &= Rect(, , frame.cols, frame.rows); cxtRegion.x = center.x - cxtRegion.width * 0.5;
cxtRegion.y = center.y - cxtRegion.height * 0.5;
cxtRegion &= Rect(, , frame.cols, frame.rows); // step 7: learn Spatio-Temporal context model from this frame for tracking next frame
average = mean(gray(cxtRegion));
gray(cxtRegion).convertTo(context, CV_64FC1, 1.0, - average[]);
context = context.mul(hammingWin);
learnSTCModel(context);
}
runTracker.cpp
// Fast object tracking algorithm
// Author : zouxy
// Date : 2013-11-21
// HomePage : http://blog.csdn.net/zouxy09
// Email : zouxy09@qq.com
// Reference: Kaihua Zhang, et al. Fast Tracking via Spatio-Temporal Context Learning
// HomePage : http://www4.comp.polyu.edu.hk/~cskhzhang/
// Email: zhkhua@gmail.com #include "STCTracker.h" // Global variables
Rect box;
bool drawing_box = false;
bool gotBB = false; // bounding box mouse callback
void mouseHandler(int event, int x, int y, int flags, void *param){
switch( event ){
case CV_EVENT_MOUSEMOVE:
if (drawing_box){
box.width = x-box.x;
box.height = y-box.y;
}
break;
case CV_EVENT_LBUTTONDOWN:
drawing_box = true;
box = Rect( x, y, , );
break;
case CV_EVENT_LBUTTONUP:
drawing_box = false;
if( box.width < ){
box.x += box.width;
box.width *= -;
}
if( box.height < ){
box.y += box.height;
box.height *= -;
}
gotBB = true;
break;
}
} int main(int argc, char * argv[])
{
VideoCapture capture;
capture.open("handwave.wmv");
bool fromfile = true; if (!capture.isOpened())
{
cout << "capture device failed to open!" << endl;
return -;
}
//Register mouse callback to draw the bounding box
cvNamedWindow("Tracker", CV_WINDOW_AUTOSIZE);
cvSetMouseCallback("Tracker", mouseHandler, NULL ); Mat frame;
capture >> frame;
while(!gotBB)
{
if (!fromfile)
capture >> frame; imshow("Tracker", frame);
if (cvWaitKey() == )
return ;
}
//Remove callback
cvSetMouseCallback("Tracker", NULL, NULL ); STCTracker stcTracker;
stcTracker.init(frame, box); int frameCount = ;
while ()
{
capture >> frame;
if (frame.empty())
return -;
double t = (double)cvGetTickCount();
frameCount++; // tracking
stcTracker.tracking(frame, box); // show the result
stringstream buf;
buf << frameCount;
string num = buf.str();
putText(frame, num, Point(, ), FONT_HERSHEY_SIMPLEX, , Scalar(, , ), );
rectangle(frame, box, Scalar(, , ), );
imshow("Tracker", frame); t = (double)cvGetTickCount() - t;
cout << "cost time: " << t / ((double)cvGetTickFrequency()*.) << endl; if ( cvWaitKey() == )
break;
} return ;
}
这篇论文的源码作者已经给出,非常的简洁,有空再研究。
文中还将生成模型和判别模型进行了对比。生成模型一般就是学习一个外表模型来表示目标,然后寻找最匹配的图像区域。
判别模型把跟踪作为一个分类问题,评估目标和背景的决策边界。
为了便于理解,我把流程图画了下来,visio用的还不熟,不知道拐弯的箭头咋画,所以那个循环没画出来
Correlation Filter in Visual Tracking的更多相关文章
- Correlation Filter in Visual Tracking系列一:Visual Object Tracking using Adaptive Correlation Filters 论文笔记
Visual Object Tracking using Adaptive Correlation Filters 一文发表于2010的CVPR上,是笔者所知的第一篇将correlation filt ...
- Correlation Filter in Visual Tracking系列二:Fast Visual Tracking via Dense Spatio-Temporal Context Learning 论文笔记
原文再续,书接一上回.话说上一次我们讲到了Correlation Filter类 tracker的老祖宗MOSSE,那么接下来就让我们看看如何对其进一步地优化改良.这次要谈的论文是我们国内Zhang ...
- Particle filter for visual tracking
Kalman Filter Cons: Kalman filtering is inadequate because it is based on the unimodal Gaussian dist ...
- Multi-hierarchical Independent Correlation Filters for Visual Tracking(MFT)略读
作者写道: 有幸在本届的VOT 2018 主赛中,我们的参赛方案Multi-solution Fusion for Visual Tracking(MFT)获得第一名的成绩,通过结果来看,MFT无论在 ...
- correlation filters in object tracking
http://www.cnblogs.com/hanhuili/p/4266990.html Correlation Filter in Visual Tracking系列一:Visual Objec ...
- Summary on Visual Tracking: Paper List, Benchmarks and Top Groups
Summary on Visual Tracking: Paper List, Benchmarks and Top Groups 2018-07-26 10:32:15 This blog is c ...
- Learning Spatial-Temporal Regularized Correlation Filters for Visual Tracking---随笔
Learning Spatial-Temporal Regularized Correlation Filters for Visual Tracking DCF跟踪算法因边界效应,鲁棒性较差.SRD ...
- 论文笔记:Attentional Correlation Filter Network for Adaptive Visual Tracking
Attentional Correlation Filter Network for Adaptive Visual Tracking CVPR2017 摘要:本文提出一种新的带有注意机制的跟踪框架, ...
- 论文笔记之: Hierarchical Convolutional Features for Visual Tracking
Hierarchical Convolutional Features for Visual Tracking ICCV 2015 摘要:跟卢湖川的那个文章一样,本文也是利用深度学习各个 layer ...
随机推荐
- ejs
这个博客比较专业些http://sunnyhl.iteye.com/blog/1985539 ejs速度不是最快的,推荐最多大概是因为其简单的语法结构.主要通过<% %><%=%&g ...
- 标准I/O
在程序运行时,会默认为我们打开三个流:标准输入流.标准输出流.标准出错流. 标准输入流一般对应我们的键盘 标准输出流一般对应显示器 标准出错流一般也对应显示器 1.标准输入流 在标准I/O中,java ...
- [Json.net]快速入门
引言 有个朋友问了一个如何更方便的解析json格式字符串,之前也没怎么研究过json.net,就上网帮他查了一下,现学现卖的给他整了一个demo,这才发现json.net的强大,用着很方便. Json ...
- Excel解析与导入导出
第三次结对编程作业 结对成员: 031302610黄志鹏 031302603 陈波 功能分析 1.将初始排课表excel导入系统数据库 2.将系统数据库的排课数据显示在web界面 实现思路 一.实现将 ...
- python 参数
摘自:http://www.liaoxuefeng.com/wiki/001374738125095c955c1e6d8bb493182103fac9270762a000/00137473844933 ...
- hdu3487 伸展树(区间搬移 区间旋转)
对于区间旋转使用lazy思想就能解决.然后对于区间搬移,先把a-1结点做根,b+1作为它的右孩子,这样ch[ch[root][1]][0]就是区间[a,b],现将他取出. 然后在将当前的树伸展,把c结 ...
- 网络包处理工具NetBee
What is NetBee? NetBee is a new library intended for several types of packet processing, such as pac ...
- JQueryEasyUI datagrid框架的基本使用
http://www.jb51.net/article/35348.htm 今天说说这个datagrid框架的基本使用,这个框架一直以来都是大家比较头疼的框架,尤其是Json数据的拼接,后台前台都很重 ...
- 弱键(Weak Key, ACM/ICPC Seoul 2004, UVa1618)
I think: 给出k(4≤k≤5000)个互不相同的整数组成的序列Ni,判断是否存在4个整数Np.Nq.Nr和Ns(1≤p<q<r<s≤k),使得Nq>Ns>Np&g ...
- (转)CSS3 @font-face
@font-face是CSS3中的一个模块,他主要是把自己定义的Web字体嵌入到你的网页中,随着@font-face模块的出现,我们在Web的开发中使用字体不怕只能使用Web安全字体,你们当中或许有许 ...