今天我们来介绍车牌定位中的一种新方法--文字定位方法(MSER),包括其主要设计思想与实现。接着我们会介绍一下EasyPR v1.5-beta版本中带来的几项改动。

一. 文字定位法


  针对这种现象,EasyPR v1.5增加了一种新的定位方法,文字定位方法,大幅度改善了这些问题。下面几幅图可以说明文字定位法的效果。


图1 夜间的车牌图像(左) , 图2 对比度非常低的图像(右)


图3 近距离的图像(左) , 图4 高分辨率的图像(右)


  文字定位方法是先采用低级过滤器提取文字,然后再将其组合的一种定位方法。这类方法原先用于在自然场景中定位文字,在这里我们利用它来定位车牌。与扫描文档中的文字不同,自然场景中的文字具有对比度低、背景各异、光照干扰较多等特点,因此需要一个极为鲁棒的方法将其提取出来。目前业界用得较多的是MSER(最大稳定极值区域)方法。EasyPR使用的是MSER的一个改良方法,专门针对文字进行了优化。在文字定位出来以后,一般需要用一个分类器(例如ANN模型)将其中大部分定位错误的文字去掉。为了获得最终的车牌,这些文字需要组合起来。由于实际情况复杂,简单地使用普通的聚类方法效果往往不好,因此EasyPR使用了一种鲁棒性较强的种子生长方法(seed growing)来进行组合。


 //! Locate license plate candidates from MSER character regions
 //! (uses verifyCharSizes first to generate the character candidates).
 //!
 //! Visible flow: run MSER2::detectRegions on the image, keep regions that
 //! pass verifyCharSizes, score them with CharsIdentify::classify, prune with
 //! NMStoCharacter, merge the strong seeds into groups (mergeCharToGroup),
 //! fit a line through each group (fitLine), grow each group with
 //! searchWeakSeed / reFoundAndCombineRect / slideWindowSearch, and finally
 //! push plates whose RotatedRect passes verifyRotatedPlateSizes into the
 //! blue or yellow output vector depending on the current color.
 //!
 //! \param src                    input image; the clone is converted with
 //!                               COLOR_GRAY2BGR, so presumably single-channel
 //!                               grayscale - TODO confirm against the caller.
 //! \param match                  resized here; each used entry is set to a
 //!                               zero Mat with the same rows/cols/type as src.
 //! \param out_plateVec_blue      receives plates found while the_color == BLUE.
 //! \param out_plateVec_yellow    receives plates found while the_color == YELLOW.
 //! \param usePlateMser           when true, the minAreaRect of every MSER
 //!                               contour is also tested with
 //!                               verifyRotatedPlateSizes and stored in the
 //!                               matching out_plateRRect_* vector.
 //! \param out_plateRRect_blue    rotated rects collected for BLUE (see above).
 //! \param out_plateRRect_yellow  rotated rects collected for YELLOW (see above).
 //! \param img_index              only used to build debug image file names.
 //! \param showDebug              part of the conditions guarding debug output.
 //!
 //! NOTE(review): this listing looks damaged by text extraction - numeric
 //! literals are missing (e.g. "const int delta = ;", "charGroup[]",
 //! "if ( && showDebug)"), some statements are fused onto comment lines and
 //! several closing braces are absent. It will not compile as shown; restore
 //! the values from the upstream EasyPR source before relying on it.
void mserCharMatch(const Mat &src, std::vector<Mat> &match, std::vector<CPlate>& out_plateVec_blue, std::vector<CPlate>& out_plateVec_yellow,
bool usePlateMser, std::vector<RotatedRect>& out_plateRRect_blue, std::vector<RotatedRect>& out_plateRRect_yellow, int img_index,
bool showDebug) {
Mat image = src; std::vector<std::vector<std::vector<Point>>> all_contours;
std::vector<std::vector<Rect>> all_boxes;
all_boxes.at().reserve(); match.resize(); std::vector<Color> flags;
flags.push_back(YELLOW); const int imageArea = image.rows * image.cols;
const int delta = ;
//const int delta = CParams::instance()->getParam2i();;
const int minArea = ;
const double maxAreaRatio = 0.05; Ptr<MSER2> mser;
mser = MSER2::create(delta, minArea, int(maxAreaRatio * imageArea));
mser->detectRegions(image, all_contours.at(), all_boxes.at(), all_contours.at(), all_boxes.at()); // mser detect
// color_index = 0 : mser-, detect white characters, which is in blue plate.
// color_index = 1 : mser+, detect dark characters, which is in yellow plate. #pragma omp parallel for
// NOTE(review): the loop bounds below are missing in this listing; presumably
// the loop visits both color indices described above - confirm upstream.
for (int color_index = ; color_index < ; color_index++) {
Color the_color = flags.at(color_index); std::vector<CCharacter> charVec;
charVec.reserve(); match.at(color_index) = Mat::zeros(image.rows, image.cols, image.type()); Mat result = image.clone();
cvtColor(result, result, COLOR_GRAY2BGR); size_t size = all_contours.at(color_index).size(); int char_index = ;
int char_size = ; // Chinese plate has max 7 characters.
const int char_max_count = ; // verify char size and output to rects;
for (size_t index = ; index < size; index++) {
Rect rect = all_boxes.at(color_index)[index];
std::vector<Point>& contour = all_contours.at(color_index)[index]; // sometimes a plate could be a mser rect, so we could
// also use mser algorithm to find plate
if (usePlateMser) {
RotatedRect rrect = minAreaRect(Mat(contour));
if (verifyRotatedPlateSizes(rrect)) {
//rotatedRectangle(result, rrect, Scalar(255, 0, 0), 2);
if (the_color == BLUE) out_plateRRect_blue.push_back(rrect);
if (the_color == YELLOW) out_plateRRect_yellow.push_back(rrect);
} // find character
if (verifyCharSizes(rect)) {
Mat mserMat = adaptive_image_from_points(contour, rect, Size(char_size, char_size));
Mat charInput = preprocessChar(mserMat, char_size);
Rect charRect = rect; Point center(charRect.tl().x + charRect.width / , charRect.tl().y + charRect.height / );
Mat tmpMat;
double ostu_level = cv::threshold(image(charRect), tmpMat, , , CV_THRESH_BINARY | CV_THRESH_OTSU); //cv::circle(result, center, 3, Scalar(0, 0, 255), 2); // use judegMDOratio2 function to
// remove the small lines in character like "zh-cuan"
if (judegMDOratio2(image, rect, contour, result)) {
CCharacter charCandidate;
} // improtant, use matrix multiplication to acclerate the
// classification of many samples. use the character
// score, we can use non-maximum superssion (nms) to
// reduce the characters which are not likely to be true
// charaters, and use the score to select the strong seed
// of which the score is larger than 0.9
CharsIdentify::instance()->classify(charVec); // use nms to remove the character are not likely to be true.
double overlapThresh = 0.6;
//double overlapThresh = CParams::instance()->getParam1f();
NMStoCharacter(charVec, overlapThresh);
charVec.shrink_to_fit(); std::vector<CCharacter> strongSeedVec;
std::vector<CCharacter> weakSeedVec;
std::vector<CCharacter> littleSeedVec;
littleSeedVec.reserve(); //size_t charCan_size = charVec.size();
// NOTE(review): the bodies of the strong / weak / little branches below are
// not present in this listing; presumably they fill the matching seed
// vectors - confirm upstream.
for (auto charCandidate : charVec) {
//CCharacter& charCandidate = charVec[char_index];
Rect rect = charCandidate.getCharacterPos();
double score = charCandidate.getCharacterScore();
if (charCandidate.getIsStrong()) {
else if (charCandidate.getIsWeak()) {
//cv::rectangle(result, rect, Scalar(255, 0, 255));
else if (charCandidate.getIsLittle()) {
//cv::rectangle(result, rect, Scalar(255, 0, 255));
} std::vector<CCharacter> searchCandidate = charVec; // nms to srong seed, only leave the strongest one
overlapThresh = 0.3;
NMStoCharacter(strongSeedVec, overlapThresh); // merge chars to group
std::vector<std::vector<CCharacter>> charGroupVec;
mergeCharToGroup(strongSeedVec, charGroupVec); // genenrate the line of the group
// based on the assumptions , the mser rects which are
// given high socre by character classifier could be no doubtly
// be the characters in one plate, and we can use these characeters
// to fit a line which is the middle line of the plate.
std::vector<CPlate> plateVec;
for (auto charGroup : charGroupVec) {
Rect plateResult = charGroup[].getCharacterPos();
std::vector<Point> points;
points.reserve(); Vec4f line;
int maxarea = ;
Rect maxrect;
double ostu_level_sum = ; int leftx = image.cols;
Point leftPoint(leftx, );
int rightx = ;
Point rightPoint(rightx, ); std::vector<CCharacter> mserCharVec;
mserCharVec.reserve(); // remove outlier CharGroup
std::vector<CCharacter> roCharGroup;
roCharGroup.reserve(); removeRightOutliers(charGroup, roCharGroup, 0.2, 0.5, result);
//roCharGroup = charGroup; for (auto character : roCharGroup) {
Rect charRect = character.getCharacterPos();
cv::rectangle(result, charRect, Scalar(, , ), );
plateResult |= charRect; Point center(charRect.tl().x + charRect.width / , charRect.tl().y + charRect.height / );
//cv::circle(result, center, 3, Scalar(0, 255, 0), 2); ostu_level_sum += character.getOstuLevel(); if (charRect.area() > maxarea) {
maxrect = charRect;
maxarea = charRect.area();
if (center.x < leftPoint.x) {
leftPoint = center;
if (center.x > rightPoint.x) {
rightPoint = center;
} double ostu_level_avg = ostu_level_sum / (double)roCharGroup.size();
if ( && showDebug) {
std::cout << "ostu_level_avg:" << ostu_level_avg << std::endl;
float ratio_maxrect = (float)maxrect.width / (float)maxrect.height; if (points.size() >= && ratio_maxrect >= 0.3) {
fitLine(Mat(points), line, CV_DIST_L2, , 0.01, 0.01); float k = line[] / line[];
//float angle = atan(k) * 180 / (float)CV_PI;
//std::cout << "k:" << k << std::endl;
//std::cout << "angle:" << angle << std::endl;
//std::cout << "cos:" << 0.3 * cos(k) << std::endl;
//std::cout << "ratio_maxrect:" << ratio_maxrect << std::endl; std::sort(mserCharVec.begin(), mserCharVec.end(),
[](const CCharacter& r1, const CCharacter& r2) {
return r1.getCharacterPos().tl().x < r2.getCharacterPos().tl().x;
}); CCharacter midChar = mserCharVec.at(int(mserCharVec.size() / .f));
Rect midRect = midChar.getCharacterPos();
Point midCenter(midRect.tl().x + midRect.width / , midRect.tl().y + midRect.height / ); int mindist = * maxrect.width;
std::vector<Vec2i> distVecVec;
distVecVec.reserve(); Vec2i mindistVec;
Vec2i avgdistVec; // computer the dist which is the distacne between
// two near characters in the plate, use dist we can
// judege how to computer the max search range, and choose the
// best location of the sliding window in the next steps.
for (size_t mser_i = ; mser_i + < mserCharVec.size(); mser_i++) {
Rect charRect = mserCharVec.at(mser_i).getCharacterPos();
Point center(charRect.tl().x + charRect.width / , charRect.tl().y + charRect.height / ); Rect charRectCompare = mserCharVec.at(mser_i + ).getCharacterPos();
Point centerCompare(charRectCompare.tl().x + charRectCompare.width / ,
charRectCompare.tl().y + charRectCompare.height / ); int dist = charRectCompare.x - charRect.x;
Vec2i distVec(charRectCompare.x - charRect.x, charRectCompare.y - charRect.y);
distVecVec.push_back(distVec); //if (dist < mindist) {
// mindist = dist;
// mindistVec = distVec;
} std::sort(distVecVec.begin(), distVecVec.end(),
[](const Vec2i& r1, const Vec2i& r2) {
return r1[] < r2[];
}); avgdistVec = distVecVec.at(int((distVecVec.size() - ) / .f)); //float step = 10.f * (float)maxrect.width;
//float step = (float)mindistVec[0];
float step = (float)avgdistVec[]; //cv::line(result, Point2f(line[2] - step, line[3] - k*step), Point2f(line[2] + step, k*step + line[3]), Scalar(255, 255, 255));
cv::line(result, Point2f(midCenter.x - step, midCenter.y - k*step), Point2f(midCenter.x + step, k*step + midCenter.y), Scalar(, , ));
//cv::circle(result, leftPoint, 3, Scalar(0, 0, 255), 2); CPlate plate;
plate.setPlateRightPoint(rightPoint); plate.setPlateLine(line);
plate.setOstuLevel(ostu_level_avg); plate.setPlateMergeCharRect(plateResult);
} // use strong seed to construct the first shape of the plate,
// then we need to find characters which are the weak seed.
// because we use strong seed to build the middle lines of the plate,
// we can simply use this to consider weak seeds only lie in the
// near place of the middle line
for (auto plate : plateVec) {
Vec4f line = plate.getPlateLine();
Point leftPoint = plate.getPlateLeftPoint();
Point rightPoint = plate.getPlateRightPoint(); Rect plateResult = plate.getPlateMergeCharRect();
Rect maxrect = plate.getPlateMaxCharRect();
Vec2i dist = plate.getPlateDistVec();
double ostu_level = plate.getOstuLevel(); std::vector<CCharacter> mserCharacter = plate.getCopyOfMserCharacters();
mserCharacter.reserve(); float k = line[] / line[];
float x_1 = line[];
float y_1 = line[]; std::vector<CCharacter> searchWeakSeedVec;
searchWeakSeedVec.reserve(); std::vector<CCharacter> searchRightWeakSeed;
std::vector<CCharacter> searchLeftWeakSeed;
searchLeftWeakSeed.reserve(); std::vector<CCharacter> slideRightWindow;
std::vector<CCharacter> slideLeftWindow;
slideLeftWindow.reserve(); // draw weak seed and little seed from line;
// search for mser rect
if ( && showDebug) {
std::cout << "search for mser rect:" << std::endl;
} if ( && showDebug) {
std::stringstream ss(std::stringstream::in | std::stringstream::out);
ss << "resources/image/tmp/" << img_index << "_1_" << "searcgMserRect.jpg";
imwrite(ss.str(), result);
if ( && showDebug) {
std::cout << "mserCharacter:" << mserCharacter.size() << std::endl;
} // if the count of strong seed is larger than max count, we dont need
// all the next steps, if not, we first need to search the weak seed in
// the same line as the strong seed. The judge condition contains the distance
// between strong seed and weak seed , and the rect simily of each other to improve
// the roubustnedd of the seed growing algorithm.
if (mserCharacter.size() < char_max_count) {
double thresh1 = 0.15;
double thresh2 = 2.0;
searchWeakSeed(searchCandidate, searchRightWeakSeed, thresh1, thresh2, line, rightPoint,
maxrect, plateResult, result, CharSearchDirection::RIGHT);
if ( && showDebug) {
std::cout << "searchRightWeakSeed:" << searchRightWeakSeed.size() << std::endl;
for (auto seed : searchRightWeakSeed) {
cv::rectangle(result, seed.getCharacterPos(), Scalar(, , ), );
} searchWeakSeed(searchCandidate, searchLeftWeakSeed, thresh1, thresh2, line, leftPoint,
maxrect, plateResult, result, CharSearchDirection::LEFT);
if ( && showDebug) {
std::cout << "searchLeftWeakSeed:" << searchLeftWeakSeed.size() << std::endl;
for (auto seed : searchLeftWeakSeed) {
cv::rectangle(result, seed.getCharacterPos(), Scalar(, , ), );
} // sometimes the weak seed is in the middle of the strong seed.
// and sometimes two strong seed are actually the two parts of one character.
// because we only consider the weak seed in the left and right direction of strong seed.
// now we examine all the strong seed and weak seed. not only to find the seed in the middle,
// but also to combine two seed which are parts of one character to one seed.
// only by this process, we could use the seed count as the condition to judge if or not to use slide window.
float min_thresh = 0.3f;
float max_thresh = 2.5f;
reFoundAndCombineRect(mserCharacter, min_thresh, max_thresh, dist, maxrect, result); // if the characters count is less than max count
// this means the mser rect in the lines are not enough.
// sometimes there are still some characters could not be captured by mser algorithm,
// such as blur, low light ,and some chinese characters like zh-cuan.
// to handle this ,we use a simple slide window method to find them.
if (mserCharacter.size() < char_max_count) {
if ( && showDebug) {
std::cout << "search chinese:" << std::endl;
std::cout << "judege the left is chinese:" << std::endl;
} // if the left most character is chinese, this means
// that must be the first character in chinese plate,
// and we need not to do a slide window to left. So,
// the first thing is to judge the left charcater is
// or not the chinese.
bool leftIsChinese = false;
if () {
std::sort(mserCharacter.begin(), mserCharacter.end(),
[](const CCharacter& r1, const CCharacter& r2) {
return r1.getCharacterPos().tl().x < r2.getCharacterPos().tl().x;
}); CCharacter leftChar = mserCharacter[]; //Rect theRect = adaptive_charrect_from_rect(leftChar.getCharacterPos(), image.cols, image.rows);
Rect theRect = leftChar.getCharacterPos();
//cv::rectangle(result, theRect, Scalar(255, 0, 0), 1); Mat region = image(theRect);
Mat binary_region; ostu_level = cv::threshold(region, binary_region, , , CV_THRESH_BINARY | CV_THRESH_OTSU);
if ( && showDebug) {
std::cout << "left : ostu_level:" << ostu_level << std::endl;
//plate.setOstuLevel(ostu_level); Mat charInput = preprocessChar(binary_region, char_size);
if ( /*&& showDebug*/) {
imshow("charInput", charInput);
} std::string label = "";
float maxVal = -.f;
leftIsChinese = CharsIdentify::instance()->isCharacter(charInput, label, maxVal, true);
//auto character = CharsIdentify::instance()->identifyChinese(charInput, maxVal, leftIsChinese);
//label = character.second;
if ( /* && showDebug*/) {
std::cout << "isChinese:" << leftIsChinese << std::endl;
std::cout << "chinese:" << label;
std::cout << "__score:" << maxVal << std::endl;
} // if the left most character is not a chinese,
// this means we meed to slide a window to find the missed mser rect.
// search for sliding window
float ratioWindow = 0.4f;
//float ratioWindow = CParams::instance()->getParam3f();
float threshIsCharacter = 0.8f;
//float threshIsCharacter = CParams::instance()->getParam3f();
if (!leftIsChinese) {
slideWindowSearch(image, slideLeftWindow, line, leftPoint, dist, ostu_level, ratioWindow, threshIsCharacter,
maxrect, plateResult, CharSearchDirection::LEFT, true, result);
if ( && showDebug) {
std::cout << "slideLeftWindow:" << slideLeftWindow.size() << std::endl;
for (auto window : slideLeftWindow) {
cv::rectangle(result, window.getCharacterPos(), Scalar(, , ), );
} // if we still have less than max count characters,
// we need to slide a window to right to search for the missed mser rect.
if (mserCharacter.size() < char_max_count) {
// change ostu_level
float ratioWindow = 0.4f;
//float ratioWindow = CParams::instance()->getParam3f();
float threshIsCharacter = 0.8f;
//float threshIsCharacter = CParams::instance()->getParam3f();
slideWindowSearch(image, slideRightWindow, line, rightPoint, dist, plate.getOstuLevel(), ratioWindow, threshIsCharacter,
maxrect, plateResult, CharSearchDirection::RIGHT, false, result);
if ( && showDebug) {
std::cout << "slideRightWindow:" << slideRightWindow.size() << std::endl;
for (auto window : slideRightWindow) {
cv::rectangle(result, window.getCharacterPos(), Scalar(, , ), );
} // computer the plate angle
float angle = atan(k) * / (float)CV_PI;
if ( && showDebug) {
std::cout << "k:" << k << std::endl;
std::cout << "angle:" << angle << std::endl;
} // the plateResult rect need to be enlarge to contains all the plate,
// not only the character area.
float widthEnlargeRatio = 1.15f;
float heightEnlargeRatio = 1.25f;
RotatedRect platePos(Point2f((float)plateResult.x + plateResult.width / .f, (float)plateResult.y + plateResult.height / .f),
Size2f(plateResult.width * widthEnlargeRatio, maxrect.height * heightEnlargeRatio), angle); // justify the size is likely to be a plate size.
if (verifyRotatedPlateSizes(platePos)) {
rotatedRectangle(result, platePos, Scalar(, , ), ); plate.setPlatePos(platePos);
plate.setPlateLocateType(CMSER); if (the_color == BLUE) out_plateVec_blue.push_back(plate);
if (the_color == YELLOW) out_plateVec_yellow.push_back(plate);
} // use deskew to rotate the image, so we need the binary image.
if () {
for (auto mserChar : mserCharacter) {
Rect rect = mserChar.getCharacterPos();
match.at(color_index)(rect) = ;
cv::line(match.at(color_index), rightPoint, leftPoint, Scalar());
} if ( /*&& showDebug*/) {
imshow("result", result);
} if () {
imshow("match", match.at(color_index));
} if () {
std::stringstream ss(std::stringstream::in | std::stringstream::out);
ss << "resources/image/tmp/plateDetect/plate_" << img_index << "_" << the_color << ".jpg";
imwrite(ss.str(), result);
} }




图5 文字定位的中间结果(调试图像)

二. 更加合理准确的评价指标





  总图片数:50, Plates count:52, 定位率:51.9231%
  Recall:46.1696%, Precise:26.3273%, Fscore:33.533%.
  0-error:12.5%, 1-error:12.5%, Chinese-precise:37.5%

  总图片数:50, Plates count:52, 定位率:78.8462%
  Recall:70.6192%, Precise:70.1825%, Fscore:70.4002%.
  0-error:59.4595%, 1-error:70.2703%, Chinese-precise:70.2703%


三. 非极大值抑制

  新版本中另一个较大的改动就是大量地使用了非极大值抑制(Non-maximum suppression)。使用非极大值抑制有几个好处:1.当有几个定位区域重叠时,可以根据它们的置信度(也就是SVM车牌判断模型得出的值)取出其中准确概率最大的一个,移除其他几个。这样,不同定位方法(例如Sobel与Color)定位出的同一个区域,只有一个可以保留。因此,EasyPR新版本中,最终定位出的一个车牌区域不会再有几个框了。2.结合滑动窗口,可以用其来准确定位文字的位置,例如在车牌定位模块中找到概率最大的文字位置,或者在文字识别模块中更准确地找到中文文字的位置。



四. 字符分割与识别部分的强化



图6 车牌图像(左),普通大津阈值结果(中),空间大津阈值结果(右)

  同时,识别部分针对中文增加了一种adaptive threshold方法。这种方法在二值化“川”字时有比Otsu(大津阈值)更好的效果。通过将两者一并使用,并选择其中字符识别概率最大的一个,显著提升了中文字符的识别准确率。在识别中文时,增加了一个小型的滑动窗口,以此来弥补通过省份字符直接查找中文字符时定位不精确等问题。

五. 新的特征与SVM模型,新的中文识别ANN模型


六. 其他

  几天前EasyPR发布了1.5-alpha版本。今天发布的beta版本相对于alpha版本,增加了Grid Search功能,对文字定位方法的参数又进行了部分调优,同时去除了一些中文注释以提高Windows下的兼容性。除此之外,在速度方面,此版本首次使用了多线程编程技术(OpenMP)来提高算法整体的效率,使得最终的速度有了2倍左右的提升。






  1.Character-MSER : Scene Text Detection with Robust Character Candidate Extraction Method, ICDAR2015

  2.Seed-growing : A robust hierarchical detection method for scene text based on convolutional neural networks, ICME2015


