本文首发于个人博客https://kezunlin.me/post/61d55ab4/,欢迎阅读!

opencv mat for loop

Series

Guide

Mat

  • for gray image, use type <uchar>
  • for RGB color image, use type <Vec3b>

gray format storage

color format storage: BGR

We can use the isContinuous() method to check whether the memory buffer is continuous (i.e. rows are stored back to back without padding).

color space reduction

  /**
   * Quantize an 8-bit intensity down to the nearest lower multiple of 10.
   *
   *   0-9     ===> 0
   *   10-19   ===> 10
   *   20-29   ===> 20
   *   ...
   *   240-249 ===> 240
   *   250-255 ===> 250
   *
   * Each channel therefore takes one of 26 distinct values, so a 3-channel
   * color is mapped from 256*256*256 possibilities down to 26*26*26.
   *
   * @param pixel  input intensity (0-255)
   * @return       the largest multiple of 10 that is <= pixel
   */
  uchar color_space_reduction(uchar pixel)
  {
      const int divideWith = 10;
      // Integer division truncates, so (pixel / 10) * 10 drops the last digit.
      // The result is at most 250, so converting back to uchar loses nothing.
      return static_cast<uchar>((pixel / divideWith) * divideWith);
  }

color table

  /**
   * Fill a 256-entry lookup table that maps every 8-bit value to its
   * quantized value: table[i] = divideWith * (i / divideWith).
   *
   * Caching the result in a table lets the scanning routines replace a
   * division and a multiplication per pixel with a single array read.
   *
   * @param table       output buffer with room for at least 256 entries;
   *                    when null (the default) nothing is written
   * @param divideWith  quantization step, must be positive (default 10)
   */
  void get_color_table(uchar* table = nullptr, int divideWith = 10)
  {
      // Nothing to fill, or a step that would divide by zero / make no sense.
      if (table == nullptr || divideWith <= 0)
          return;

      for (int i = 0; i < 256; ++i)
          table[i] = static_cast<uchar>(divideWith * (i / divideWith));
  }

C++

ptr []

  1. // C ptr []: faster but not safe
  2. Mat& ScanImageAndReduce_Cptr(Mat& I, const uchar* const table)
  3. {
  4. // accept only char type matrices
  5. CV_Assert(I.depth() != sizeof(uchar));
  6. int channels = I.channels();
  7. int nRows = I.rows;
  8. int nCols = I.cols* channels;
  9. if (I.isContinuous())
  10. {
  11. nCols *= nRows;
  12. nRows = 1;
  13. }
  14. int i, j;
  15. uchar* p;
  16. for (i = 0; i < nRows; ++i)
  17. {
  18. p = I.ptr<uchar>(i);
  19. for (j = 0; j < nCols; ++j)
  20. {
  21. p[j] = table[p[j]];
  22. }
  23. }
  24. return I;
  25. }

ptr ++

  1. // C ptr ++: faster but not safe
  2. Mat& ScanImageAndReduce_Cptr2(Mat& I, const uchar* const table)
  3. {
  4. // accept only char type matrices
  5. CV_Assert(I.depth() != sizeof(uchar));
  6. int channels = I.channels();
  7. int nRows = I.rows;
  8. int nCols = I.cols* channels;
  9. if (I.isContinuous())
  10. {
  11. nCols *= nRows;
  12. nRows = 1;
  13. }
  14. uchar* start = I.ptr<uchar>(0); // same as I.ptr<uchar>(0,0)
  15. uchar* end = start + nRows * nCols;
  16. for (uchar* p=start; p < end; ++p)
  17. {
  18. *p = table[*p];
  19. }
  20. return I;
  21. }

at(i,j)

  1. // at<uchar>(i,j): random access, slow
  2. Mat& ScanImageAndReduce_atRandomAccess(Mat& I, const uchar* const table)
  3. {
  4. // accept only char type matrices
  5. CV_Assert(I.depth() != sizeof(uchar));
  6. const int channels = I.channels();
  7. switch (channels)
  8. {
  9. case 1:
  10. {
  11. for (int i = 0; i < I.rows; ++i)
  12. for (int j = 0; j < I.cols; ++j)
  13. I.at<uchar>(i, j) = table[I.at<uchar>(i, j)];
  14. break;
  15. }
  16. case 3:
  17. {
  18. Mat_<Vec3b> _I = I;
  19. for (int i = 0; i < I.rows; ++i)
  20. for (int j = 0; j < I.cols; ++j)
  21. {
  22. _I(i, j)[0] = table[_I(i, j)[0]];
  23. _I(i, j)[1] = table[_I(i, j)[1]];
  24. _I(i, j)[2] = table[_I(i, j)[2]];
  25. }
  26. I = _I;
  27. break;
  28. }
  29. }
  30. return I;
  31. }

Iterator

  1. // MatIterator_<uchar>: safe but slow
  2. Mat& ScanImageAndReduce_Iterator(Mat& I, const uchar* const table)
  3. {
  4. // accept only char type matrices
  5. CV_Assert(I.depth() != sizeof(uchar));
  6. const int channels = I.channels();
  7. switch (channels)
  8. {
  9. case 1:
  10. {
  11. MatIterator_<uchar> it, end;
  12. for (it = I.begin<uchar>(), end = I.end<uchar>(); it != end; ++it)
  13. *it = table[*it];
  14. break;
  15. }
  16. case 3:
  17. {
  18. MatIterator_<Vec3b> it, end;
  19. for (it = I.begin<Vec3b>(), end = I.end<Vec3b>(); it != end; ++it)
  20. {
  21. (*it)[0] = table[(*it)[0]];
  22. (*it)[1] = table[(*it)[1]];
  23. (*it)[2] = table[(*it)[2]];
  24. }
  25. }
  26. }
  27. return I;
  28. }

opencv LUT

  1. // LUT
  2. Mat& ScanImageAndReduce_LUT(Mat& I, const uchar* const table)
  3. {
  4. Mat lookUpTable(1, 256, CV_8U);
  5. uchar* p = lookUpTable.data;
  6. for (int i = 0; i < 256; ++i)
  7. p[i] = table[i];
  8. cv::LUT(I, lookUpTable, I);
  9. return I;
  10. }

forEach

The forEach method of the Mat class utilizes all the cores on your machine to apply a function to every pixel.

  // Function object for Mat::forEach: keeps its own copy of a 256-entry
  // lookup table and replaces each visited element p with m_table[p].
  struct ForEachOperator
  {
      uchar m_table[256];

      // Copies the 256 entries of `table` into the functor, so the functor
      // does not depend on the caller's buffer afterwards.
      ForEachOperator(const uchar* const table)
      {
          const uchar* src = table;
          for (uchar* dst = m_table; dst != m_table + 256; ++dst, ++src)
              *dst = *src;
      }

      // Invoked once per element; `position` (the element's index) is unused.
      void operator ()(uchar& p, const int * position) const
      {
          (void)position;
          p = m_table[p];
      }
  };
  18. // forEach use multiple processors, very fast
  19. Mat& ScanImageAndReduce_forEach(Mat& I, const uchar* const table)
  20. {
  21. I.forEach<uchar>(ForEachOperator(table));
  22. return I;
  23. }

forEach with lambda

  1. // forEach lambda use multiple processors, very fast (lambda slower than ForEachOperator)
  2. Mat& ScanImageAndReduce_forEach_with_lambda(Mat& I, const uchar* const table)
  3. {
  4. I.forEach<uchar>
  5. (
  6. [=](uchar &p, const int * position) -> void
  7. {
  8. p = table[p];
  9. }
  10. );
  11. return I;
  12. }

time cost

no foreach

  1. [1 Cptr ] times=5000, total_cost=988 ms, avg_cost=0.1976 ms
  2. [1 Cptr2 ] times=5000, total_cost=1704 ms, avg_cost=0.3408 ms
  3. [2 atRandom] times=5000, total_cost=9611 ms, avg_cost=1.9222 ms
  4. [3 Iterator] times=5000, total_cost=20195 ms, avg_cost=4.039 ms
  5. [4 LUT ] times=5000, total_cost=899 ms, avg_cost=0.1798 ms
  6. [1 Cptr ] times=10000, total_cost=2425 ms, avg_cost=0.2425 ms
  7. [1 Cptr2 ] times=10000, total_cost=3391 ms, avg_cost=0.3391 ms
  8. [2 atRandom] times=10000, total_cost=20024 ms, avg_cost=2.0024 ms
  9. [3 Iterator] times=10000, total_cost=39980 ms, avg_cost=3.998 ms
  10. [4 LUT ] times=10000, total_cost=103 ms, avg_cost=0.0103 ms

foreach

  1. [5 forEach ] times=200000, total_cost=199 ms, avg_cost=0.000995 ms
  2. [5 forEach lambda] times=200000, total_cost=521 ms, avg_cost=0.002605 ms
  3. [5 forEach ] times=20000, total_cost=17 ms, avg_cost=0.00085 ms
  4. [5 forEach lambda] times=20000, total_cost=23 ms, avg_cost=0.00115 ms

results

| Loop Type | Time Cost (us) |
| :----: | :----: |
| ptr [] | 242 |
| ptr ++ | 339 |
| at | 2002 |
| iterator | 3998 |
| LUT | 10 |
| forEach | 0.85 |
| forEach lambda | 1.15 |

In these measurements forEach is roughly 10x faster than LUT, 240~340x faster than ptr [] and ptr ++, and 2000~4000x faster than at and iterator.

code

code here

Python

pure python

  1. # import the necessary packages
  2. import matplotlib.pyplot as plt
  3. import cv2
  4. print(cv2.__version__)
  5. %matplotlib inline
  1. 3.4.2
  1. # load the original image, convert it to grayscale, and display
  2. # it inline
  3. image = cv2.imread("cat.jpg")
  4. image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
  5. print(image.shape)
  6. #plt.imshow(image, cmap="gray")
  1. (360, 480)
  1. %load_ext cython
  1. The cython extension is already loaded. To reload it, use:
  2. %reload_ext cython
  1. %%cython -a
  def threshold_python(T, image):
      """Binarize `image` in place, one pixel at a time.

      Every pixel whose value is >= T becomes 255; every other pixel becomes 0.

      T     -- threshold value
      image -- 2-D array indexable as image[y, x]; it is modified in place

      Returns the same `image` object.
      """
      rows = image.shape[0]
      cols = image.shape[1]
      # visit every (row, col) position and overwrite it with 0 or 255
      for r in range(rows):
          for c in range(cols):
              if image[r, c] >= T:
                  image[r, c] = 255
              else:
                  image[r, c] = 0
      return image
  1. %timeit threshold_python(5, image)
  1. 263 ms ± 20.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

cython

  1. %%cython -a
  2. import cython
  @cython.boundscheck(False)
  cpdef unsigned char[:, :] threshold_cython(int T, unsigned char [:, :] image):
      """Binarize `image` in place using C-typed loop variables.

      Every pixel whose value is >= T becomes 255; every other pixel becomes 0.

      T     -- threshold value
      image -- 2-D unsigned char memoryview; it is modified in place

      Returns the same memoryview.
      """
      # C-typed indices and bounds, declared via cdef
      cdef int row, col, n_rows, n_cols
      n_rows = image.shape[0]
      n_cols = image.shape[1]
      # bounds checking is switched off by the decorator above
      for row in range(n_rows):
          for col in range(n_cols):
              if image[row, col] >= T:
                  image[row, col] = 255
              else:
                  image[row, col] = 0
      return image

numba

  1. %timeit threshold_cython(5, image)
  1. 150 µs ± 7.14 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
  1. from numba import njit
  @njit
  def threshold_njit(T, image):
      """Binarize `image` in place; the function is wrapped with numba's njit.

      Every pixel whose value is >= T becomes 255; every other pixel becomes 0.

      T     -- threshold value
      image -- 2-D array indexable as image[y, x]; it is modified in place

      Returns the same `image` object.
      """
      rows = image.shape[0]
      cols = image.shape[1]
      # same explicit double loop as the pure-Python version
      for r in range(rows):
          for c in range(cols):
              if image[r, c] >= T:
                  image[r, c] = 255
              else:
                  image[r, c] = 0
      return image
  1. %timeit threshold_njit(5, image)
  1. 43.5 µs ± 142 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)

numpy

  def threshold_numpy(T, image):
      """Binarize `image` in place with boolean-mask indexing (no Python loop).

      Every pixel whose value is >= T becomes 255; every other pixel becomes 0,
      matching threshold_python / threshold_cython / threshold_njit.

      T     -- threshold value
      image -- numpy array; it is modified in place

      Returns the same `image` object.
      """
      # Compute the mask once, before any pixel is overwritten.
      above = image >= T
      image[above] = 255
      # Pixels below the threshold must be cleared as well, otherwise the
      # result is not a binary image.
      image[~above] = 0
      return image
  1. %timeit threshold_numpy(5, image)
  1. 111 µs ± 334 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)

conclusions

  1. image = cv2.imread("cat.jpg")
  2. image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
  3. print(image.shape)
  4. %timeit threshold_python(5, image)
  5. %timeit threshold_cython(5, image)
  6. %timeit threshold_njit(5, image)
  7. %timeit threshold_numpy(5, image)
  1. (360, 480)
  2. 251 ms ± 6.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
  3. 143 µs ± 1.19 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
  4. 43.8 µs ± 284 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
  5. 113 µs ± 957 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
  1. image = cv2.imread("big.jpg")
  2. image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
  3. print(image.shape)
  4. %timeit threshold_python(5, image)
  5. %timeit threshold_cython(5, image)
  6. %timeit threshold_njit(5, image)
  7. %timeit threshold_numpy(5, image)
  1. (2880, 5120)
  2. 21.8 s ± 460 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
  3. 12.3 ms ± 231 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
  4. 3.91 ms ± 66.1 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
  5. 10.3 ms ± 179 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)

360, 480

  • python: 251 ms
  • cython: 143 us
  • numba: 43 us
  • numpy: 113 us

2880, 5120

  • python: 21 s
  • cython: 12 ms
  • numba: 4 ms
  • numpy: 10 ms

Reference

History

  • 20180823: created.

Copyright

快速遍历OpenCV Mat图像数据的多种方法和性能分析 | opencv mat for loop的更多相关文章

  1. 用 Python 排序数据的多种方法

    用 Python 排序数据的多种方法 目录 [Python HOWTOs系列]排序 Python 列表有内置就地排序的方法 list.sort(),此外还有一个内置的 sorted() 函数将一个可迭 ...

  2. Python的list循环遍历中,删除数据的正确方法

    在遍历list,删除符合条件的数据时,总是报异常,代码如下: num_list = [1, 2, 3, 4, 5] print(num_list) for i in range(len(num_lis ...

  3. Delphi导出数据的多种方法

    //Dxdbgrid,则直接用SaveToexcel即可//使用 ExcelWithOdbc 控件function TDataModule1.GetDataToFile(DsData: TObject ...

  4. php遍历目录与文件夹的多种方法详解

    遍历目录或遍历目录下指定类型的文件,这是每一个童鞋在写程序的时候难免会用到的.PHP本身也提供了很多灰常有用的函数,正确地使用它们,不会有错滴.下面就我个人学习过程中的一些总结,希望对想学PHP的童鞋 ...

  5. 遍历Map和List的几种方法和性能比较

    public static void main(String[] args) { Map<String, String> map = new HashMap<String, Stri ...

  6. android+opencv+opencl: cv::dft()的opencl版本的性能分析

    在小米mix 2s + 高通骁龙 845 + Adreno 630 上测试了opencl版本的cv::dft(). 测试数据 先看表格里面的描述: 名称 函数名 最大时间(ms) 平均时间(ms) 说 ...

  7. OpenCV图像数据字节对齐

    目录 1. IplImage的data字段,是char*类型,是4字节对齐. 2. 手动创建的Mat通常是没有字节对齐的 3. 从IplImage转过来的Mat,是字节对齐的 4. 总结 图像数据是否 ...

  8. opencv-4-成像系统与Mat图像颜色空间

    opencv-4-成像系统与Mat图像颜色空间 opencvc++qtmat 目标 知道 opencv 处理图像数据的格式 介绍 mat 基础内容 知道 BGR 颜色 显示 颜色转换 BGR 到 灰度 ...

  9. Python使用plotly绘制数据图表的方法

    转载:http://www.jb51.net/article/118936.htm 本篇文章主要介绍了Python使用plotly绘制数据图表的方法,实例分析了plotly绘制的技巧. 导语:使用 p ...

随机推荐

  1. springMVC初学简单例子

    新建web项目,保留web.xml. 配置web.xml文件(/WEB-INF/下): <?xml version="1.0" encoding="UTF-8&qu ...

  2. odoo联调

    odoo联调(剑飞花 373500710) 1.准备工作 1.1.参考文章“odoo8.0+PyCharm4.5开发环境配置”配置好odoo开发环境 1.2.下载Chrome浏览器,安装. 1.3.下 ...

  3. 玩转u8g2 OLED库,一篇就够

    授人以鱼不如授人以渔,目的不是为了教会你具体项目开发,而是学会学习的能力.希望大家分享给你周边需要的朋友或者同学,说不定大神成长之路有博哥的奠基石... QQ技术互动交流群:ESP8266&3 ...

  4. 解决Zend OPcache huge_code_pages: mmap(HUGETLB) failed: Cannot allocate memory报错

    前几日看到鸟哥介绍的 <让你的PHP7更快之Hugepage>, 于是想试试手给服务器加上,参照格式安装好扩展,调整好配置文件,然后重启php-fpm,结果启动一直报Zend OPcach ...

  5. vue-class-component使用Mixins

    vue-class-component提供了mixinshelper函数,以类样式的方式使用mixins.通过使用mixins帮助程序,TypeScript可以推断mixin类型并在组件类型上继承它们 ...

  6. day05整理

    目录 一.上节课回顾 (一)数据类型 (1)数字类型 (2)字符串类型str (3)列表类型list (4)字典类型dict (二)jieba模块 (三)wordcloud模块 二.文本处理 (一)什 ...

  7. 用最复杂的方式学会数组(Python实现动态数组)

    Python序列类型 在本博客中,我们将学习探讨Python的各种"序列"类,内置的三大常用数据结构--列表类(list).元组类(tuple)和字符串类(str). 不知道你发现 ...

  8. MyBatis与Spring的整合实例详解

    从之前的代码中可以看出直接使用 MyBatis 框架的 SqlSession 访问数据库并不简便.MyBatis 框架的重点是 SQL 映射文件,为方便后续学习,本节讲解 MyBatis 与 Spri ...

  9. Shiro笔记---身份验证

    1.shiro有哪些主要功能 2.搭建shiro环境(*) idea2018.2.maven3.5.4.jdk1.8   项目结构: pom.xml: <dependencies> < ...

  10. ABP WebApi的请求类型

    Api对应的请求类型分为以下四种方法: 1.POST 2.PUT 3.DELETE 4.GET 一般abp的请求类型都是根据接口命名来定义的,Create——POST,Delete——DELETE,U ...