openMP简介

openMP是一个编译器指令和库函数的集合，主要是为共享式存储计算机上的并行程序设计使用的。

当计算机升级到多核时，程序中创建的线程数量需要随CPU核数变化，如在CPU核数超过线程数量的机器上运行，则不能很好地完全利用机器性能。虽然可以通过操作系统的API创建可变化数量的线程，但是比较麻烦，不如openMP方便。
操作系统API创建线程时,需要线程函数入口，如pthread编程。对于同一函数或者同一循环内的并行非常不利，函数入口非常之多，而openMP不需要函数入口。
现在主流的操作系统的API 互不兼容，移植性非常差。openMP是标准规范，支持它的编译器都执行同一套标准，很好的解决了这个问题。

openMP的库函数和指令

指令的格式： #pragma omp 指令 [子句]

常见库函数：

指令	含义	库函数	含义
parallel	所表示的代码将被多个线程并行执行	omp_set_num_threads(parameter)	设置线程数目
parallel for	对for循环进行拆分并行	omp_get_thread_num()	返回线程号
barrier	执行到barrier时，等待所有线程执行完，再往下执行	omp_get_num_threads()	返回并行区域中的活动线程个数
master / single	指定代码块由主线程/随机一个单线程执行	omp_get_num_procs()	返回运行本线程的处理器个数
parallel sections	section语句用在sections语句里面,将sections语句里的代码分成几个不同的段，每段都并行执行	omp_init_lock (parameter)	初始化一个简单的锁
ordered	指定并行区域的循环按顺序执行	omp_set_lock(parameter)	上锁
critical	用在代码临界区之前，让线程互斥的访问资源	omp_unset_lock(parameter)	解锁
		omp_destroy_lock(parameter)	关闭一个锁

openMP指令和库函数的用法示例

parallel :

#include "omp.h"    //openmp的头文件

#include "stdio.h"

#define NUM_THREADS 4

/*
 * Demonstrates a bare `parallel` region: every thread of the team executes
 * the whole block, so the greeting and the five-iteration loop are printed
 * once per thread.
 *
 * Returns 0.
 */
int main()
{
   omp_set_num_threads(NUM_THREADS);  /* request NUM_THREADS threads for the parallel region */

   #pragma omp parallel
   {
     /*
      * The loop counter lives inside the parallel region so that each thread
      * owns a private copy.  Declared before the region it would be shared,
      * and all threads would race on the same counter.
      */
     int i;

     printf ("hello world! \n");

     for (i = 0; i < 5; i++)
       printf("i=%d,thread = %d\n", i, omp_get_thread_num());
   }

   return 0;
}

hello world!

i=0,thread = 0

i=1,thread = 0

i=2,thread = 0

i=3,thread = 0

i=4,thread = 0

hello world!

i=0,thread = 3

i=1,thread = 3

i=2,thread = 3

i=3,thread = 3

i=4,thread = 3

hello world!

i=0,thread = 1

i=1,thread = 1

i=2,thread = 1

i=3,thread = 1

i=4,thread = 1

hello world!

i=0,thread = 2

i=1,thread = 2

i=2,thread = 2

i=3,thread = 2

i=4,thread = 2

parallel for :

牵扯到for循环时，往往需要用到parallel for指令。

#include "omp.h"

#include "stdio.h"

#define NUM_THREADS 3

/*
 * Demonstrates the combined `parallel for` construct: the iterations of the
 * for statement that directly follows the pragma are divided among the
 * threads of the team.  The second loop carries no pragma and is executed by
 * a single thread.
 *
 * Returns 0.
 */
int main()
{
   int i, j;

   omp_set_num_threads(NUM_THREADS);

   /* `parallel for` covers the loop together with the parallel region itself. */
   #pragma omp parallel for
   for (i = 0; i < 5; i++)
      printf("i= %d,thread=%d\n", i, omp_get_thread_num());

   /* Ordinary loop: runs on one thread only. */
   for (j = 0; j < 4; j++)
      printf("j= %d,thread=%d\n", j, omp_get_thread_num());

   return 0;
}

i= 0,thread=0

i= 1,thread=0

i= 4,thread=2

i= 2,thread=1

i= 3,thread=1

j= 0,thread=0

j= 1,thread=0

j= 2,thread=0

j= 3,thread=0

这种写法很有局限，就是#pragma omp parallel for 只能作用到紧跟着的for循环，也就是说，并行块中第一句话只能是for循环，不能是其他代码。因为这个写法为for循环专属。可以将上述写成如下形式：

#include "omp.h"

#include "stdio.h"

#define NUM_THREADS 3

/*
 * Demonstrates a `parallel` region containing two separate `omp for`
 * work-sharing loops.  Code placed in the region but outside the loops is
 * executed by every thread; each loop's iterations are split across the team.
 *
 * Returns 0.
 */
int main()
{
   int i, j;

   omp_set_num_threads(NUM_THREADS);

   #pragma omp parallel
   {
      /* Executed once by every thread in the team. */
      printf("HelloWorld! , thread=%d\n", omp_get_thread_num());

      /* Iterations of this loop are distributed among the threads. */
      #pragma omp for
      for (i = 0; i < 5; i++) {
         printf("i= %d,thread=%d\n", i, omp_get_thread_num());
      }

      /* A second work-sharing loop inside the same parallel region. */
      #pragma omp for
      for (j = 0; j < 4; j++) {
         printf("j= %d,thread=%d\n", j, omp_get_thread_num());
      }
   }

   return 0;
}

HelloWorld! , thread=0

i= 0,thread=0

i= 1,thread=0

HelloWorld! , thread=2

i= 4,thread=2

HelloWorld! , thread=1

i= 2,thread=1

i= 3,thread=1

j= 0,thread=0

j= 1,thread=0

j= 2,thread=1

j= 3,thread=1

可见，第二种写法完全能够完成对for循环的拆分并行，而且能够多次对多个for循环进行操作，更好的是，这种写法衍生了另一种功能，就是能够输出HelloWorld的那条输出语句，这条语句能够被所有的线程执行，如果for循环需要为每个线程赋值一个变量，那么这个变量可以放在此输出语句的位置，示例请看文章最后的例子。

barrier:

#include <stdio.h>

#include "omp.h"

/*
 * Demonstrates `barrier`: no thread proceeds past a barrier until every
 * thread of the team has reached it.
 */
int main (void)
{
  omp_set_num_threads (5);

  #pragma omp parallel
  {
     printf ("hello world!,thread=%d\n", omp_get_thread_num ());

     /* Wait here until every thread has printed its greeting. */
     #pragma omp barrier

     #pragma omp for
     for (int i = 0; i < 5; i++) {
         printf ("i= %d,thread=%d\n",i, omp_get_thread_num ());
     }

     /* Wait here until all iterations of the first loop have finished. */
     #pragma omp barrier

     #pragma omp for
     for (int j = 0; j < 5; j++) {
         printf ("j= %d ,thread= %d\n", j,omp_get_thread_num ());
     }
  }

  return 0;
}

hello world!,thread=4

hello world!,thread=1

hello world!,thread=3

hello world!,thread=2

hello world!,thread=0

i= 4,thread=4

i= 0,thread=0

i= 3,thread=3

i= 1,thread=1

i= 2,thread=2

j= 0 ,thread= 0

j= 1 ,thread= 1

j= 2 ,thread= 2

j= 4 ,thread= 4

j= 3 ,thread= 3

master / single :

看了对于for循环的并行之后，产生了一个新的问题，如果要在两个并行的for循环之间插入一个单线程执行的语句，应该如下做：

#include "omp.h"

#include "stdio.h"

#define NUM_THREADS 5

/*
 * Runs two independent `parallel for` loops with a plain statement between
 * them.  The statement sits outside any parallel region, so it is executed
 * by one thread only.
 *
 * Returns 0.
 */
int main()
{
   omp_set_num_threads(NUM_THREADS) ;

   #pragma omp parallel for
   for (int i = 0; i < 4; i++) {
      printf ("i = %d ,thread=%d \n",i,omp_get_thread_num());
   }

   /* Between the two parallel loops: executed by a single thread. */
   printf ("I am a single thread %d \n",omp_get_thread_num());

   #pragma omp parallel for
   for (int j = 0; j < 4; j++) {
      printf ("j = %d ,thread=%d \n",j,omp_get_thread_num());
   }

   return 0;
}

i = 0 ,thread=0

i = 3 ,thread=3

i = 2 ,thread=2

i = 1 ,thread=1

I am a single thread 0

j = 3 ,thread=3

j = 1 ,thread=1

j = 0 ,thread=0

j = 2 ,thread=2

但是上述的程序看起来很麻烦，master和single指令就是解决这个问题的：

#include <stdio.h>

#include "omp.h"

#define NUM_THREADS 5

/*
 * Demonstrates `single` inside one parallel region: the statement between
 * the two work-sharing loops is executed by exactly one thread of the team.
 * Replacing `single` with `master` would make the master thread execute it
 * instead.
 */
int main (void)
{
  omp_set_num_threads (NUM_THREADS);

  #pragma omp parallel
  {
    #pragma omp for
    for (int i = 0; i < 4; i++) {
       printf ("i= %d, thread= %d\n",i, omp_get_thread_num ());
    }

    #pragma omp barrier

    /* Only one thread of the team executes the next statement. */
    #pragma omp single
    {
       printf ("I am a single thread ! thread= %d\n", omp_get_thread_num ());
    }

    #pragma omp barrier

    #pragma omp for
    for (int j = 0; j < 5; j++) {
       printf ("j= %d, thread= %d\n",j, omp_get_thread_num ());
    }
  }

  return 0;
}

i= 2, thread= 2

i= 0, thread= 0

i= 1, thread= 1

i= 3, thread= 3

I am a single thread ! thread= 2

j= 2, thread= 2

j= 0, thread= 0

j= 3, thread= 3

j= 1, thread= 1

j= 4, thread= 4

效果是一样的，master 是指定用主线程0，而single是随机的一个单线程执行

parallel sections:

#include <stdio.h>

#include "omp.h"

#define  NUM_THREADS 10

/*
 * Demonstrates the combined `parallel sections` construct: each `section`
 * is an independent piece of work that is handed to one thread of the team,
 * and different sections may run concurrently.
 */
int main (void)
{
  omp_set_num_threads (NUM_THREADS);

  #pragma omp parallel sections
  {
      #pragma omp section
      {
        printf ("thread %d section A!\n", omp_get_thread_num ());
      }

      #pragma omp section
      {
        printf ("thread %d section B!\n", omp_get_thread_num ());
      }

      #pragma omp section
      {
        printf ("thread %d section C!\n", omp_get_thread_num ());
      }

      #pragma omp section
      {
        printf ("thread %d section D!\n", omp_get_thread_num ());
      }

      #pragma omp section
      {
        printf ("thread %d section E!\n", omp_get_thread_num ());
      }
  }

  return 0;
}

thread 4 section A!

thread 4 section E!

thread 8 section D!

thread 3 section C!

thread 0 section B!

同parallel for 相似，可以写成如下形式：

#include <stdio.h>

#include "omp.h"

#define  NUM_THREADS 3

/*
 * Same idea as `parallel sections`, but written as one `parallel` region
 * that contains two separate `sections` constructs: first A and B are
 * distributed among the threads, then C, D and E.
 */
int main (void)
{
  omp_set_num_threads (NUM_THREADS);

  #pragma omp parallel
  {
    /* First group of sections. */
    #pragma omp sections
    {
      #pragma omp section
      {
        printf ("thread %d section A!\n", omp_get_thread_num ());
      }

      #pragma omp section
      {
        printf ("thread %d section B!\n", omp_get_thread_num ());
      }
    }

    /* Second group of sections. */
    #pragma omp sections
    {
      #pragma omp section
      {
        printf ("thread %d section C!\n", omp_get_thread_num ());
      }

      #pragma omp section
      {
        printf ("thread %d section D!\n", omp_get_thread_num ());
      }

      #pragma omp section
      {
        printf ("thread %d section E!\n", omp_get_thread_num ());
      }
    }
  }

  return 0;
}

ordered:

#include <stdio.h>

#include <omp.h>

/*
 * Demonstrates `ordered`: the loop iterations are distributed among the
 * threads, but the statement inside the `ordered` region is executed in the
 * same order as a sequential loop would execute it.
 *
 * Returns 0.
 */
int main (void)
{
  int i;

  omp_set_num_threads(5);

  /* The `ordered` clause on the loop is required to use `omp ordered` inside it. */
  #pragma omp parallel for ordered
  for (i = 1; i <= 5; i++)
    {
        /* The following statement runs in iteration order. */
        #pragma omp ordered
        printf ("i=%d,thread=%d\n", i, omp_get_thread_num());
    }

  return 0;
}

i=1,thread=0

i=2,thread=1

i=3,thread=2

i=4,thread=3

i=5,thread=4

openMP中的互斥（锁）

critical:

这个指令可以有加锁的效果，所指定的代码表示同一时间只允许一个线程进行操作

/*

 *加和程序,从1一直加到100的和

 *

 * */

#include <stdio.h>

#include "omp.h"

/*
 * Sums the integers 1..100 in parallel and prints the result.
 *
 * Each thread handles the values id+1, id+1+nthread, id+1+2*nthread, ...,
 * so the range is split cyclically among the threads.  Updates of the shared
 * accumulator are protected by a `critical` region.
 *
 * Returns 0.
 */
int main(){
  int sum = 0;

  #pragma omp parallel
  {
    int id = omp_get_thread_num();        /* number of the calling thread */
    int nthread = omp_get_num_threads();  /* number of threads in the current team */
    int i;

    for (i = id + 1; i <= 100; i += nthread) {
      /* Only one thread at a time may update the shared sum. */
      #pragma omp critical
      sum = sum + i;
    }
  }

  printf("sum=%d\n", sum);

  return 0;
}

sum=5050

使用锁

另一个互斥访问资源的方法就是使用锁

#include <stdio.h>

#include <omp.h>

/*
 * Sums the integers 1..100 with a `parallel for`, using an explicit OpenMP
 * lock to serialise updates of the shared accumulator, then prints the sum.
 */
int main(void)
{
  omp_lock_t lck;      /* the lock guarding `sum` */
  int sum = 0;

  omp_init_lock(&lck);

  #pragma omp parallel for
  for (int i = 1; i <= 100; i++) {
    omp_set_lock(&lck);     /* acquire: only one thread may touch sum */
    sum += i;
    omp_unset_lock(&lck);   /* release */
  }

  printf("sum=%d\n",sum);

  omp_destroy_lock(&lck);   /* the lock is no longer needed */

  return 0;
}

sum=5050

上述代码中，只定义了一把锁，如果要定义并使用多把锁，看下面的代码：

/*

 *随机产生0~9之间1000个数，统计0~9的个数。

 *histogram[]存放统计的个数

 *

 * */

#include <stdio.h>

#include <stdlib.h>

#include "omp.h"

/*
 * Generates 1000 pseudo-random digits (0..9) and counts how often each digit
 * occurs.  One lock per digit protects the corresponding counter, so threads
 * updating different counters do not block each other.
 */
int  main (void)
{
  int samples[1000];          /* the generated digits */
  int counts[10];             /* counts[d] = occurrences of digit d */
  omp_lock_t bucket_locks[10]; /* one lock per counter */
  int i;

  omp_set_num_threads (5);
  srandom (10);

  /* Fill the sample array in parallel. */
  #pragma omp parallel for
  for (i = 0; i < 1000; i++) {
    samples[i] = random () % 10;
  }

  /* Initialise the ten locks and clear the ten counters in parallel. */
  #pragma omp parallel for
  for (i = 0; i < 10; i++) {
    omp_init_lock (&bucket_locks[i]);
    counts[i] = 0;
  }

  /* Count the digits; each counter is updated under its own lock. */
  #pragma omp parallel for
  for (i = 0; i < 1000; i++) {
    const int digit = samples[i];

    omp_set_lock(&bucket_locks[digit]);
    counts[digit] += 1;
    omp_unset_lock(&bucket_locks[digit]);
  }

  for (i = 0; i < 10; i++) {
    printf ("histogram[%d]=%d\n", i, counts[i]);
  }

  /* Destroy the locks serially. */
  for (i = 0; i < 10; i++) {
    omp_destroy_lock (&bucket_locks[i]);
  }

  return 0;
}

histogram[0]=97

histogram[1]=109

histogram[2]=95

histogram[3]=108

histogram[4]=89

histogram[5]=103

histogram[6]=85

histogram[7]=111

histogram[8]=110

histogram[9]=93

openMP编程，求pi的值

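求pi的方法是利用积分推导出Pi的值，如下图所示（即 $\pi = \int_0^1 \frac{4}{1+x^2}\,dx \approx \sum_{i=1}^{N} \frac{4}{1+x_i^2}\,\Delta x$，其中 $\Delta x = 1/N$，$x_i = (i-0.5)\,\Delta x$）：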

/*

 * 普通方式求Pi,不利用多线程技术

*/

#include <stdio.h>

static long num_steps = 100000;  /* number of slices the interval [0,1] is divided into */

/*
 * Serial computation of pi by midpoint-rule integration of 4/(1+x^2) over
 * [0,1], without any threading.  Prints the result.
 *
 * Returns 0.
 */
int main(void)
{
   long i;                                    /* same type as num_steps */
   double x, pi, sum = 0.0;
   double step = 1.0 / (double)num_steps;     /* width of one slice */

   for (i = 1; i <= num_steps; i++) {
       x = (i - 0.5) * step;                  /* midpoint of slice i */
       sum = sum + 4.0 / (1.0 + x * x);
   }

   pi = step * sum;

   printf("%lf\n", pi);

   return 0;
}

~

3.141593

/*

 *利用 parallel for 进行多线程求解

 * */

#include <stdio.h>

#include <omp.h>

static long num_steps = 100000;  /* number of slices the interval [0,1] is divided into */

double step;                     /* width of one slice; assigned in main */

#define NUM_THREADS 2

/*
 * Computes pi by midpoint-rule integration of 4/(1+x^2) over [0,1], sharing
 * the loop iterations among threads with `omp for`.  Each thread accumulates
 * into its own slot of `sum`, and the slots are combined serially afterwards.
 *
 * Returns 0.
 */
int main (void)
{
    int i;
    double pi = 0.0;

    /*
     * Zero every slot up front: a slot belonging to a thread that never runs
     * would otherwise be read uninitialised in the final summation.
     */
    double sum[NUM_THREADS] = {0.0};

    step = 1.0 / (double) num_steps;

    omp_set_num_threads(NUM_THREADS);  /* request NUM_THREADS threads */

    #pragma omp parallel
    {
        double x;                          /* private to each thread */
        int id = omp_get_thread_num();     /* index of this thread's slot in sum[] */

        #pragma omp for
        for (i = 0; i < num_steps; i++) {
            x = (i + 0.5) * step;          /* midpoint of slice i */
            sum[id] += 4.0 / (1.0 + x * x);
        }
    }

    /* Combine the per-thread partial sums. */
    for (i = 0; i < NUM_THREADS; i++)
        pi += sum[i] * step;

    printf("%lf\n", pi);

    return 0;
}

openMP编程(上篇)之指令和锁的更多相关文章

openMP编程(上篇)之并行程序设计
openMP简介 openMP是一个编译器指令和库函数的集合,主要是为共享式存储计算机上的并行程序设计使用的. 当计算机升级到多核时,程序中创建的线程数量需要随CPU核数变化,如在CPU核数超过线程数 ...
Python中的多线程编程，线程安全与锁(二)
在我的上篇博文Python中的多线程编程,线程安全与锁(一)中,我们熟悉了多线程编程与线程安全相关重要概念, Threading.Lock实现互斥锁的简单示例,两种死锁(迭代死锁和互相等待死锁)情况及 ...
Python 3 并发编程多进程之进程同步（锁）
Python 3 并发编程多进程之进程同步(锁) 进程之间数据不共享,但是共享同一套文件系统,所以访问同一个文件,或同一个打印终端,是没有问题的,竞争带来的结果就是错乱,如何控制,就是加锁处理. 1. ...
并发编程学习笔记(6)----公平锁和ReentrantReadWriteLock使用及原理
(一)公平锁 1.什么是公平锁? 公平锁指的是在某个线程释放锁之后,等待的线程获取锁的策略是以请求获取锁的时间为标准的,即使先请求获取锁的线程先拿到锁. 2.在java中的实现? 在java的并发包中 ...
一个openMP编程处理图像的示例
一个openMP编程处理图像的示例: 从硬盘读入两幅图像,对这两幅图像分别提取特征点,特征点匹配,最后将图像与匹配特征点画出来.理解该例子需要一些图像处理的基本知识,我不在此详细介绍.另外,编译该例需 ...
Java并发编程系列-(4) 显式锁与AQS
4 显示锁和AQS 4.1 Lock接口核心方法 Java在java.util.concurrent.locks包中提供了一系列的显示锁类,其中最基础的就是Lock接口,该接口提供了几个常见的锁相关 ...
StampedLock：一个并发编程中非常重要的票据锁
摘要:一起来聊聊这个在高并发环境下比ReadWriteLock更快的锁--StampedLock. 本文分享自华为云社区<[高并发]一文彻底理解并发编程中非常重要的票据锁--StampedLoc ...
OpenMP编程总结表
本文对OpenMP 2.0的全部语法——Macro(宏定义).Environment Variables(环境变量).Data Types(数据类型).Compiler Directives(编译指导 ...
openMP编程(下篇)之数据私有与任务调度
title: openMP编程(下篇)之数据处理子句与任务调度 tags: ["openMP"] notebook: 分布式程序_Linux --- openMP并行编程中数据的共 ...

随机推荐

GCD浅析
p.p1 { margin: 0.0px 0.0px 0.0px 0.0px; font: 11.0px Helvetica; color: #000000 } span.s1 { } 1.关于GCD ...
Python 之 json 模块
引言对于做web开发的人来说,json文本必须要熟知与熟练使用的.大部分网站的API接口调用返回的数据,就是json格式的.如果看json对象所包含的内容,相信对熟悉Python的人开说,很快就能把 ...
iOS下的界面布局利器-MyLayout布局框架
Swift:TangramKit: https://github.com/youngsoft/TangramKit OC:MyLayout: https://github.com/youngsof ...
1684: [Usaco2005 Oct]Close Encounter
1684: [Usaco2005 Oct]Close Encounter Time Limit: 5 Sec Memory Limit: 64 MBSubmit: 387 Solved: 181[ ...
Python中参数是传值，还是传引用？
在 C/C++ 中,传值和传引用是函数参数传递的两种方式,在Python中参数是如何传递的?回答这个问题前,不如先来看两段代码. 代码段1: def foo(arg): arg = 2 print(a ...
我的Node.js学习历程
学习一门技术,每个人都有每个人的方法.我的方法很简单,做项目. 基本概念在搭建一个node网站之前,还是要掌握一些基本的概念的,这里列举一下,具体的内容大家自己到网上去查: npm bower ex ...
实现input输入时智能搜索
// 智能搜索 function oSearchSuggest(searchFuc) { var input = $('#in'); var suggestWrap = $('#gov_search_ ...
SQL Server中的Merge关键字更新表数据
简介 Merge关键字是一个神奇的DML关键字.它在SQL Server 2008被引入,它能将Insert,Update,Delete简单的并为一句.MSDN对于Merge的解释非常的短小精悍:”根 ...
一文让你从此告别HTTP乱码（二）Response篇
#circle { background-color: #8fcbec; border: 3px } 概述开发Web项目的过程中,经常遇到浏览器中显示的内容乱码,或者服务器获取浏览器请求参数时乱码的 ...
C#实现不影响当前线程情况下间隔一定的时间执行一段代码
大家知道C#间隔一定时间去执行一段代码,常用的有 1. Thread.Sleep(多少毫秒); 2. 使用Timer控件间隔一定的时间,设置执行一次以上两种方法,实现起来不难,弊端在于会阻塞当前线程 ...

openMP编程(上篇)之指令和锁