▶ 一个简单的 Pthreads 程序(不按照《并行程序设计导论》中的程序来写)

● 代码

 #include <stdio.h>
#include <pthread.h>
#pragma comment(lib, "pthreadVC2.lib")

// Number of worker threads started by main().
const int thread = 8;

// Thread entry point: prints the int that `input` points to.
//   input  - pointer to an int that stays alive until the thread is joined;
//            a null pointer is tolerated and prints nothing.
//   return - always nullptr; no result is handed back to pthread_join().
void* work(void* input)
{
    if (input != nullptr)                   // only dereference a valid pointer
        printf("%d\n", *((int *)input));    // convert the void * argument back to the type actually passed
    return nullptr;
}

// Starts `thread` workers, each printing its own element of list[], then waits for all of them.
// Returns 0 on success, 1 if a thread could not be created.
int main()
{
    pthread_t array[thread];                // one pthread_t handle per thread
    int i, list[thread];                    // list[] must outlive the threads: each one reads its element

    for (i = 0; i < thread; i++)
    {
        // NOTE(review): the multiplier literal is missing from the published listing; 10 is a placeholder.
        list[i] = i * 10;
        // Run work(&list[i]) in a new thread; nullptr requests default thread attributes.
        if (pthread_create(&array[i], nullptr, work, &list[i]) != 0)
        {
            fprintf(stderr, "pthread_create failed for thread %d\n", i);
            return 1;                       // leaving main ends the process, including threads already started
        }
    }
    for (i = 0; i < thread; i++)
        pthread_join(array[i], nullptr);    // wait for each thread to finish; its return value is ignored
    return 0;
}

● 输出结果:8 个乱序的数字

● 用到的定义,pthread.h

 typedef struct
{
    void * p;           // Pointer to actual object
    unsigned int x;     // Extra information - reuse count etc
} ptw32_handle_t;

typedef ptw32_handle_t pthread_t;
typedef struct pthread_attr_t_ * pthread_attr_t;    // opaque handle: the members of struct pthread_attr_t_ are not shown in this excerpt

PTW32_DLLPORT int PTW32_CDECL pthread_create(
    pthread_t * tid,                        // out: pointer to the pthread_t that receives the new thread's handle
    const pthread_attr_t * attr,            // attributes for the new thread; a null pointer requests the defaults
    void *(PTW32_CDECL *start) (void *),    // thread function: takes a void * and returns a void *
    void *arg                               // argument passed to the thread function
);

PTW32_DLLPORT int PTW32_CDECL pthread_join(
    pthread_t thread,                       // thread to wait for; note that it is passed by value, not by pointer
    void **value_ptr                        // out: receives the void * returned by the thread function; may be null to ignore it
);

▶ 使用直接访问、忙等待和互斥量计算 π / 4 的值,使用公式 π / 4 = 1 - 1 / 3 + 1 / 5 - 1 / 7 + ...,Mathematica 给出的 π / 4 精确值为 0.78539816339744830962

● 直接访问临界区,代码

 #include <stdio.h>
#include <pthread.h>
#include <time.h>
#pragma comment(lib, "pthreadVC2.lib")

const int count = 1 << 30, thread = 8;  // 8 threads together sum 2^30 terms of the series
double sum;                             // shared accumulator; deliberately updated without any synchronisation in this version

// Thread entry point: adds this thread's slice of 1 - 1/3 + 1/5 - ... directly into the shared `sum`.
//   rank   - the thread index (0 .. thread-1), carried in the pointer value itself rather than pointed to.
//   return - always nullptr.
// The unsynchronised `sum +=` is a data race on purpose: this is the "direct access" variant.
void* threadSum_naïve(void* rank)
{
    const long long localRank = (long long)rank;    // recover the index stored in the pointer value
    const int localCount = count / thread;          // terms handled by each thread
    const long long first = localCount * localRank;
    const long long last = first + localCount;
    double sign = (first % 2) ? -1.0 : 1.0;         // term i carries the sign (-1)^i

    for (long long i = first; i < last; i++, sign = -sign)
        sum += sign / (2 * i + 1);
    printf("Thread %2lld finished.\n", localRank);
    return nullptr;
}

// Starts the workers, waits for them, then prints the sum and the elapsed clock() time in ms.
// Returns 0 on success, 1 if a thread could not be created.
int main()
{
    pthread_t array[thread];
    int i;
    const clock_t start = clock();

    sum = 0.0;
    for (i = 0; i < thread; i++)
    {
        if (pthread_create(&array[i], nullptr, threadSum_naïve, (void *)(long long)i) != 0)
        {
            fprintf(stderr, "pthread_create failed for thread %d\n", i);
            return 1;                               // leaving main ends the process, including threads already started
        }
    }
    for (i = 0; i < thread; i++)
        pthread_join(array[i], nullptr);

    // clock() counts ticks; convert to milliseconds through CLOCKS_PER_SEC.
    const long elapsedMs = (long)((double)(clock() - start) * 1000.0 / CLOCKS_PER_SEC);
    printf("\nsum = %2.10f, time = %ld ms\n", sum, elapsedMs);
    return 0;
}

● 输出结果,发现与精确结果有明显偏差(小数点后第 4 位起就不一致),这是由于多个线程未加同步地同时对 sum 进行“读取-修改-写回”,部分更新被覆盖(数据竞争)造成的

Thread   finished.
Thread finished.
Thread finished.
Thread finished.
Thread finished.
Thread finished.
Thread finished.
Thread finished. sum = 0.7852892761, time = ms

● 忙等待法,代码

 #include <stdio.h>
#include <pthread.h>
#include <time.h>
#include <atomic>
#pragma comment(lib, "pthreadVC2.lib")

const int count = 1 << 30, thread = 8;  // 8 threads together sum 2^30 terms of the series
double sum;                             // shared accumulator; only the thread whose rank equals `flag` may write it
std::atomic<int> flag;                  // rank of the thread currently allowed to write; atomic so the spin loop re-reads it

// Thread entry point: adds this thread's slice of 1 - 1/3 + 1/5 - ... into `sum`,
// busy-waiting for its turn before every single term.
//   rank   - the thread index (0 .. thread-1), carried in the pointer value itself rather than pointed to.
//   return - always nullptr.
void* threadSum(void* rank)
{
    const long long localRank = (long long)rank;    // recover the index stored in the pointer value
    const int localCount = count / thread;          // terms handled by each thread
    const long long first = localCount * localRank;
    const long long last = first + localCount;
    double sign = (first % 2) ? -1.0 : 1.0;         // term i carries the sign (-1)^i

    for (long long i = first; i < last; i++, sign = -sign)
    {
        while (flag.load() != localRank)
            ;                                       // spin until the flag equals this thread's rank
        sum += sign / (2 * i + 1);
        flag.store((int)((localRank + 1) % thread)); // hand the turn to the next thread
    }
    printf("Thread %2lld finished.\n", localRank);
    return nullptr;
}

// Starts the workers, waits for them, then prints the sum and the elapsed clock() time in ms.
// Returns 0 on success, 1 if a thread could not be created.
int main()
{
    pthread_t array[thread];
    int i;
    const clock_t start = clock();

    flag.store(0);                                  // thread 0 writes first
    sum = 0.0;
    for (i = 0; i < thread; i++)
    {
        if (pthread_create(&array[i], nullptr, threadSum, (void *)(long long)i) != 0)
        {
            // The turn rotates through every rank, so a missing thread would leave the others spinning forever.
            fprintf(stderr, "pthread_create failed for thread %d\n", i);
            return 1;                               // leaving main ends the process, including threads already started
        }
    }
    for (i = 0; i < thread; i++)
        pthread_join(array[i], nullptr);

    // clock() counts ticks; convert to milliseconds through CLOCKS_PER_SEC.
    const long elapsedMs = (long)((double)(clock() - start) * 1000.0 / CLOCKS_PER_SEC);
    printf("\nsum = %2.10f, time = %ld ms\n", sum, elapsedMs);
    return 0;
}

● 输出结果,结果与精确值吻合,但花费的时间非常长,这是因为每个线程每累加一项都要忙等待轮到自己才能写入 sum,各线程实际上被串行化,而且等待期间一直空转占用 CPU

Thread   finished.
Thread finished.
Thread finished.
Thread finished.
Thread finished.
Thread finished.
Thread finished.
Thread finished. sum = 0.7853981632, time = ms

● 忙等待法 + 局部和,代码

 #include <stdio.h>
#include <pthread.h>
#include <time.h>
#include <atomic>
#pragma comment(lib, "pthreadVC2.lib")

const int count = 1 << 30, thread = 8;  // 8 threads together sum 2^30 terms of the series
double sum;                             // shared accumulator; only the thread whose rank equals `flag` may write it
std::atomic<int> flag;                  // rank of the thread currently allowed to write; atomic so the spin loop re-reads it

// Thread entry point: sums this thread's slice of 1 - 1/3 + 1/5 - ... into a private
// partial sum, then busy-waits for its turn and adds that partial sum to `sum` once.
//   rank   - the thread index (0 .. thread-1), carried in the pointer value itself rather than pointed to.
//   return - always nullptr.
void* threadSum(void* rank)
{
    const long long localRank = (long long)rank;    // recover the index stored in the pointer value
    const int localCount = count / thread;          // terms handled by each thread
    const long long first = localCount * localRank;
    const long long last = first + localCount;
    double sign = (first % 2) ? -1.0 : 1.0;         // term i carries the sign (-1)^i
    double localSum = 0.0;

    for (long long i = first; i < last; i++, sign = -sign)
        localSum += sign / (2 * i + 1);

    // Still busy-waiting, but each thread now writes to the shared total only once.
    while (flag.load() != localRank)
        ;
    sum += localSum;
    flag.store((int)((localRank + 1) % thread));    // hand the turn to the next thread

    printf("Thread %2lld finished.\n", localRank);
    return nullptr;
}

// Starts the workers, waits for them, then prints the sum and the elapsed clock() time in ms.
// Returns 0 on success, 1 if a thread could not be created.
int main()
{
    pthread_t array[thread];
    int i;
    const clock_t start = clock();

    flag.store(0);                                  // thread 0 writes first
    sum = 0.0;
    for (i = 0; i < thread; i++)
    {
        if (pthread_create(&array[i], nullptr, threadSum, (void *)(long long)i) != 0)
        {
            fprintf(stderr, "pthread_create failed for thread %d\n", i);
            return 1;                               // leaving main ends the process, including threads already started
        }
    }
    for (i = 0; i < thread; i++)
        pthread_join(array[i], nullptr);

    // clock() counts ticks; convert to milliseconds through CLOCKS_PER_SEC.
    const long elapsedMs = (long)((double)(clock() - start) * 1000.0 / CLOCKS_PER_SEC);
    printf("\nsum = %2.10f, time = %ld ms\n", sum, elapsedMs);
    return 0;
}

● 输出结果,速度大为加快

Thread   finished.
Thread finished.
Thread finished.
Thread finished.
Thread finished.
Thread finished.
Thread finished.
Thread finished. sum = 0.7853981632, time = ms

● 使用互斥量,代码

 #include <stdio.h>
#include <pthread.h>
#include <time.h>
#pragma comment(lib, "pthreadVC2.lib")

const int count = 1 << 30, thread = 8;  // 8 threads together sum 2^30 terms of the series
double sum;                             // shared accumulator, written only while `pmt` is held
pthread_mutex_t pmt;                    // guards `sum`

// Thread entry point: sums this thread's slice of 1 - 1/3 + 1/5 - ... into a private
// partial sum, then adds that partial sum to `sum` while holding the mutex.
//   rank   - the thread index (0 .. thread-1), carried in the pointer value itself rather than pointed to.
//   return - always nullptr.
void* threadSum(void* rank)
{
    const long long localRank = (long long)rank;    // recover the index stored in the pointer value
    const int localCount = count / thread;          // terms handled by each thread
    const long long first = localCount * localRank;
    const long long last = first + localCount;
    double sign = (first % 2) ? -1.0 : 1.0;         // term i carries the sign (-1)^i
    double localSum = 0.0;

    for (long long i = first; i < last; i++, sign = -sign)
        localSum += sign / (2 * i + 1);

    // The mutex protects the same single update that the busy-wait version guarded with its flag.
    pthread_mutex_lock(&pmt);
    sum += localSum;
    pthread_mutex_unlock(&pmt);

    printf("Thread %2lld finished.\n", localRank);
    return nullptr;
}

// Initialises the mutex, starts the workers, waits for them, destroys the mutex,
// then prints the sum and the elapsed clock() time in ms.
// Returns 0 on success, 1 if the mutex or a thread could not be created.
int main()
{
    pthread_t array[thread];
    int i;
    const clock_t start = clock();

    if (pthread_mutex_init(&pmt, nullptr) != 0)     // initialise the mutex with default attributes
    {
        fprintf(stderr, "pthread_mutex_init failed\n");
        return 1;
    }
    sum = 0.0;
    for (i = 0; i < thread; i++)
    {
        if (pthread_create(&array[i], nullptr, threadSum, (void *)(long long)i) != 0)
        {
            fprintf(stderr, "pthread_create failed for thread %d\n", i);
            return 1;                               // leaving main ends the process, including threads already started
        }
    }
    for (i = 0; i < thread; i++)
        pthread_join(array[i], nullptr);
    pthread_mutex_destroy(&pmt);                    // every thread has been joined, so the mutex is no longer in use

    // clock() counts ticks; convert to milliseconds through CLOCKS_PER_SEC.
    const long elapsedMs = (long)((double)(clock() - start) * 1000.0 / CLOCKS_PER_SEC);
    printf("\nsum = %2.10f, time = %ld ms\n", sum, elapsedMs);
    return 0;
}

● 输出结果,速度与使用忙等待相近

Thread   finished.
Thread finished.
Thread finished.
Thread finished.
Thread finished.
Thread finished.
Thread finished.
Thread finished. sum = 0.7853981632, time = ms

● 用到的定义,pthread.h

 typedef struct pthread_mutex_t_ * pthread_mutex_t;
typedef struct pthread_mutexattr_t_ * pthread_mutexattr_t;

// Initialise a mutex: takes a pointer to an already-declared pthread_mutex_t and a pointer to a
// pthread_mutexattr_t attribute object. The mutex is unlocked once initialisation completes.
PTW32_DLLPORT int PTW32_CDECL pthread_mutex_init(pthread_mutex_t * mutex, const pthread_mutexattr_t * attr);

// Destroy a mutex that is no longer needed.
PTW32_DLLPORT int PTW32_CDECL pthread_mutex_destroy(pthread_mutex_t * mutex);

// Lock the mutex in order to enter the critical section.
PTW32_DLLPORT int PTW32_CDECL pthread_mutex_lock(pthread_mutex_t * mutex);

// Unlock the mutex when leaving the critical section.
PTW32_DLLPORT int PTW32_CDECL pthread_mutex_unlock(pthread_mutex_t * mutex);

