▶ 《并行程序设计导论》第六章中讨论了 n 体问题,分别使用了 MPI,Pthreads,OpenMP 来进行实现,这里是 OpenMP 的代码,分为基本算法和简化算法(引力计算量为基本算法的一半,但是各线程局部引力的汇总较为复杂)

● 基本算法

  1. // omp_nbody_basic.c,OpenMP 基本算法
  2. #include <stdio.h>
  3. #include <stdlib.h>
  4. #include <string.h>
  5. #include <math.h>
  6. #include <omp.h>
  7.  
  8. #define OUTPUT // when defined, main prints the particle state through Output_state
  9. #define DEBUG // when defined, the functions below print extra trace output
  10. #define DIM 2 // number of components in a vect_t
  11. #define X 0 // index of the x component in a vect_t
  12. #define Y 1 // index of the y component in a vect_t
  13. typedef double vect_t[DIM];
  14. const double G = 6.673e-11; // constant G used by the force and potential-energy formulas below
  15. struct particle_s // one structure keeps a particle's mass, position and velocity together
  16. {
  17. double m; // mass
  18. vect_t s; // position
  19. vect_t v; // velocity
  20. };
  21.  
  /*
   * Print the command-line synopsis to stderr and terminate the program.
   *
   * prog_name: program name shown in the synopsis (callers pass argv[0]).
   * Does not return.
   */
  void Usage(char* prog_name)
  {
      fprintf(stderr, "usage: %s <nThreads> <nParticles> <nTimestep> <sizeTimestep> <outputFrequency> <g|i>\n", prog_name);
      fprintf(stderr, " 'g': generate initial conditions\n 'i': read initial conditions from stdin\n");
      /* Usage is only called for bad command lines, so report failure to the caller's shell. */
      exit(EXIT_FAILURE);
  }
  28.  
  /*
   * Parse and validate the six command-line arguments.
   *
   * argv[1] -> *thread_count_p  number of threads, must be >= 1
   * argv[2] -> *n_p             number of particles, must be > 0 and a multiple of the thread count
   * argv[3] -> *n_steps_p       number of time steps, must be >= 0
   * argv[4] -> *delta_t_p       size of one time step, must be > 0
   * argv[5] -> *output_freq_p   print the state every this many steps, must be > 0
   * argv[6] -> *g_i_p           'g' (generate initial conditions) or 'i' (read them from stdin)
   *
   * On any violation Usage() is called, which terminates the program.
   */
  void Get_args(int argc, char* argv[], int* thread_count_p, int* n_p, int* n_steps_p, double* delta_t_p, int* output_freq_p, char* g_i_p)
  {
      if (argc != 7)
          Usage(argv[0]);
      *thread_count_p = (int)strtol(argv[1], NULL, 10);
      *n_p = (int)strtol(argv[2], NULL, 10);
      *n_steps_p = (int)strtol(argv[3], NULL, 10);
      *delta_t_p = strtod(argv[4], NULL);
      *output_freq_p = (int)strtol(argv[5], NULL, 10);
      *g_i_p = argv[6][0];
      /* thread_count is tested first so the modulo below never divides by zero.
         output_freq is used as a modulus in main, so zero must be rejected here. */
      if (*thread_count_p < 1 || *n_p <= 0 || *n_p % *thread_count_p || *n_steps_p < 0 || *delta_t_p <= 0
          || *output_freq_p <= 0 || (*g_i_p != 'g' && *g_i_p != 'i'))
          Usage(argv[0]);
  # ifdef DEBUG
      printf("Get_args, nThread%2d n %d n_steps %d delta_t %e output_freq %d g_i %c\n",
          *thread_count_p, *n_p, *n_steps_p, *delta_t_p, *output_freq_p, *g_i_p);
      fflush(stdout);
  # endif
  }
  50.  
  51. void Gen_init_cond(struct particle_s curr[], int n)
  52. {
  53. const double mass = 5.0e24, gap = 1.0e5, speed = 3.0e4;
  54. //srand(2);
  55. for (int i = ; i < n; i++)
  56. {
  57. curr[i].m = mass;
  58. curr[i].s[X] = i * gap;
  59. curr[i].s[Y] = 0.0;
  60. curr[i].v[X] = 0.0;
  61. // vel[i][Y] = speed * (2 * rand() / (double)RAND_MAX) - 1);
  62. curr[i].v[Y] = (i % ) ? -speed : speed;
  63. }
  64. }
  65.  
  66. void Get_init_cond(struct particle_s curr[], int n)
  67. {
  68. printf("For each particle, enter (in order): mass x-coord y-coord x-velocity y-velocity\n");
  69. for (int i = ; i < n; i++)
  70. {
  71. scanf_s("%lf", &curr[i].m);
  72. scanf_s("%lf", &curr[i].s[X]);
  73. scanf_s("%lf", &curr[i].s[Y]);
  74. scanf_s("%lf", &curr[i].v[X]);
  75. scanf_s("%lf", &curr[i].v[Y]);
  76. }
  77. }
  78.  
  79. void Output_state(double time, struct particle_s curr[], int n)
  80. {
  81. printf("Output_state, time = %.2f\n", time);
  82. for (int i = ; i < n; i++)
  83. printf("%2d %10.3e %10.3e %10.3e %10.3e %10.3e\n", i, curr[i].m, curr[i].s[X], curr[i].s[Y], curr[i].v[X], curr[i].v[Y]);
  84. printf("\n");
  85. fflush(stdout);
  86. }
  87.  
  88. void Compute_force(int part, vect_t forces[], struct particle_s curr[], int n)
  89. {
  90. int k;
  91. vect_t f_part_k;
  92. double len, fact;
  93. for (forces[part][X] = forces[part][Y] = 0.0, k = ; k < n; k++)
  94. {
  95. if (k != part)
  96. {
  97. f_part_k[X] = curr[part].s[X] - curr[k].s[X];
  98. f_part_k[Y] = curr[part].s[Y] - curr[k].s[Y];
  99. len = sqrt(f_part_k[X] * f_part_k[X] + f_part_k[Y] * f_part_k[Y]);
  100. fact = -G * curr[part].m * curr[k].m / (len * len * len);
  101. f_part_k[X] *= fact;
  102. f_part_k[Y] *= fact;
  103. forces[part][X] += f_part_k[X];
  104. forces[part][Y] += f_part_k[Y];
  105. # ifdef DEBUG
  106. printf("Compute_force, %d, %2d> %10.3e %10.3e %10.3e %10.3e\n", k, len, fact, f_part_k[X], f_part_k[Y]);
  107. fflush(stdout);
  108. # endif
  109. }
  110. }
  111. }
  112.  
  113. void Update_part(int part, vect_t forces[], struct particle_s curr[], int n, double delta_t)
  114. {
  115. const double fact = delta_t / curr[part].m;
  116. # ifdef DEBUG
  117. printf("Update_part before, part%2d %10.3e %10.3e %10.3e %10.3e %10.3e %10.3e\n",
  118. part, curr[part].s[X], curr[part].s[Y], curr[part].v[X], curr[part].v[Y], forces[part][X], forces[part][Y]);
  119. fflush(stdout);
  120. # endif
  121. curr[part].s[X] += delta_t * curr[part].v[X];
  122. curr[part].s[Y] += delta_t * curr[part].v[Y];
  123. curr[part].v[X] += fact * forces[part][X];
  124. curr[part].v[Y] += fact * forces[part][Y];
  125. # ifdef DEBUG
  126. printf("Update_part after, part%2d %10.3e %10.3e %10.3e %10.3e)\n",
  127. part, curr[part].s[X], curr[part].s[Y], curr[part].v[X], curr[part].v[Y]);
  128. # endif
  129. }
  130.  
  131. void Compute_energy(struct particle_s curr[], int n, double* kin_en_p, double* pot_en_p)// 计算系统轨道能量,没用到
  132. {
  133. int i, j;
  134. vect_t diff;
  135. double temp;
  136. for (i = , temp = 0.0; i < n; temp += curr[i].m * curr[i].v[X] * curr[i].v[X] + curr[i].v[Y] * curr[i].v[Y], i++);
  137. *kin_en_p = temp * 0.5;
  138. for (i = , temp = .; i < n - ; i++)
  139. {
  140. for (j = i + ; j < n; j++)
  141. {
  142. diff[X] = curr[i].s[X] - curr[j].s[X];
  143. diff[Y] = curr[i].s[Y] - curr[j].s[Y];
  144. temp += -G * curr[i].m * curr[j].m / sqrt(diff[X] * diff[X] + diff[Y] * diff[Y]);
  145. }
  146. }
  147. *pot_en_p = temp;
  148. }
  149.  
  150. int main(int argc, char* argv[])
  151. {
  152. int n, part;
  153. int n_steps, step;
  154. double delta_t;
  155. int output_freq;
  156. struct particle_s* curr; // 颗粒信息
  157. vect_t* forces; // 引力信息
  158. int thread_count; // 线程数,用于函数参数和 omp 子句,不是全局变量
  159. char g_i;
  160. double start, finish;
  161.  
  162. Get_args(argc, argv, &thread_count, &n, &n_steps, &delta_t, &output_freq, &g_i);
  163. curr = (particle_s*)malloc(n * sizeof(struct particle_s));
  164. forces = (vect_t*)malloc(n * sizeof(vect_t));
  165. if (g_i == 'g')
  166. Gen_init_cond(curr, n);
  167. else
  168. Get_init_cond(curr, n);
  169.  
  170. start = omp_get_wtime();
  171. # ifdef OUTPUT
  172. Output_state(, curr, n);
  173. # endif
  174. # pragma omp parallel num_threads(thread_count) default(none) shared(curr, forces, thread_count, delta_t, n, n_steps, output_freq) private(step, part)
  175. for (step = ; step <= n_steps; step++)
  176. {
  177. // memset(forces, 0, n*sizeof(vect_t));
  178. # pragma omp for
  179. for (part = ; part < n; part++)
  180. Compute_force(part, forces, curr, n);
  181. # pragma omp for
  182. for (part = ; part < n; part++)
  183. Update_part(part, forces, curr, n, delta_t);
  184. # ifdef OUTPUT
  185. # pragma omp single
  186. if (step % output_freq == )
  187. Output_state(step * delta_t, curr, n);
  188. # endif
  189. }
  190.  
  191. finish = omp_get_wtime();
  192. printf("Elapsed time = %e ms\n", (finish - start) * );
  193. free(curr);
  194. free(forces);
  195. return ;
  196. }

● 输出结果。8 线程 16 体,模拟 3 秒,时间步长 1 秒,舍去 debug 输出,耗时 1.094827e+01 ms;8 线程 1024 体,模拟 3600 秒,时间步长 1 秒,舍去 output 和 debug 输出,耗时 1.485764e+04 ms

  1. D:\Code\OpenMP\OpenMPProjectTemp\x64\Debug>OpenMPProjectTemp.exe 8 16 3 1 1 g
  2. Output_state, time = 1.00
  3. 5.000e+24 0.000e+00 3.000e+04 5.273e+04 3.000e+04
  4. 5.000e+24 1.000e+05 -3.000e+04 1.922e+04 -3.000e+04
  5. 5.000e+24 2.000e+05 3.000e+04 1.071e+04 3.000e+04
  6. 5.000e+24 3.000e+05 -3.000e+04 6.802e+03 -3.000e+04
  7. 5.000e+24 4.000e+05 3.000e+04 4.485e+03 3.000e+04
  8. 5.000e+24 5.000e+05 -3.000e+04 2.875e+03 -3.000e+04
  9. 5.000e+24 6.000e+05 3.000e+04 1.614e+03 3.000e+04
  10. 5.000e+24 7.000e+05 -3.000e+04 5.213e+02 -3.000e+04
  11. 5.000e+24 8.000e+05 3.000e+04 -5.213e+02 3.000e+04
  12. 5.000e+24 9.000e+05 -3.000e+04 -1.614e+03 -3.000e+04
  13. 5.000e+24 1.000e+06 3.000e+04 -2.875e+03 3.000e+04
  14. 5.000e+24 1.100e+06 -3.000e+04 -4.485e+03 -3.000e+04
  15. 5.000e+24 1.200e+06 3.000e+04 -6.802e+03 3.000e+04
  16. 5.000e+24 1.300e+06 -3.000e+04 -1.071e+04 -3.000e+04
  17. 5.000e+24 1.400e+06 3.000e+04 -1.922e+04 3.000e+04
  18. 5.000e+24 1.500e+06 -3.000e+04 -5.273e+04 -3.000e+04
  19.  
  20. Output_state, time = 2.00
  21. 5.000e+24 5.273e+04 6.000e+04 9.288e+04 1.641e+04
  22. 5.000e+24 1.192e+05 -6.000e+04 3.818e+04 -3.791e+03
  23. 5.000e+24 2.107e+05 6.000e+04 2.116e+04 3.791e+03
  24. 5.000e+24 3.068e+05 -6.000e+04 1.356e+04 -3.101e+03
  25. 5.000e+24 4.045e+05 6.000e+04 8.930e+03 3.101e+03
  26. 5.000e+24 5.029e+05 -6.000e+04 5.739e+03 -2.959e+03
  27. 5.000e+24 6.016e+05 6.000e+04 3.218e+03 2.959e+03
  28. 5.000e+24 7.005e+05 -6.000e+04 1.043e+03 -2.929e+03
  29. 5.000e+24 7.995e+05 6.000e+04 -1.043e+03 2.929e+03
  30. 5.000e+24 8.984e+05 -6.000e+04 -3.218e+03 -2.959e+03
  31. 5.000e+24 9.971e+05 6.000e+04 -5.739e+03 2.959e+03
  32. 5.000e+24 1.096e+06 -6.000e+04 -8.930e+03 -3.101e+03
  33. 5.000e+24 1.193e+06 6.000e+04 -1.356e+04 3.101e+03
  34. 5.000e+24 1.289e+06 -6.000e+04 -2.116e+04 -3.791e+03
  35. 5.000e+24 1.381e+06 6.000e+04 -3.818e+04 3.791e+03
  36. 5.000e+24 1.447e+06 -6.000e+04 -9.288e+04 -1.641e+04
  37.  
  38. Output_state, time = 3.00
  39. 5.000e+24 1.456e+05 7.641e+04 1.273e+05 -1.575e+03
  40. 5.000e+24 1.574e+05 -6.379e+04 5.867e+04 2.528e+04
  41. 5.000e+24 2.319e+05 6.379e+04 2.685e+04 -2.069e+04
  42. 5.000e+24 3.204e+05 -6.310e+04 1.884e+04 2.228e+04
  43. 5.000e+24 4.134e+05 6.310e+04 1.235e+04 -2.151e+04
  44. 5.000e+24 5.086e+05 -6.296e+04 8.111e+03 2.179e+04
  45. 5.000e+24 6.048e+05 6.296e+04 4.537e+03 -2.163e+04
  46. 5.000e+24 7.016e+05 -6.293e+04 1.493e+03 2.169e+04
  47. 5.000e+24 7.984e+05 6.293e+04 -1.493e+03 -2.169e+04
  48. 5.000e+24 8.952e+05 -6.296e+04 -4.537e+03 2.163e+04
  49. 5.000e+24 9.914e+05 6.296e+04 -8.111e+03 -2.179e+04
  50. 5.000e+24 1.087e+06 -6.310e+04 -1.235e+04 2.151e+04
  51. 5.000e+24 1.180e+06 6.310e+04 -1.884e+04 -2.228e+04
  52. 5.000e+24 1.268e+06 -6.379e+04 -2.685e+04 2.069e+04
  53. 5.000e+24 1.343e+06 6.379e+04 -5.867e+04 -2.528e+04
  54. 5.000e+24 1.354e+06 -7.641e+04 -1.273e+05 1.575e+03
  55.  
  56. Elapsed time = 1.094827e-02 seconds

● 简化算法

  1. // omp_nbody_red.c,OpenMP 简化算法
  2. #include <stdio.h>
  3. #include <stdlib.h>
  4. #include <string.h>
  5. #include <math.h>
  6. #include <omp.h>
  7.  
  8. #define OUTPUT // when defined, main prints the particle state through Output_state
  9. //#define DEBUG  (uncomment to enable the trace output guarded by "ifdef DEBUG" below)
  10. #define DIM 2 // number of components in a vect_t
  11. #define X 0 // index of the x component in a vect_t
  12. #define Y 1 // index of the y component in a vect_t
  13. typedef double vect_t[DIM];
  14. const double G = 6.673e-11; // constant G used by the force formula below
  15. struct particle_s // one particle: mass m, position s, velocity v
  16. {
  17. double m; // mass
  18. vect_t s; // position
  19. vect_t v; // velocity
  20. };
  21.  
  /*
   * Print the command-line synopsis to stderr and terminate the program.
   *
   * prog_name: program name shown in the synopsis (callers pass argv[0]).
   * Does not return.
   */
  void Usage(char* prog_name)
  {
      fprintf(stderr, "usage: %s <nThreads> <nParticles> <nTimestep> <sizeTimestep> <outputFrequency> <g|i>\n", prog_name);
      fprintf(stderr, " 'g': generate initial conditions\n 'i': read initial conditions from stdin\n");
      /* Usage is only called for bad command lines, so report failure to the caller's shell. */
      exit(EXIT_FAILURE);
  }
  28.  
  /*
   * Parse and validate the six command-line arguments.
   *
   * argv[1] -> *thread_count_p  number of threads, must be >= 1
   * argv[2] -> *n_p             number of particles, must be > 0 and a multiple of the thread count
   * argv[3] -> *n_steps_p       number of time steps, must be >= 0
   * argv[4] -> *delta_t_p       size of one time step, must be > 0
   * argv[5] -> *output_freq_p   print the state every this many steps, must be > 0
   * argv[6] -> *g_i_p           'g' (generate initial conditions) or 'i' (read them from stdin)
   *
   * On any violation Usage() is called, which terminates the program.
   */
  void Get_args(int argc, char* argv[], int* thread_count_p, int* n_p, int* n_steps_p, double* delta_t_p, int* output_freq_p, char* g_i_p)
  {
      if (argc != 7)
          Usage(argv[0]);
      *thread_count_p = (int)strtol(argv[1], NULL, 10);
      *n_p = (int)strtol(argv[2], NULL, 10);
      *n_steps_p = (int)strtol(argv[3], NULL, 10);
      *delta_t_p = strtod(argv[4], NULL);
      *output_freq_p = (int)strtol(argv[5], NULL, 10);
      *g_i_p = argv[6][0];
      /* thread_count is tested first so the modulo below never divides by zero.
         output_freq is used as a modulus in main, so zero must be rejected here. */
      if (*thread_count_p < 1 || *n_p <= 0 || *n_p % *thread_count_p || *n_steps_p < 0 || *delta_t_p <= 0
          || *output_freq_p <= 0 || (*g_i_p != 'g' && *g_i_p != 'i'))
          Usage(argv[0]);
  # ifdef DEBUG
      printf("Get_args, nThread%2d n %d n_steps %d delta_t %e output_freq %d g_i %c\n",
          *thread_count_p, *n_p, *n_steps_p, *delta_t_p, *output_freq_p, *g_i_p);
      fflush(stdout);
  # endif
  }
  50.  
  51. void Gen_init_cond(struct particle_s curr[], int n)
  52. {
  53. const double mass = 5.0e24, gap = 1.0e5, speed = 3.0e4;
  54. //srand(2);
  55. for (int i = ; i < n; i++)
  56. {
  57. curr[i].m = mass;
  58. curr[i].s[X] = i * gap;
  59. curr[i].s[Y] = 0.0;
  60. curr[i].v[X] = 0.0;
  61. // vel[i][Y] = speed * (2 * rand() / (double)RAND_MAX) - 1);
  62. curr[i].v[Y] = (i % ) ? -speed : speed;
  63. }
  64. }
  65.  
  66. void Get_init_cond(struct particle_s curr[], int n)
  67. {
  68. printf("For each particle, enter (in order): mass x-coord y-coord x-velocity y-velocity\n");
  69. for (int i = ; i < n; i++)
  70. {
  71. scanf_s("%lf", &curr[i].m);
  72. scanf_s("%lf", &curr[i].s[X]);
  73. scanf_s("%lf", &curr[i].s[Y]);
  74. scanf_s("%lf", &curr[i].v[X]);
  75. scanf_s("%lf", &curr[i].v[Y]);
  76. }
  77. }
  78.  
  79. void Output_state(double time, struct particle_s curr[], int n)
  80. {
  81. printf("Output_state, time = %.2f\n", time);
  82. for (int i = ; i < n; i++)
  83. printf("%2d %10.3e %10.3e %10.3e %10.3e %10.3e\n", i, curr[i].m, curr[i].s[X], curr[i].s[Y], curr[i].v[X], curr[i].v[Y]);
  84. printf("\n");
  85. fflush(stdout);
  86. }
  87.  
  88. void Compute_force(int part, vect_t forces[], struct particle_s curr[], int n)
  89. {
  90. vect_t f_part_k;
  91. double len, fact;
  92. for (int k = part + ; k < n; k++)
  93. {
  94. f_part_k[X] = curr[part].s[X] - curr[k].s[X];
  95. f_part_k[Y] = curr[part].s[Y] - curr[k].s[Y];
  96. len = sqrt(f_part_k[X] * f_part_k[X] + f_part_k[Y] * f_part_k[Y]);
  97. fact = -G * curr[part].m * curr[k].m / (len * len * len);
  98. f_part_k[X] *= fact;
  99. f_part_k[Y] *= fact;
  100. forces[part][X] += f_part_k[X]; // 靠前的颗粒加上本次计算结果的相反数
  101. forces[part][Y] += f_part_k[Y];
  102. forces[k][X] -= f_part_k[X]; // 靠后的颗粒加上本次计算结果
  103. forces[k][Y] -= f_part_k[Y];
  104. # ifdef DEBUG
  105. printf("Compute_force, k%2d> %10.3e %10.3e %10.3e %10.3e\n", k, len, fact, f_part_k[X], f_part_k[Y]);
  106. # endif
  107. }
  108. }
  109.  
  110. void Update_part(int part, vect_t forces[], struct particle_s curr[], int n, double delta_t)
  111. {
  112. const double fact = delta_t / curr[part].m;
  113. # ifdef DEBUG
  114. printf("Update_part before, part%2d %10.3e %10.3e %10.3e %10.3e %10.3e %10.3e\n",
  115. part, curr[part].s[X], curr[part].s[Y], curr[part].v[X], curr[part].v[Y], forces[part][X], forces[part][Y]);
  116. fflush(stdout);
  117. # endif
  118. curr[part].s[X] += delta_t * curr[part].v[X];
  119. curr[part].s[Y] += delta_t * curr[part].v[Y];
  120. curr[part].v[X] += fact * forces[part][X];
  121. curr[part].v[Y] += fact * forces[part][Y];
  122. # ifdef DEBUG
  123. printf("Update_part after, part%2d %10.3e %10.3e %10.3e %10.3e)\n",
  124. part, curr[part].s[X], curr[part].s[Y], curr[part].v[X], curr[part].v[Y]);
  125. # endif
  126. }
  127.  
  128. int main(int argc, char* argv[])
  129. {
  130. int n, part;
  131. int n_steps, step;
  132. double delta_t;
  133. int output_freq;
  134. struct particle_s* curr; // 颗粒信息
  135. vect_t* forces; // 引力信息
  136. int nThread, thread, my_rank; // 线程数,用于函数参数和 omp 子句,不是全局变量,当前线程编号(循环变量)
  137. char g_i;
  138. double start, finish;
  139. vect_t* loc_forces; // 每个线程局部引力
  140.  
  141. Get_args(argc, argv, &nThread, &n, &n_steps, &delta_t, &output_freq, &g_i);
  142. curr = (particle_s*)malloc(n * sizeof(struct particle_s));
  143. forces = (vect_t*)malloc(n * sizeof(vect_t));
  144. loc_forces = (vect_t*)malloc(nThread*n * sizeof(vect_t));
  145. if (g_i == 'g')
  146. Gen_init_cond(curr, n);
  147. else
  148. Get_init_cond(curr, n);
  149.  
  150. start = omp_get_wtime();
  151. # ifdef OUTPUT
  152. Output_state(, curr, n);
  153. # endif
  154. # pragma omp parallel num_threads(nThread) default(none) shared(curr, forces, nThread, delta_t, n, n_steps, output_freq, loc_forces) private(step, part,thread, my_rank)
  155. {
  156. my_rank = omp_get_thread_num();
  157. for (step = ; step <= n_steps; step++)
  158. {
  159. // memset(loc_forces + my_rank*n, 0, n*sizeof(vect_t));
  160. # pragma omp for
  161. for (part = ; part < nThread * n; part++)
  162. loc_forces[part][X] = loc_forces[part][Y] = 0.0;
  163. # pragma omp for schedule(static, ) // 注意使用循环调度
  164. for (part = ; part < n - ; part++)
  165. Compute_force(part, loc_forces + my_rank * n, curr, n);
  166. # pragma omp for
  167. for (part = ; part < n; part++) // 计算引力
  168. {
  169. forces[part][X] = forces[part][Y] = 0.0;
  170. for (thread = ; thread < nThread; thread++)
  171. {
  172. forces[part][X] += loc_forces[thread * n + part][X];
  173. forces[part][Y] += loc_forces[thread * n + part][Y];
  174. }
  175. }
  176. # pragma omp for
  177. for (part = ; part < n; part++) // 更新位置
  178. Update_part(part, forces, curr, n, delta_t);
  179. # ifdef OUTPUT
  180. if (step % output_freq == )
  181. {
  182. # pragma omp single
  183. Output_state(step*delta_t, curr, n);
  184. }
  185. # endif
  186. }
  187. }
  188.  
  189. finish = omp_get_wtime();
  190. printf("Elapsed time = %e ms\n", (finish - start) * );
  191. free(curr);
  192. free(forces);
  193. free(loc_forces);
  194. return ;
  195. }

● 输出结果,与基本算法类似。8 线程 16 体,模拟 3 秒,时间步长 1 秒,舍去 debug 输出,耗时 9.887694e+00 ms;8 线程 1024 体,模拟 3600 秒,时间步长 1 秒,舍去 output 和 debug 输出,耗时 7.857710e+03 ms。数据规模扩大后计算时间趋近基本算法的一半,说明计算花费时间较多,而各线程局部引力的清零与汇总花费时间较少。

OpenMP n 体问题的更多相关文章

  1. Pthreads n 体问题

    ▶ <并行程序设计导论>第六章中讨论了 n 体问题,分别使用了 MPI,Pthreads,OpenMP 来进行实现,这里是 Pthreads 的代码,分为基本算法和简化算法(引力计算量为基 ...

  2. MPI n 体问题

    ▶ <并行程序设计导论>第六章中讨论了 n 体问题,分别使用了 MPI,Pthreads,OpenMP 来进行实现,这里是 MPI 的代码,分为基本算法和简化算法(引力计算量为基本算法的一 ...

  3. 应用OpenMP的一个简单的设计模式

    小喵的唠叨话:最近很久没写博客了,一是因为之前写的LSoftmax后馈一直没有成功,所以在等作者的源码.二是最近没什么想写的东西.前两天,在预处理图片的时候,发现处理200w张图片,跑了一晚上也才处理 ...

  4. openmp 的使用

    http://blog.csdn.net/gengshenghong/article/details/7003110 说明:这部分内容比较基础,主要是分析几个容易混淆的OpenMP函数,加以理解. ( ...

  5. OpenMP编程总结表

    本文对OpenMP 2.0的全部语法——Macro(宏定义).Environment Variables(环境变量).Data Types(数据类型).Compiler Directives(编译指导 ...

  6. OpenMP共享内存并行编程详解

    实验平台:win7, VS2010 1. 介绍 平行计算机可以简单分为共享内存和分布式内存,共享内存就是多个核心共享一个内存,目前的PC就是这类(不管是只有一个多核CPU还是可以插多个CPU,它们都有 ...

  7. OpenMP并行构造的schedule子句详解 (转载)

    原文:http://blog.csdn.net/gengshenghong/article/details/7000979 schedule的语法为: schedule(kind, [chunk_si ...

  8. 利用OpenMP实现埃拉托斯特尼(Eratosthenes)素数筛法并行化 分类: 算法与数据结构 2015-05-09 12:24 157人阅读 评论(0) 收藏

    1.算法简介 1.1筛法起源 筛法是一种简单检定素数的算法.据说是古希腊的埃拉托斯特尼(Eratosthenes,约公元前274-194年)发明的,又称埃拉托斯特尼筛法(sieve of Eratos ...

  9. 大数据并行计算利器之MPI/OpenMP

    大数据集群计算利器之MPI/OpenMP ---以连通域标记算法并行化为例 1 背景 图像连通域标记算法是从一幅栅格图像(通常为二值图像)中,将互相邻接(4邻接或8邻接)的具有非背景值的像素集合提取出 ...

随机推荐

  1. uva10480最小割集

    求最小割集 dinic处理后用dfs对所有点进行标记,遍历整个联接边,起点访问了,终点没访问或者起点没访问,终点访问了就是最小割集之一 #include<map> #include< ...

  2. torchnet+VGG16计算patch之间相似度

    torchnet+VGG16计算patch之间相似度 torch VGG16 similarity 本来打算使用VGG实现siamese CNN的,但是没想明白怎么使用torchnet对模型进行微调. ...

  3. ssh隧道(通过跳板机)连接mysql

    案例: A服务器   B服务器   C服务器mysql 现在mysql服务器C只能通过内网访问,B服务器就能通过内网连接访问到mysql A服务器无法直接连接C服务器mysql,所以要通过跳板机(跳板 ...

  4. MissingNumber缺失的数字,FirstMissingPositive第一个缺失的正数

    MissingNumber问题描述:给定一个数组,数组数字范围是0-n,找到缺失的数字.例如nums={0,1,3},return2. 算法分析:第一种方法,对数组进行排序,然后找到和下标不一致的数字 ...

  5. Ctrl+K,Ctrl+D

    先按下Ctrl+K,然后按下Ctrl+D可以自动调整代码.

  6. JS代码执行机制

    JS代码从编译到执行 我们写出一段JS代码,JS的引擎并不是按照我们书写的顺序从上到下顺序编译并且执行的,首先是按照自己的规则对我们的代码先进行编译,然后从上到下执行编译的代码. 在全局作用域中,JS ...

  7. 网络编程I/O函数介绍

    read和write #include <unistd.h> ssize_t read(int fd, void *buf, size_t count); ssize_t write(in ...

  8. Wordpress 加载 js 文件到底部

    wp_enqueue_script wp_enqueue_script( string $handle, string $src = '', array $deps = array(), string ...

  9. java编程之常见的排序算法

    java常见的排序算法 第一种:插入排序 直接插入排序 1, 直接插入排序 (1)基本思想:在要排序的一组数中,假设前面(n-1)[n>=2] 个数已经是排 好顺序的,现在要把第n个数插到前面的 ...

  10. test20181024 ming

    题意 分析 考场做法 考虑二分答案,R开到1e9就能过了. 判断答案合法,就判断时间和是否超过拥有的时间就行了.但要把di从小到大排序,不然容易验证贪心是错的. 时间复杂度\(O(n \log n)\ ...