▶ 书上第十三章,用一系列步骤逐步优化一个云水参数化方案。用于熟悉 Fortran 以及 OpenACC 在其下的表现

● 代码,文件较多,放在一起了

  ! main.f90 -- driver program: initialize, run the time loop, report timings
  PROGRAM main
    USE m_config, ONLY: nstop
    USE m_physics, ONLY: physics
    USE m_io, ONLY: write_output
    USE m_setup, ONLY: initialize, cleanup
    USE m_timing, ONLY: start_timer, end_timer, print_timers

    IMPLICIT NONE

    INTEGER :: ntstep   ! current time step
    ! Timer slot used for the time loop.
    ! NOTE(review): the literal was stripped from the published listing; 5 is a
    ! restored value. Any id in 1..ntimer that differs from (and, to match the
    ! printed order, is larger than) the initialization timer id works --
    ! confirm against the book's source.
    INTEGER, PARAMETER :: itimloop = 5

    CALL initialize()   ! set up the timers and the device

    WRITE(*,"(A)") "Start of time loop"
    CALL start_timer(itimloop, "Time loop")

    DO ntstep = 1, nstop
      CALL physics()               ! run both parametrizations for this step
      CALL write_output( ntstep )  ! prints only on every nout-th step
    END DO

    CALL end_timer( itimloop )
    WRITE(*,"(A)") "End of time loop"

    CALL print_timers()
    CALL cleanup()

  END PROGRAM main
  31.  
  ! m_config.f90 -- run-time configuration (compile-time constants)
  !
  ! NOTE(review): the numeric values were stripped from the published listing.
  ! The values below are restored; nstop/nout = 5 agrees with the five "Step:"
  ! lines in the published program output, the grid sizes should be confirmed
  ! against the book's source.
  MODULE m_config
    IMPLICIT NONE

    INTEGER, PARAMETER :: nx    = 100   ! number of grid points in longitude
    INTEGER, PARAMETER :: ny    = 100   ! number of grid points in latitude
    INTEGER, PARAMETER :: nz    = 60    ! number of grid points in altitude
    INTEGER, PARAMETER :: nstop = 100   ! number of time steps
    INTEGER, PARAMETER :: nout  = 20    ! output interval (in time steps)

  END MODULE m_config
  41.  
  ! m_fields.f90 -- global model fields (allocated elsewhere, see m_setup)
  MODULE m_fields
    IMPLICIT NONE

    REAL*8, ALLOCATABLE :: qv(:,:,:)   ! water vapour content
    REAL*8, ALLOCATABLE :: t(:,:,:)    ! temperature

  END MODULE m_fields
  48.  
  ! m_io.f90 -- output routines
  MODULE m_io
    USE m_config, ONLY: nout, nx, ny, nz
    USE m_fields, ONLY: qv

    IMPLICIT NONE

  CONTAINS

    ! Print the domain mean of qv for time step ntstep.
    ! Does nothing unless ntstep is a multiple of nout.
    SUBROUTINE write_output(ntstep)
      IMPLICIT NONE

      INTEGER, INTENT(IN) :: ntstep   ! current time step
      INTEGER :: i, j, k
      REAL*8 :: qv_mean               ! mean water vapour content (scalar)

      IF (MOD(ntstep, nout) /= 0) RETURN   ! not an output step

      ! accumulate the sum, innermost loop over the first (fastest) index
      qv_mean = 0.0D0
      DO k = 1, nz
        DO j = 1, ny
          DO i = 1, nx
            qv_mean = qv_mean + qv(i,j,k)
          END DO
        END DO
      END DO
      qv_mean = qv_mean / REAL(nx * ny * nz, KIND(qv_mean))

      WRITE(*,"(A,I6,A,ES18.8)") "Step: ", ntstep, ", mean(qv) =", qv_mean
    END SUBROUTINE write_output

  END MODULE m_io
  80.  
  ! m_parametrizations.f90 -- the two physical parametrizations
  MODULE m_parametrizations
    IMPLICIT NONE

    ! NOTE(review): the integer parts / exponents of these constants were
    ! stripped from the published listing. The values below are restored; they
    ! reproduce the published mean(qv) values to about 1% in a hand estimate,
    ! but confirm them against the book's source.
    REAL*8, PARAMETER :: cs1 = 1.0D-6, cs2 = 0.02D0, cs3 = 7.2D0, cs4 = 0.1D0, t0 = 273.0D0
    REAL*8, PARAMETER :: cm1 = 1.0D-6, cm2 = 25.0D0, cm3 = 0.2D0, cm4 = 100.0D0

  CONTAINS

    ! Parametrization 1: update qv point by point from t and derive qc from it.
    !   npx, npy, nlev : loop extents in the three dimensions
    !   t              : temperature (read only)
    !   qc             : cloud water content (overwritten)
    !   qv             : water vapour content (updated in place)
    SUBROUTINE saturation_adjustment(npx, npy, nlev, t, qc, qv)
      IMPLICIT NONE

      INTEGER, INTENT(IN) :: npx, npy, nlev
      REAL*8, INTENT(IN) :: t(:,:,:)
      REAL*8, INTENT(OUT) :: qc(:,:,:)
      REAL*8, INTENT(INOUT) :: qv(:,:,:)
      INTEGER :: i, j, k

      DO k = 1, nlev
        DO j = 1, npy
          DO i = 1, npx
            qv(i,j,k) = qv(i,j,k) + cs1*EXP(cs2*( t(i,j,k) - t0 )/( t(i,j,k) - cs3) )
            qc(i,j,k) = cs4 * qv(i,j,k)
          END DO
        END DO
      END DO
    END SUBROUTINE saturation_adjustment

    ! Parametrization 2: rebuild qv level by level and rescale t.
    ! Level k reads qv at level k-1, so the k loop starts at 2 and its
    ! iterations must run in order; level 1 is left untouched.
    !   npx, npy, nlev : loop extents in the three dimensions
    !   t              : temperature (updated in place)
    !   qc             : cloud water content (read only)
    !   qv             : water vapour content (updated in place)
    SUBROUTINE microphysics(npx, npy, nlev, t, qc, qv)
      IMPLICIT NONE

      INTEGER, INTENT(IN) :: npx, npy, nlev
      REAL*8, INTENT(INOUT) :: t(:,:,:)
      REAL*8, INTENT(IN) :: qc(:,:,:)
      REAL*8, INTENT(INOUT) :: qv(:,:,:)
      INTEGER :: i, j, k

      DO k = 2, nlev
        DO j = 1, npy
          DO i = 1, npx
            qv(i,j,k) = qv(i,j,k-1) + cm1*(t(i,j,k)-cm2)**cm3
            t(i,j,k) = t(i,j,k)*( 1.0D0 - cm4*qc(i,j,k)+qv(i,j,k) )
          END DO
        END DO
      END DO
    END SUBROUTINE microphysics

  END MODULE m_parametrizations
  128.  
  ! m_physics.f90 -- runs the parametrizations for one time step
  MODULE m_physics
    USE m_config, ONLY: nx, ny, nz
    USE m_fields, ONLY: qv, t
    USE m_parametrizations, ONLY: saturation_adjustment, microphysics

    IMPLICIT NONE

  CONTAINS

    ! Apply both parametrizations once to the global fields t and qv.
    SUBROUTINE physics()
      IMPLICIT NONE

      ! Temporary cloud water content. This is an automatic array, so it is
      ! created anew on every call.
      REAL*8 :: qc(nx,ny,nz)

      CALL saturation_adjustment(nx, ny, nz, t, qc, qv)   ! first parametrization
      CALL microphysics(nx, ny, nz, t, qc, qv)            ! second parametrization
    END SUBROUTINE physics

  END MODULE m_physics
  146.  
  ! m_timing.f90 -- simple wall-clock timers built on SYSTEM_CLOCK
  !
  ! Usage: init_timers() once, then start_timer(id, tag) / end_timer(id) around
  ! the section to measure, and print_timers() at the end. Each timer id can be
  ! started only once.
  MODULE m_timing
    IMPLICIT NONE

    ! NOTE(review): the timer count and the tag length were stripped from the
    ! published listing; 20 and 28 are restored values. The tag length only has
    ! to be >= 15 to fill the A15 field used by print_timers.
    INTEGER, PARAMETER :: ntimer = 20          ! number of timer slots
    REAL*8 :: rtimer(ntimer)                   ! measured time per slot, in seconds
    CHARACTER(28) :: timertag(ntimer)          ! label per slot; blank = not started
    INTEGER :: icountold(ntimer), &            ! tick (start of timer section)
               icountrate, &                   ! countrate of SYSTEM_CLOCK()
               icountmax                       ! maximum counter value of SYSTEM_CLOCK()

  CONTAINS

    ! Reset all timers and query the clock resolution. Must be called first.
    SUBROUTINE init_timers()
      IMPLICIT NONE

      rtimer(:) = 0.0D0
      timertag(:) = ""
      icountold(:) = 0

      CALL SYSTEM_CLOCK( COUNT_RATE=icountrate, COUNT_MAX=icountmax )
    END SUBROUTINE init_timers

    ! Start timer id and label it with tag.
    ! Stops the program if id is out of range, the timer was already started,
    ! or tag is blank.
    SUBROUTINE start_timer(id, tag)
      IMPLICIT NONE

      INTEGER, INTENT(IN) :: id
      CHARACTER(*), INTENT(IN) :: tag

      IF (id < 1 .OR. id > ntimer) THEN          ! id out of range
        WRITE(*,"(A,I4,A,I4)") "Error: timer id=", id, " exceeds maximum timer number", ntimer
        STOP
      END IF

      IF (LEN_TRIM(timertag(id)) /= 0) THEN      ! a non-blank tag marks a started timer
        WRITE(*,"(A,I4)") "Error: timer already started previously, id:", id
        STOP
      END IF

      IF (LEN_TRIM(tag) == 0) THEN               ! a blank tag would look "not started"
        WRITE(*,"(A,I4)") "Error: empty tag provided, id:", id
        STOP
      END IF

      timertag(id) = TRIM(tag)

      ! let pending device work finish so it is not charged to this timer
      !$acc wait

      CALL SYSTEM_CLOCK( COUNT=icountold(id) )
    END SUBROUTINE start_timer

    ! Stop timer id and store the elapsed time in seconds in rtimer(id).
    ! Stops the program if id is out of range or the timer was never started.
    SUBROUTINE end_timer(id)
      IMPLICIT NONE

      INTEGER, INTENT(IN) :: id
      INTEGER :: icountnew, icountdiff

      IF (id < 1 .OR. id > ntimer) THEN          ! id out of range
        WRITE(*,"(A,I4,A,I4)") "Error: timer id=", id, " exceeds maximum timer number", ntimer
        STOP
      END IF

      IF (LEN_TRIM(timertag(id)) == 0) THEN      ! timer was never started
        WRITE(*,"(A,I4)") "Error: Need to call start_timer before end_timing, id:", id
        STOP
      END IF

      ! include all outstanding device work in the measurement
      !$acc wait

      CALL SYSTEM_CLOCK( COUNT=icountnew )
      icountdiff = icountnew - icountold(id)
      ! The counter restarts at 0 after reaching icountmax; undo one wraparound.
      ! The parentheses keep the intermediate sum below icountmax (no overflow).
      IF (icountdiff < 0) icountdiff = (icountdiff + icountmax) + 1
      rtimer(id) = REAL(icountdiff, KIND(rtimer(id))) / REAL(icountrate, KIND(rtimer(id)))
    END SUBROUTINE end_timer

    ! Print every timer with a positive measured time, in milliseconds.
    SUBROUTINE print_timers()
      IMPLICIT NONE

      INTEGER :: id

      WRITE(*,"(A)") "----------------------------"
      WRITE(*,"(A)") "Timers:"
      WRITE(*,"(A)") "----------------------------"
      DO id = 1, ntimer
        IF ( rtimer(id) > 0.0D0 ) THEN
          WRITE(*,"(A15,A2,F8.2,A)") timertag(id), ": ", rtimer(id)*1.0D3, " ms"
        END IF
      END DO
      WRITE(*,"(A)") "----------------------------"
    END SUBROUTINE print_timers

  END MODULE m_timing
  234.  
  ! m_setup.f90 -- initialization and cleanup
  ! Needs the preprocessor (the #ifdef lines must start in column 1).
  MODULE m_setup
    USE m_config, ONLY: nstop, nout, nx, ny, nz
    USE m_fields, ONLY: t,qv
    USE m_timing, ONLY: init_timers, start_timer, end_timer

    IMPLICIT NONE

  CONTAINS

    ! Initialize the timers, allocate and fill the fields t and qv, and (when
    ! compiled with OpenACC) warm up the GPU. The whole routine is timed.
    SUBROUTINE initialize()
      IMPLICIT NONE

      ! NOTE(review): the literal was stripped from the published listing; 1 is
      ! a restored value. It must lie in 1..ntimer and differ from the id used
      ! for the time loop in main.
      INTEGER, PARAMETER :: itiminit = 1   ! timer id
      INTEGER :: i, j, k

#ifdef _OPENACC
      WRITE(*,"(A)") "Running with OpenACC"
#else
      WRITE(*,"(A)") "Running without OpenACC"
#endif

      WRITE(*,"(A)") "Initialize"

      CALL init_timers()
      CALL start_timer( itiminit, "Initialization" )
      ALLOCATE( t(nx,ny,nz), qv(nx,ny,nz) )

      ! NOTE(review): the integer parts / exponent of the constants below were
      ! stripped from the published listing; the values are restored and should
      ! be confirmed against the book's source.
      DO k = 1, nz
        DO j = 1, ny
          DO i = 1, nx
            t(i,j,k) = 293.0D0 * (1.2D0 + 0.07D0 * COS(6.2D0 * REAL(i+j+k) / REAL(nx+ny+nz)))
            qv(i,j,k) = 1.0D-6 * (1.1D0 + 0.13D0 * COS(5.3D0 * REAL(i+j+k) / REAL(nx*ny*nz)))
          END DO
        END DO
      END DO

#ifdef _OPENACC
      CALL initialize_gpu()
#endif

      CALL end_timer( itiminit )
    END SUBROUTINE initialize

    ! Run a tiny kernel on the GPU to initialize it and check the result.
    ! NOTE(review): the array size was stripped from the published listing; 16
    ! is a restored value, any small size behaves the same.
    SUBROUTINE initialize_gpu()
      IMPLICIT NONE

      INTEGER :: temp(16)
      INTEGER :: i

      !$acc parallel loop
      DO i = 1, 16
        temp(i) = 1
      END DO

      ! every element was set to 1, so the sum must equal the array size
      IF (SUM(temp) == 16) THEN
        WRITE(*,"(A)") "GPU initialized"
      ELSE
        WRITE(*,"(A,I4)") "Error: Problem encountered initializing the GPU"
        STOP
      END IF
    END SUBROUTINE initialize_gpu

    ! Release the memory of t and qv.
    SUBROUTINE cleanup()
      IMPLICIT NONE

      DEALLOCATE( t, qv )
    END SUBROUTINE cleanup

  END MODULE m_setup

● OpenMP 优化,改了 m_io.f90,m_parametrizations.f90,m_setup.f90

  ! m_io.f90 -- output routines (OpenMP version)
  MODULE m_io
    USE m_config, ONLY: nout, nx, ny, nz
    USE m_fields, ONLY: qv

    IMPLICIT NONE

  CONTAINS

    ! Print the domain mean of qv for time step ntstep.
    ! Does nothing unless ntstep is a multiple of nout.
    SUBROUTINE write_output(ntstep)
      IMPLICIT NONE

      INTEGER, INTENT(IN) :: ntstep   ! current time step
      INTEGER :: i, j, k
      REAL*8 :: qv_mean               ! mean water vapour content (scalar)

      IF (MOD(ntstep, nout) /= 0) RETURN   ! not an output step

      qv_mean = 0.0D0
      DO k = 1, nz
        ! one parallel region per level: the j loop is shared among the threads
        ! and their partial sums are combined into qv_mean
        !$OMP PARALLEL DO PRIVATE(i,j) SHARED(k,qv) REDUCTION(+:qv_mean)
        DO j = 1, ny
          DO i = 1, nx
            qv_mean = qv_mean + qv(i,j,k)
          END DO
        END DO
      END DO
      qv_mean = qv_mean / REAL(nx * ny * nz, KIND(qv_mean))

      WRITE(*,"(A,I6,A,ES18.8)") "Step: ", ntstep, ", mean(qv) =", qv_mean
    END SUBROUTINE write_output

  END MODULE m_io
  33.  
  ! m_parametrizations.f90 -- the two physical parametrizations (OpenMP version)
  MODULE m_parametrizations
    IMPLICIT NONE

    ! NOTE(review): the integer parts / exponents of these constants were
    ! stripped from the published listing. The values below are restored; they
    ! reproduce the published mean(qv) values to about 1% in a hand estimate,
    ! but confirm them against the book's source.
    REAL*8, PARAMETER :: cs1 = 1.0D-6, cs2 = 0.02D0, cs3 = 7.2D0, cs4 = 0.1D0, t0 = 273.0D0
    REAL*8, PARAMETER :: cm1 = 1.0D-6, cm2 = 25.0D0, cm3 = 0.2D0, cm4 = 100.0D0

  CONTAINS

    ! Parametrization 1: update qv point by point from t and derive qc from it.
    !   npx, npy, nlev : loop extents in the three dimensions
    !   t              : temperature (read only)
    !   qc             : cloud water content (overwritten)
    !   qv             : water vapour content (updated in place)
    SUBROUTINE saturation_adjustment(npx, npy, nlev, t, qc, qv)
      IMPLICIT NONE

      INTEGER, INTENT(IN) :: npx, npy, nlev
      REAL*8, INTENT(IN) :: t(:,:,:)
      REAL*8, INTENT(OUT) :: qc(:,:,:)
      REAL*8, INTENT(INOUT) :: qv(:,:,:)
      INTEGER :: i, j, k

      ! every thread runs the k loop; inside it the j loop is work-shared
      !$OMP PARALLEL
      DO k = 1, nlev
        !$OMP DO PRIVATE(i,j)
        DO j = 1, npy
          DO i = 1, npx
            qv(i,j,k) = qv(i,j,k) + cs1*EXP(cs2*( t(i,j,k) - t0 )/( t(i,j,k) - cs3) )
            qc(i,j,k) = cs4 * qv(i,j,k)
          END DO
        END DO
      END DO
      !$OMP END PARALLEL
    END SUBROUTINE saturation_adjustment

    ! Parametrization 2: rebuild qv level by level and rescale t.
    ! Level k reads qv at level k-1, so the k loop starts at 2 and is not
    ! work-shared; only the j loop inside each level is.
    !   npx, npy, nlev : loop extents in the three dimensions
    !   t              : temperature (updated in place)
    !   qc             : cloud water content (read only)
    !   qv             : water vapour content (updated in place)
    SUBROUTINE microphysics(npx, npy, nlev, t, qc, qv)
      IMPLICIT NONE

      INTEGER, INTENT(IN) :: npx, npy, nlev
      REAL*8, INTENT(INOUT) :: t(:,:,:)
      REAL*8, INTENT(IN) :: qc(:,:,:)
      REAL*8, INTENT(INOUT) :: qv(:,:,:)
      INTEGER :: i, j, k

      !$OMP PARALLEL
      DO k = 2, nlev
        !$OMP DO PRIVATE(i,j)
        DO j = 1, npy
          DO i = 1, npx
            qv(i,j,k) = qv(i,j,k-1) + cm1*(t(i,j,k)-cm2)**cm3
            t(i,j,k) = t(i,j,k)*( 1.0D0 - cm4*qc(i,j,k)+qv(i,j,k) )
          END DO
        END DO
      END DO
      !$OMP END PARALLEL
    END SUBROUTINE microphysics

  END MODULE m_parametrizations
  87.  
  ! m_setup.f90 -- initialization and cleanup (OpenMP version)
  ! Needs the preprocessor (the #ifdef lines must start in column 1).
  MODULE m_setup
    USE m_config, ONLY: nstop, nout, nx, ny, nz
    USE m_fields, ONLY: t,qv
    USE m_timing, ONLY: init_timers, start_timer, end_timer

    IMPLICIT NONE

  CONTAINS

    ! Report the parallel mode, initialize the timers, allocate and fill the
    ! fields t and qv, and (when compiled with OpenACC) warm up the GPU.
    SUBROUTINE initialize()
      IMPLICIT NONE

      ! NOTE(review): the literal was stripped from the published listing; 1 is
      ! a restored value. It must lie in 1..ntimer and differ from the id used
      ! for the time loop in main.
      INTEGER, PARAMETER :: itiminit = 1   ! timer id
      INTEGER :: i, j, k                   ! loop indices
      INTEGER :: OMP_GET_NUM_THREADS, OMP_GET_THREAD_NUM   ! OpenMP runtime functions

#ifdef _OPENACC
      WRITE(*,"(A)") "Running with OpenACC"
#else
      WRITE(*,"(A)") "Running without OpenACC"
#ifdef _OPENMP
      ! only thread 0 reports, so the message is printed once
      !$OMP PARALLEL
      IF (OMP_GET_THREAD_NUM()==0) THEN
        WRITE(*,"(A,I4,A)") "Running with OpenMP with ", OMP_GET_NUM_THREADS(), " threads"
      END IF
      !$OMP END PARALLEL
#endif
#endif
      WRITE(*,"(A)") "Initialize"

      CALL init_timers()
      CALL start_timer( itiminit, "Initialization" )
      ALLOCATE( t(nx,ny,nz), qv(nx,ny,nz) )

      ! NOTE(review): the integer parts / exponent of the constants below were
      ! stripped from the published listing; the values are restored and should
      ! be confirmed against the book's source.
      DO k = 1, nz
        DO j = 1, ny
          DO i = 1, nx
            t(i,j,k) = 293.0D0 * (1.2D0 + 0.07D0 * COS(6.2D0 * REAL(i+j+k) / REAL(nx+ny+nz)))
            qv(i,j,k) = 1.0D-6 * (1.1D0 + 0.13D0 * COS(5.3D0 * REAL(i+j+k) / REAL(nx*ny*nz)))
          END DO
        END DO
      END DO

#ifdef _OPENACC
      CALL initialize_gpu()
#endif

      CALL end_timer( itiminit )
    END SUBROUTINE initialize

    ! Run a tiny kernel on the GPU to initialize it and check the result.
    ! NOTE(review): the array size was stripped from the published listing; 16
    ! is a restored value, any small size behaves the same.
    SUBROUTINE initialize_gpu()
      IMPLICIT NONE

      INTEGER :: temp(16)
      INTEGER :: i

      !$acc parallel loop
      DO i = 1, 16
        temp(i) = 1
      END DO

      ! every element was set to 1, so the sum must equal the array size
      IF (SUM(temp) == 16) THEN
        WRITE(*,"(A)") "GPU initialized"
      ELSE
        WRITE(*,"(A,I4)") "Error: Problem encountered initializing the GPU"
        STOP
      END IF
    END SUBROUTINE initialize_gpu

    ! Release the memory of t and qv.
    SUBROUTINE cleanup()
      IMPLICIT NONE

      DEALLOCATE( t, qv )
    END SUBROUTINE cleanup

  END MODULE m_setup

● OpenACC 优化,改了 m_io.f90,m_parametrizations.f90,m_physics.f90,m_setup.f90。书上的优化 04 涉及算法改动,没有参与比较

  ! m_io.f90 -- output routines (OpenACC version)
  MODULE m_io
    USE m_config, ONLY: nout, nx, ny, nz
    USE m_fields, ONLY: qv

    IMPLICIT NONE

  CONTAINS

    ! Print the domain mean of qv for time step ntstep.
    ! Does nothing unless ntstep is a multiple of nout.
    ! qv must already be present on the device when this is called.
    SUBROUTINE write_output(ntstep)
      IMPLICIT NONE

      INTEGER, INTENT(IN) :: ntstep   ! current time step
      INTEGER :: i, j, k
      REAL*8 :: qv_mean               ! mean water vapour content (scalar)

      IF (MOD(ntstep, nout) /= 0) RETURN   ! not an output step

      !$acc data present(qv)
      qv_mean = 0.0D0
      ! the three loops are fused into one iteration space and summed on the device
      !$acc parallel
      !$acc loop gang vector collapse(3) reduction(+:qv_mean)
      DO k = 1, nz
        DO j = 1, ny
          DO i = 1, nx
            qv_mean = qv_mean + qv(i,j,k)
          END DO
        END DO
      END DO
      !$acc end parallel
      !$acc end data
      qv_mean = qv_mean / REAL(nx * ny * nz, KIND(qv_mean))

      WRITE(*,"(A,I6,A,ES18.8)") "Step: ", ntstep, ", mean(qv) =", qv_mean
    END SUBROUTINE write_output

  END MODULE m_io
  37.  
  ! m_parametrizations.f90 -- the two physical parametrizations (OpenACC version)
  ! Both routines expect t, qv and qc to be present on the device already.
  MODULE m_parametrizations
    IMPLICIT NONE

    ! NOTE(review): the integer parts / exponents of these constants were
    ! stripped from the published listing. The values below are restored; they
    ! reproduce the published mean(qv) values to about 1% in a hand estimate,
    ! but confirm them against the book's source.
    REAL*8, PARAMETER :: cs1 = 1.0D-6, cs2 = 0.02D0, cs3 = 7.2D0, cs4 = 0.1D0, t0 = 273.0D0
    REAL*8, PARAMETER :: cm1 = 1.0D-6, cm2 = 25.0D0, cm3 = 0.2D0, cm4 = 100.0D0

  CONTAINS

    ! Parametrization 1: update qv point by point from t and derive qc from it.
    !   npx, npy, nlev : loop extents in the three dimensions
    !   t              : temperature (read only)
    !   qc             : cloud water content (overwritten)
    !   qv             : water vapour content (updated in place)
    SUBROUTINE saturation_adjustment(npx, npy, nlev, t, qc, qv)
      IMPLICIT NONE

      INTEGER, INTENT(IN) :: npx, npy, nlev
      REAL*8, INTENT(IN) :: t(:,:,:)
      REAL*8, INTENT(OUT) :: qc(:,:,:)
      REAL*8, INTENT(INOUT) :: qv(:,:,:)
      INTEGER :: i, j, k

      !$acc data present(t,qv,qc)
      ! all points are independent, so the three loops are fused
      !$acc parallel
      !$acc loop gang vector collapse(3)
      DO k = 1, nlev
        DO j = 1, npy
          DO i = 1, npx
            qv(i,j,k) = qv(i,j,k) + cs1*EXP(cs2*( t(i,j,k) - t0 )/( t(i,j,k) - cs3) )
            qc(i,j,k) = cs4 * qv(i,j,k)
          END DO
        END DO
      END DO
      !$acc end parallel
      !$acc end data
    END SUBROUTINE saturation_adjustment

    ! Parametrization 2: rebuild qv level by level and rescale t.
    ! Level k reads qv at level k-1, so the k loop starts at 2 and is kept
    ! sequential; only j and i are parallelized.
    !   npx, npy, nlev : loop extents in the three dimensions
    !   t              : temperature (updated in place)
    !   qc             : cloud water content (read only)
    !   qv             : water vapour content (updated in place)
    SUBROUTINE microphysics(npx, npy, nlev, t, qc, qv)
      IMPLICIT NONE

      INTEGER, INTENT(IN) :: npx, npy, nlev
      REAL*8, INTENT(INOUT) :: t(:,:,:)
      REAL*8, INTENT(IN) :: qc(:,:,:)
      REAL*8, INTENT(INOUT) :: qv(:,:,:)
      INTEGER :: i, j, k

      !$acc data present(t,qv,qc)
      !$acc parallel
      !$acc loop seq
      DO k = 2, nlev
        !$acc loop gang
        DO j = 1, npy
          !$acc loop vector
          DO i = 1, npx
            qv(i,j,k) = qv(i,j,k-1) + cm1*(t(i,j,k)-cm2)**cm3
            t(i,j,k) = t(i,j,k)*( 1.0D0 - cm4*qc(i,j,k)+qv(i,j,k) )
          END DO
        END DO
      END DO
      !$acc end parallel
      !$acc end data
    END SUBROUTINE microphysics

  END MODULE m_parametrizations
  96.  
  ! m_physics.f90 -- runs the parametrizations for one time step (OpenACC version)
  MODULE m_physics
    USE m_config, ONLY: nx, ny, nz
    USE m_fields, ONLY: qv, t
    USE m_parametrizations, ONLY: saturation_adjustment, microphysics

    IMPLICIT NONE

    ! Cloud water content. Declared at module level so that it is allocated
    ! (host and device) once by init_physics and released by finalize_physics,
    ! instead of on every call of physics.
    REAL*8, ALLOCATABLE :: qc(:,:,:)

  CONTAINS

    ! Apply both parametrizations once to the global fields t and qv.
    ! init_physics must have been called before.
    SUBROUTINE physics()
      IMPLICIT NONE

      CALL saturation_adjustment(nx, ny, nz, t, qc, qv)
      CALL microphysics(nx, ny, nz, t, qc, qv)
    END SUBROUTINE physics

    ! Allocate qc on the host and create its device copy.
    SUBROUTINE init_physics()
      IMPLICIT NONE

      ALLOCATE( qc(nx,ny,nz) )
      !$acc enter data create(qc)
    END SUBROUTINE init_physics

    ! Remove the device copy of qc, then free the host array.
    SUBROUTINE finalize_physics()
      IMPLICIT NONE

      !$acc exit data delete(qc)
      DEALLOCATE(qc)
    END SUBROUTINE finalize_physics

  END MODULE m_physics
  130.  
  ! m_setup.f90 -- initialization and cleanup (OpenACC version)
  ! Needs the preprocessor (the #ifdef lines must start in column 1).
  MODULE m_setup
    USE m_config, ONLY: nstop, nout, nx, ny, nz
    USE m_fields, ONLY: t,qv
    USE m_timing, ONLY: init_timers, start_timer, end_timer
    USE m_physics, ONLY: init_physics, finalize_physics
    IMPLICIT NONE

  CONTAINS

    ! Initialize the timers, allocate t and qv on host and device, fill them on
    ! the host and copy them to the device, warm up the GPU and set up the
    ! physics module. The whole routine is timed.
    SUBROUTINE initialize()
      IMPLICIT NONE

      ! NOTE(review): the literal was stripped from the published listing; 1 is
      ! a restored value. It must lie in 1..ntimer and differ from the id used
      ! for the time loop in main.
      INTEGER, PARAMETER :: itiminit = 1   ! timer id
      INTEGER :: i, j, k

#ifdef _OPENACC
      WRITE(*,"(A)") "Running with OpenACC"
#else
      WRITE(*,"(A)") "Running without OpenACC"
#endif

      WRITE(*,"(A)") "Initialize"

      CALL init_timers()
      CALL start_timer( itiminit, "Initialization" )
      ALLOCATE( t(nx,ny,nz), qv(nx,ny,nz) )

      ! device copies are created uninitialized and filled by the update below
      !$acc enter data create(t,qv)

      ! NOTE(review): the integer parts / exponent of the constants below were
      ! stripped from the published listing; the values are restored and should
      ! be confirmed against the book's source.
      DO k = 1, nz
        DO j = 1, ny
          DO i = 1, nx
            t(i,j,k) = 293.0D0 * (1.2D0 + 0.07D0 * COS(6.2D0 * REAL(i+j+k) / REAL(nx+ny+nz)))
            qv(i,j,k) = 1.0D-6 * (1.1D0 + 0.13D0 * COS(5.3D0 * REAL(i+j+k) / REAL(nx*ny*nz)))
          END DO
        END DO
      END DO
      !$acc update device(t,qv)

#ifdef _OPENACC
      CALL initialize_gpu()
#endif
      CALL init_physics()
      CALL end_timer( itiminit )
    END SUBROUTINE initialize

    ! Run a tiny kernel on the GPU to initialize it and check the result.
    ! NOTE(review): the array size was stripped from the published listing; 16
    ! is a restored value, any small size behaves the same.
    SUBROUTINE initialize_gpu()
      IMPLICIT NONE

      INTEGER :: temp(16)
      INTEGER :: i

      !$acc parallel loop
      DO i = 1, 16
        temp(i) = 1
      END DO

      ! every element was set to 1, so the sum must equal the array size
      IF (SUM(temp) == 16) THEN
        WRITE(*,"(A)") "GPU initialized"
      ELSE
        WRITE(*,"(A,I4)") "Error: Problem encountered initializing the GPU"
        STOP
      END IF
    END SUBROUTINE initialize_gpu

    ! Remove the device copies of t and qv, free the host arrays, then let the
    ! physics module release its own storage.
    SUBROUTINE cleanup()
      IMPLICIT NONE

      !$acc exit data delete(t,qv)
      DEALLOCATE( t, qv )
      CALL finalize_physics()
    END SUBROUTINE cleanup

  END MODULE m_setup

● 所有的输出结果。单独编译一个模式(而不使用默认的 makefile)时,需要给 pgf90 命令加上参数 -Mpreprocess,意思是对 Fortran 源文件启用预处理器,否则 m_setup.f90 中以 # 开头的预处理指令会被当成语法错误

  1. cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ make example_serial example_openmp example_openacc1 example_openacc2 example_openacc3 example_openacc4
  2. make[]: Entering directory '/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13/example_serial'
  3. compiling m_config.f90
  4. compiling m_fields.f90
  5. compiling m_io.f90
  6. compiling m_parametrizations.f90
  7. compiling m_physics.f90
  8. compiling m_timing.f90
  9. compiling m_setup.f90
  10. compiling main.f90
  11. make[]: Leaving directory '/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13/example_serial'
  12. make[]: Entering directory '/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13/example_openmp'
  13.  
  14. ... ! 类似上面的过程
  15.  
  16. make[]: Leaving directory '/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13/example_openacc4'
  17. cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ example_serial/example_serial
  18. Running without OpenACC
  19. Initialize
  20. Start of time loop
  21. Step: , mean(qv) = 1.14302104E-04
  22. Step: , mean(qv) = 1.34041461E-04
  23. Step: , mean(qv) = 1.53710207E-04
  24. Step: , mean(qv) = 1.73309068E-04
  25. Step: , mean(qv) = 1.92838848E-04
  26. End of time loop
  27. ----------------------------
  28. Timers:
  29. ----------------------------
  30. Initialization : 17.28 ms
  31. Time loop : 978.08 ms
  32. ----------------------------
  33. cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ example_openmp/example_openmp
  34. Running without OpenACC
  35. Running with OpenMP with threads
  36. Initialize
  37. Start of time loop
  38. Step: , mean(qv) = 1.14302104E-04
  39. Step: , mean(qv) = 1.34041461E-04
  40. Step: , mean(qv) = 1.53710207E-04
  41. Step: , mean(qv) = 1.73309068E-04
  42. Step: , mean(qv) = 1.92838848E-04
  43. End of time loop
  44. ----------------------------
  45. Timers:
  46. ----------------------------
  47. Initialization : 17.96 ms
  48. Time loop : 898.92 ms
  49. ----------------------------
  50. cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ example_openacc1/example_openacc1
  51. Running with OpenACC
  52. Initialize
  53. GPU initialized
  54. Start of time loop
  55. Step: , mean(qv) = 1.14302104E-04
  56. Step: , mean(qv) = 1.34041461E-04
  57. Step: , mean(qv) = 1.53710207E-04
  58. Step: , mean(qv) = 1.73309068E-04
  59. Step: , mean(qv) = 1.92838848E-04
  60. End of time loop
  61. ----------------------------
  62. Timers:
  63. ----------------------------
  64. Initialization : 191.11 ms
  65. Time loop : 1044.35 ms
  66. ----------------------------
  67. cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ example_openacc2/example_openacc2
  68. Running with OpenACC
  69. Initialize
  70. GPU initialized
  71. Start of time loop
  72. Step: , mean(qv) = 1.14302104E-04
  73. Step: , mean(qv) = 1.34041461E-04
  74. Step: , mean(qv) = 1.53710207E-04
  75. Step: , mean(qv) = 1.73309068E-04
  76. Step: , mean(qv) = 1.92838848E-04
  77. End of time loop
  78. ----------------------------
  79. Timers:
  80. ----------------------------
  81. Initialization : 176.72 ms
  82. Time loop : 142.11 ms
  83. ----------------------------
  84. cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ example_openacc3/example_openacc3
  85. Running with OpenACC
  86. Initialize
  87. GPU initialized
  88. Start of time loop
  89. Step: , mean(qv) = 1.14302104E-04
  90. Step: , mean(qv) = 1.34041461E-04
  91. Step: , mean(qv) = 1.53710207E-04
  92. Step: , mean(qv) = 1.73309068E-04
  93. Step: , mean(qv) = 1.92838848E-04
  94. End of time loop
  95. ----------------------------
  96. Timers:
  97. ----------------------------
  98. Initialization : 162.15 ms
  99. Time loop : 121.77 ms
  100. ----------------------------
  101. cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ example_openacc4/example_openacc4
  102. Running with OpenACC
  103. Initialize
  104. GPU initialized
  105. Start of time loop
  106. Step: , mean(qv) = 1.14302104E-04
  107. Step: , mean(qv) = 1.34041461E-04
  108. Step: , mean(qv) = 1.53710207E-04
  109. Step: , mean(qv) = 1.73309068E-04
  110. Step: , mean(qv) = 1.92838848E-04
  111. End of time loop
  112. ----------------------------
  113. Timers:
  114. ----------------------------
  115. Initialization : 152.47 ms
  116. Time loop : 166.53 ms
  117. ----------------------------

● 所有的结果在 nvprof 中的图形。三张图分别为 “仅计算优化无数据优化”,“计算优化与数据优化”,“手工优化变量”

OpenACC 云水参数化方案的更多相关文章

  1. Linux云主机 监控方案浅析

    1.为何需要监控 监控是运维工程师的眼睛,它可帮助运维工程师第一时间发现系统的问题. 对于服务器的整个生命周期,都要和监控打交道: 当有服务器上架,都需要加入比如CPU负载.内存.网络.磁盘等基础监控 ...

  2. 开源网站云查杀方案,搭建自己的云杀毒-搭建ClamAV服务器

    开源网站云查杀方案,搭建自己的云杀毒 搭建ClamAV服务器 1        前言: 在上一篇我们已经演示了整个方案,传送门<开源网站云查杀方案,搭建自己的云杀毒>:https://ww ...

  3. Kube-OVN:大型银行技术团队推荐的金融级云原生网络方案

    近日,由TWT社区主办的2021容器云职业技能大赛团队赛的冠军作品:<适用于大中型银行的云原生技术体系建设方案>中,Kube-OVN成为银行技术团队推荐的金融级云原生网络最佳实践.本文部分 ...

  4. xx云网络实施方案案例

    由于xx云在我公司进行试用,对接我方存储,于是乎就负责网络实施方案,下面是具体方案介绍 ip分配 具体网络拓扑如下: 下面是两台交换机配置 IPMI交换机配置如下: [Quidway]di cu !S ...

  5. 理解 OpenStack 高可用(HA)(1):OpenStack 高可用和灾备方案 [OpenStack HA and DR]

    本系列会分析OpenStack 的高可用性(HA)概念和解决方案: (1)OpenStack 高可用方案概述 (2)Neutron L3 Agent HA - VRRP (虚拟路由冗余协议) (3)N ...

  6. 实现跨云应用——基于DNS的负载均衡

    “公有云可以作为传统IT资源的延展,能帮助客户应对不断变化的需求”——这是我们在向客户介绍公有云产品时经常说的一句话.我们来看一个具体的需求: 某客户有一个web站点,部署在自有的数据中心(on-pr ...

  7. HP PCS 云监控大数据解决方案

    ——把数据从分散统一集中到数据中心 基于HP分布式并行计算/存储技术构建的云监控系统即是通过“云高清摄像机”及IaaS和PaaS监控系统平台,根据用户所需(SaaS)将多路监控数据流传送给“云端”,除 ...

  8. OpenStack 虚拟机监控方案确定

    Contents [hide] 1 监控方案调研过程 1.1 1. 虚拟机里内置监控模块 1.2 2. 通过libvirt获取虚拟机数据监控. 2 a.测试openstack的自待组件ceilomet ...

  9. 免费ERP之云实施

    近日,普实渠道在AIO5软件免费一周年之际,推出了重磅的动作:启动AIO5云实施,推广小微企业免费ERP落地应用. 这无疑是推动客户免费应用ERP的重大里程碑. 当前,在中国小微企业信息化方面,应用情 ...

随机推荐

  1. BZOJ4767: 两双手【组合数学+容斥原理】

    Description 老W是个棋艺高超的棋手,他最喜欢的棋子是马,更具体地,他更加喜欢马所行走的方式.老W下棋时觉得无聊,便决定加强马所行走的方式,更具体地,他有两双手,其中一双手能让马从(u,v) ...

  2. List和json数组的转换(赋源代码)

    public class a11111111 { //参数obj可以是 json对象,字符串, list public static void fun(Object obj){ JSONArray d ...

  3. $.ajax()方法详解--极快瑞中的阿贾克斯函数

    ajax() 函数和那些abs()函数 sin()这些函数都是底层实现好了,封装好了,可以直接拿来用的 使用者只需要传入参数,然后就可以得到相应的结果 $.ajax({name:value, name ...

  4. restheart 基本使用

    restheart 是一个方便基于mongodb的restapi 开发框架 参考项目 https://github.com/rongfengliang/restheart-docker-compose ...

  5. Robots.txt 编写

    搜索引擎Robots协议,是放置在网站根目录下robots.txt文本文件,在文件中可以设定搜索引擎蜘蛛爬行规则.设置搜索引擎蜘蛛Spider抓取内容规则.下面Seoer惜缘举例robots写法规则与 ...

  6. 解决在sass中使用calc不能包含变量的问题。

    今天写sass的时候,发现在sass中使用calc,如果calc中包含一个变量,不会产生效果,看代码: .app-inner { display: flex; height: calc(100% - ...

  7. Python网络爬虫-xpath模块

    一.正解解析 单字符: . : 除换行以外所有字符 [] :[aoe] [a-w] 匹配集合中任意一个字符 \d :数字 [0-9] \D : 非数字 \w :数字.字母.下划线.中文 \W : 非\ ...

  8. nginx和apache最核心的区别在于apache是同步多进程模型,一个连接对应一个进程;nginx是异步的,多个连接(万级别)可以对应一个进程

    nginx和apache的一些优缺点比较,摘自网络,加自己的一些整理. nginx相对于apache的优点: 1.轻量级,同样是web 服务,比apache 占用更少的内存及资源 2.抗并发,ngin ...

  9. gc之六--Minor GC、Major GC、Full GC以及Mixed GC之间的区别

    目录: GC之一--GC 的算法分析.垃圾收集器.内存分配策略介绍 GC之二--GC日志分析(jdk1.8)整理中 GC之三--GC 触发Full GC执行的情况及应对策略 gc之四--Minor G ...

  10. Tensorflow笔记——神经网络图像识别(五)手写数字识别