OpenACC 云水参数化方案
▶ 书上第十三章,通过一系列步骤逐步优化一个云水参数化方案。用于熟悉 Fortran 以及 OpenACC 在其下的表现
● 代码,文件较多,放在一起了
! main.f90
!> Driver: initialise, run the time loop (physics + periodic output), report timers.
program main
  use m_config, only: nstop
  use m_physics, only: physics
  use m_io, only: write_output
  use m_setup, only: initialize, cleanup
  use m_timing, only: start_timer, end_timer, print_timers
  implicit none

  integer :: ntstep
  ! Timer slot used for the time loop.
  ! NOTE(review): the original value is missing from this listing; 5 is a
  ! placeholder. It must lie in 1..ntimer and differ from the slot used by
  ! initialize(), because a slot cannot be started twice - confirm.
  integer, parameter :: itimloop = 5

  call initialize()                      ! set up the timers and the device

  write(*,"(A)") "Start of time loop"
  call start_timer(itimloop, "Time loop")
  do ntstep = 1, nstop
    call physics()                       ! compute
    call write_output(ntstep)            ! prints only on output steps
  end do
  call end_timer(itimloop)
  write(*,"(A)") "End of time loop"

  call print_timers()
  call cleanup()
end program main
! m_config.f90: run-time parameters
! Run configuration: grid extents and time-stepping controls.
! NOTE(review): the numeric value of every parameter below is missing from this
! listing (each "=" is followed directly by the comment), and the END MODULE
! statement sits inside the last comment, so the module does not compile as
! shown. Restore the values from the original m_config.f90.
MODULE m_config
INTEGER, parameter :: nx = ! number of grid points in longitude
INTEGER, parameter :: ny = ! number of grid points in latitude
INTEGER, parameter :: nz = ! number of grid points in altitude
INTEGER, parameter :: nstop = ! number of time steps
INTEGER, parameter :: nout = ! output interval END MODULE m_config ! m_fields.f90: field variables
!> Prognostic fields shared by the setup, physics and output modules.
module m_fields
  implicit none

  ! Working precision: the kind of the D0 literals used to fill these fields.
  ! NOTE(review): the kind digit after "REAL*" is missing from the listing;
  ! double precision is assumed - confirm.
  integer, parameter, private :: dp = kind(1.0d0)

  real(dp), allocatable :: qv(:,:,:)   ! water vapour content
  real(dp), allocatable :: t(:,:,:)    ! temperature
end module m_fields
! m_io.f90: input/output routines
!> Diagnostic output for the model.
module m_io
  use m_config, only: nout, nx, ny, nz
  use m_fields, only: qv
  implicit none
contains

  !> Print the domain mean of qv on every nout-th time step.
  !! ntstep: current time step; nothing is printed unless it is a multiple of nout.
  subroutine write_output(ntstep)
    integer, intent(in) :: ntstep
    integer, parameter :: dp = kind(1.0d0)   ! kind of the D0 literals used by this code
    integer :: i, j, k
    real(dp) :: qv_mean                      ! mean water vapour content (scalar)

    if (mod(ntstep, nout) /= 0) return       ! not an output step

    ! Accumulate with the first index innermost (column-major order).
    qv_mean = 0.0_dp
    do k = 1, nz
      do j = 1, ny
        do i = 1, nx
          qv_mean = qv_mean + qv(i,j,k)
        end do
      end do
    end do

    ! Convert each extent before multiplying so the point count cannot overflow
    ! a default integer on large grids.
    qv_mean = qv_mean / (real(nx, dp) * real(ny, dp) * real(nz, dp))

    write(*,"(A,I6,A,ES18.8)") "Step: ", ntstep, ", mean(qv) =", qv_mean
  end subroutine write_output

end module m_io
! m_parametrizations.f90: parameterization schemes
! Two physical parameterization schemes operating on 3-D fields.
! NOTE(review): this listing is damaged - the digit after "REAL*", the exponent of
! the "1.0D-" literals, the integer part of most real constants, the lower loop
! bounds and the offset in "k-" are missing, and several statements share one
! line. The constants cannot be recovered from this text; restore them from the
! original m_parametrizations.f90.
MODULE m_parametrizations
IMPLICIT NONE REAL*, parameter :: cs1 = 1.0D-, cs2 = .02D0, cs3 = .2D0, cs4=.1D0, t0=.0D0
REAL*, parameter :: cm1 = 1.0D-, cm2=.0D0, cm3=.2D0, cm4=.0D0 CONTAINS
! Scheme 1: adds an exponential function of t to qv at every grid point, then
! sets qc to cs4 times the updated qv.
SUBROUTINE saturation_adjustment(npx, npy, nlev, t, qc, qv) ! parameterization scheme 1
IMPLICIT NONE INTEGER, INTENT(IN) :: npx, npy, nlev ! input dimensions
REAL*, INTENT(IN) :: t(:,:,:) ! temperature
REAL*, INTENT(OUT) :: qc(:,:,:) ! cloud water content
REAL*, INTENT(INOUT) :: qv(:,:,:) ! water vapour content
INTEGER :: i, j, k DO k = , nlev
DO j = , npy
DO i = , npx
qv(i,j,k) = qv(i,j,k) + cs1*EXP(cs2*( t(i,j,k) - t0 )/( t(i,j,k) - cs3) )
qc(i,j,k) = cs4 * qv(i,j,k)
END DO
END DO
END DO
! Scheme 2: qv at level k is rebuilt from qv at a lower level (the "k-" offset is
! missing in this listing), so the k loop carries a dependence between levels;
! t is then rescaled using qc and the new qv.
END SUBROUTINE saturation_adjustment SUBROUTINE microphysics(npx, npy, nlev, t, qc, qv) ! parameterization scheme 2
IMPLICIT NONE INTEGER, INTENT(IN) :: npx, npy, nlev
REAL*, INTENT(INOUT) :: t(:,:,:)
REAL*, INTENT(IN) :: qc(:,:,:)
REAL*, INTENT(INOUT) :: qv(:,:,:)
INTEGER :: i, j, k DO k = , nlev
DO j = , npy
DO i = , npx
qv(i, j, k) = qv(i,j,k-) + cm1*(t(i,j,k)-cm2)**cm3
t(i, j, k) = t(i, j, k)*( .0D0 - cm4*qc(i,j,k)+qv(i,j,k) )
END DO
END DO
END DO
END SUBROUTINE microphysics END MODULE m_parametrizations ! m_physics.f90: runs the parameterization schemes
!> Runs the physical parameterizations for one time step.
module m_physics
  use m_config, only: nx, ny, nz
  use m_fields, only: qv, t
  use m_parametrizations, only: saturation_adjustment, microphysics
  implicit none
contains

  !> Apply both parameterization schemes, in order, to the module fields t and qv.
  subroutine physics()
    ! NOTE(review): the kind digit after "REAL*" is missing from the listing;
    ! double precision (the kind of the code's D0 literals) is assumed - confirm.
    integer, parameter :: dp = kind(1.0d0)
    real(dp) :: qc(nx,ny,nz)   ! cloud water content, temporary passed from scheme 1 to scheme 2

    call saturation_adjustment(nx, ny, nz, t, qc, qv)   ! first parameterization
    call microphysics(nx, ny, nz, t, qc, qv)            ! second parameterization
  end subroutine physics

end module m_physics
! m_timing.f90: timers
!> Wall-clock timers built on SYSTEM_CLOCK; each timer is a numbered slot with a tag.
module m_timing
  implicit none

  ! Working precision: the kind of the D0 literals used by this code.
  integer, parameter, private :: dp = kind(1.0d0)

  ! NOTE(review): the values of ntimer and of the tag length are missing from
  ! this listing; 20 and 32 are placeholders - confirm against the original file.
  integer, parameter :: ntimer = 20           ! number of timer slots
  real(dp) :: rtimer(ntimer)                  ! elapsed seconds per slot
  character(len=32) :: timertag(ntimer)       ! label per slot; blank means "never started"
  integer :: icountold(ntimer), &             ! tick at the start of the timed section
             icountrate, &                    ! count rate of SYSTEM_CLOCK
             icountmax                        ! maximum counter value of SYSTEM_CLOCK

contains

  !> Reset every slot and query the clock rate and maximum count.
  subroutine init_timers()
    rtimer(:) = 0.0_dp
    timertag(:) = ""
    icountold(:) = 0
    call system_clock(count_rate=icountrate, count_max=icountmax)
  end subroutine init_timers

  !> Start timer slot id and label it with tag.
  !! Stops the program with a non-zero status if id is out of range, the slot
  !! has been started before, or tag is blank.
  subroutine start_timer(id, tag)
    integer, intent(in) :: id
    character(*), intent(in) :: tag

    if (id < 1 .or. id > ntimer) then
      write(*,"(A,I0,A,I0)") "Error: timer id=", id, " is outside the valid range 1..", ntimer
      stop 1
    end if
    if (len_trim(timertag(id)) /= 0) then
      write(*,"(A,I4)") "Error: timer already started previously, id:", id
      stop 1
    end if
    if (len_trim(tag) == 0) then
      write(*,"(A,I4)") "Error: empty tag provided, id:", id
      stop 1
    end if

    timertag(id) = trim(tag)
    ! Wait for outstanding accelerator work so it is not charged to this section.
    !$acc wait
    call system_clock(count=icountold(id))
  end subroutine start_timer

  !> Stop timer slot id and store the elapsed seconds in rtimer(id).
  !! Stops the program with a non-zero status if id is out of range or the
  !! slot was never started.
  subroutine end_timer(id)
    integer, intent(in) :: id
    integer :: icountnew, nticks

    if (id < 1 .or. id > ntimer) then
      write(*,"(A,I0,A,I0)") "Error: timer id=", id, " is outside the valid range 1..", ntimer
      stop 1
    end if
    if (len_trim(timertag(id)) == 0) then
      write(*,"(A,I4)") "Error: Need to call start_timer before end_timer, id:", id
      stop 1
    end if

    ! Wait for outstanding accelerator work so it is included in this section.
    !$acc wait
    call system_clock(count=icountnew)

    ! The counter restarts at 0 after reaching icountmax; account for one wrap.
    ! The operands are ordered so the intermediate values cannot overflow.
    if (icountnew >= icountold(id)) then
      nticks = icountnew - icountold(id)
    else
      nticks = (icountmax - icountold(id)) + icountnew + 1
    end if

    ! SYSTEM_CLOCK reports a rate of 0 when no clock is available.
    if (icountrate > 0) then
      rtimer(id) = real(nticks, dp) / real(icountrate, dp)
    else
      rtimer(id) = 0.0_dp
    end if
  end subroutine end_timer

  !> Print every timer that recorded a positive time, in milliseconds.
  subroutine print_timers()
    integer :: id

    write(*,"(A)") "----------------------------"
    write(*,"(A)") "Timers:"
    write(*,"(A)") "----------------------------"
    do id = 1, ntimer
      if (rtimer(id) > 0.0_dp) then
        write(*,"(A15,A2,F8.2,A)") timertag(id), ": ", rtimer(id)*1.0e3_dp, " ms"
      end if
    end do
    write(*,"(A)") "----------------------------"
  end subroutine print_timers

end module m_timing
! m_setup.f90: initialisation and clean-up
! Set-up and tear-down: allocates and fills the fields t and qv, and releases them.
! NOTE(review): this listing is damaged - the value of itiminit, the lower loop
! bounds, the leading digits of the constants in the initial profiles, and the
! size/bounds/values used in initialize_gpu are missing, and several statements
! (including preprocessor lines) share one line. Restore from the original
! m_setup.f90.
MODULE m_setup
USE m_config, ONLY: nstop, nout, nx, ny, nz
USE m_fields, ONLY: t,qv
USE m_timing, ONLY: init_timers, start_timer, end_timer IMPLICIT NONE CONTAINS
! Reports whether the build has OpenACC, initialises the timers, allocates t and
! qv with shape (nx,ny,nz), fills them with cosine-shaped profiles and, in
! OpenACC builds, calls initialize_gpu. The work is timed under slot itiminit.
SUBROUTINE initialize() ! initialise the timers and the device
IMPLICIT NONE INTEGER, PARAMETER :: itiminit = ! timer id
INTEGER :: i, j, k #ifdef _OPENACC
WRITE(*,"(A)") "Running with OpenACC"
#else
WRITE(*,"(A)") "Running without OpenACC"
#endif WRITE(*,"(A)") "Initialize" CALL init_timers()
CALL start_timer( itiminit, "Initialization" )
ALLOCATE( t(nx,ny,nz), qv(nx,ny,nz) ) DO k =, nz
DO j = , ny
DO i = , nx
t(i,j,k) = .0D0 * (.2D0 + .07D0 * COS(.2D0 * REAL(i+j+k) / REAL(nx+ny+nz)))
qv(i,j,k) = 1.0D- * (.1D0 + .13D0 * COS(.3D0 * REAL(i+j+k) / REAL(nx*ny*nz)))
END DO
END DO
END DO #ifdef _OPENACC
CALL initialize_gpu()
#endif CALL end_timer( itiminit )
! Runs a small "acc parallel loop" that fills temp, then checks SUM(temp) and
! either reports success or stops with an error message.
END SUBROUTINE initialize SUBROUTINE initialize_gpu()! run a small kernel on the GPU to initialise it
IMPLICIT NONE INTEGER :: temp()
INTEGER :: i !$acc parallel loop
DO i = ,
temp(i) =
END DO IF (SUM(temp) == ) THEN
WRITE(*,"(A)") "GPU initialized"
ELSE
WRITE(*,"(A,I4)") "Error: Problem encountered initializing the GPU"
STOP
END IF
END SUBROUTINE initialize_gpu SUBROUTINE cleanup()! release the memory of t and qv
IMPLICIT NONE DEALLOCATE( t, qv )
END SUBROUTINE cleanup END MODULE m_setup
● OpenMP 优化,改了 m_io.f90,m_parametrizations.f90,m_setup.f90
! m_io.f90
!> Diagnostic output for the model (OpenMP variant).
module m_io
  use m_config, only: nout, nx, ny, nz
  use m_fields, only: qv
  implicit none
contains

  !> Print the domain mean of qv on every nout-th time step.
  !! ntstep: current time step; nothing is printed unless it is a multiple of nout.
  subroutine write_output(ntstep)
    integer, intent(in) :: ntstep
    integer, parameter :: dp = kind(1.0d0)   ! kind of the D0 literals used by this code
    integer :: i, j, k
    real(dp) :: qv_mean

    if (mod(ntstep, nout) /= 0) return       ! not an output step

    qv_mean = 0.0_dp
    ! One parallel region for the whole reduction: opening it inside the k loop
    ! would fork, join and reduce once per level. The k and j loops are collapsed
    ! to give the threads more iterations to share. The order of the partial
    ! sums is unspecified, so the last bits of the result may vary between runs.
    !$omp parallel do collapse(2) private(i, j, k) shared(qv) reduction(+:qv_mean)
    do k = 1, nz
      do j = 1, ny
        do i = 1, nx
          qv_mean = qv_mean + qv(i,j,k)
        end do
      end do
    end do
    !$omp end parallel do

    ! Convert each extent before multiplying so the point count cannot overflow
    ! a default integer on large grids.
    qv_mean = qv_mean / (real(nx, dp) * real(ny, dp) * real(nz, dp))

    write(*,"(A,I6,A,ES18.8)") "Step: ", ntstep, ", mean(qv) =", qv_mean
  end subroutine write_output

end module m_io
! m_parametrizations.f90
! Two physical parameterization schemes, OpenMP variant.
! NOTE(review): this listing is damaged - the digit after "REAL*", the exponent of
! the "1.0D-" literals, the integer part of most real constants, the lower loop
! bounds and the offset in "k-" are missing, and several statements share one
! line (both "!$OMP PARALLEL" directives sit at the end of a declaration line).
! Restore from the original m_parametrizations.f90.
MODULE m_parametrizations
IMPLICIT NONE REAL*, parameter :: cs1 = 1.0D-, cs2 = .02D0, cs3 = .2D0, cs4=.1D0, t0=.0D0
REAL*, parameter :: cm1 = 1.0D-, cm2=.0D0, cm3=.2D0, cm4=.0D0 CONTAINS
! Scheme 1: adds an exponential function of t to qv at every grid point, then
! sets qc to cs4 times the updated qv. A single parallel region encloses the k
! loop; the work-sharing DO inside it divides the j loop among the threads.
SUBROUTINE saturation_adjustment(npx, npy, nlev, t, qc, qv)
IMPLICIT NONE INTEGER, INTENT(IN) :: npx, npy, nlev
REAL*, INTENT(IN) :: t(:,:,:)
REAL*, INTENT(OUT) :: qc(:,:,:)
REAL*, INTENT(INOUT) :: qv(:,:,:)
INTEGER :: i, j, k !$OMP PARALLEL
DO k = , nlev
!$OMP DO PRIVATE(i,j)
DO j = , npy
DO i = , npx
qv(i,j,k) = qv(i,j,k) + cs1*EXP(cs2*( t(i,j,k) - t0 )/( t(i,j,k) - cs3) )
qc(i,j,k) = cs4 * qv(i,j,k)
END DO
END DO
END DO
!$OMP END PARALLEL
! Scheme 2: qv at level k is rebuilt from qv at a lower level (the "k-" offset is
! missing in this listing), so the levels depend on each other; only the j loop
! inside each level is work-shared.
END SUBROUTINE saturation_adjustment SUBROUTINE microphysics(npx, npy, nlev, t, qc, qv)
IMPLICIT NONE INTEGER, INTENT(IN) :: npx, npy, nlev
REAL*, INTENT(INOUT) :: t(:,:,:)
REAL*, INTENT(IN) :: qc(:,:,:)
REAL*, INTENT(INOUT) :: qv(:,:,:)
INTEGER :: i, j, k !$OMP PARALLEL
DO k = , nlev
!$OMP DO PRIVATE(i,j)
DO j = , npy
DO i = , npx
qv(i, j, k) = qv(i,j,k-) + cm1*(t(i,j,k)-cm2)**cm3
t(i, j, k) = t(i, j, k)*( .0D0 - cm4*qc(i,j,k)+qv(i,j,k) )
END DO
END DO
END DO
!$OMP END PARALLEL
END SUBROUTINE microphysics END MODULE m_parametrizations ! m_setup.f90
! Set-up and tear-down, OpenMP variant: as the serial version, plus a report of
! the OpenMP thread count when built with _OPENMP and without _OPENACC.
! NOTE(review): this listing is damaged - the value of itiminit, the thread
! number compared in the IF, the lower loop bounds, the leading digits of the
! constants in the initial profiles, and the size/bounds/values used in
! initialize_gpu are missing, and several statements (including preprocessor
! lines) share one line. Restore from the original m_setup.f90.
MODULE m_setup
USE m_config, ONLY: nstop, nout, nx, ny, nz
USE m_fields, ONLY: t,qv
USE m_timing, ONLY: init_timers, start_timer, end_timer IMPLICIT NONE CONTAINS
! Reports the build flavour, initialises the timers, allocates t and qv with
! shape (nx,ny,nz), fills them with cosine-shaped profiles and, in OpenACC
! builds, calls initialize_gpu. The work is timed under slot itiminit.
SUBROUTINE initialize() ! initialise the timers and the device
IMPLICIT NONE INTEGER, PARAMETER :: itiminit = ! timer ID
INTEGER :: i, j, k ! loop indices
INTEGER :: OMP_GET_NUM_THREADS, OMP_GET_THREAD_NUM #ifdef _OPENACC
WRITE(*,"(A)") "Running with OpenACC"
#else
WRITE(*,"(A)") "Running without OpenACC"
#ifdef _OPENMP
! Inside the parallel region so OMP_GET_NUM_THREADS returns the team size; the
! IF selects one thread to print the message.
!$OMP PARALLEL
IF (OMP_GET_THREAD_NUM()==) THEN
WRITE(*,"(A,I4,A)") "Running with OpenMP with ", OMP_GET_NUM_THREADS(), " threads"
END IF
!$OMP END PARALLEL
#endif
#endif
WRITE(*,"(A)") "Initialize" CALL init_timers()
CALL start_timer( itiminit, "Initialization" )
ALLOCATE( t(nx,ny,nz), qv(nx,ny,nz) ) DO k =, nz
DO j = , ny
DO i = , nx
t(i,j,k) = .0D0 * (.2D0 + .07D0 * COS(.2D0 * REAL(i+j+k) / REAL(nx+ny+nz)))
qv(i,j,k) = 1.0D- * (.1D0 + .13D0 * COS(.3D0 * REAL(i+j+k) / REAL(nx*ny*nz)))
END DO
END DO
END DO #ifdef _OPENACC
CALL initialize_gpu()
#endif CALL end_timer( itiminit )
! Runs a small "acc parallel loop" that fills temp, then checks SUM(temp) and
! either reports success or stops with an error message.
END SUBROUTINE initialize SUBROUTINE initialize_gpu()
IMPLICIT NONE INTEGER :: temp()
INTEGER :: i !$acc parallel loop
DO i = ,
temp(i) =
END DO IF (SUM(temp) == ) THEN
WRITE(*,"(A)") "GPU initialized"
ELSE
WRITE(*,"(A,I4)") "Error: Problem encountered initializing the GPU"
STOP
END IF
! Releases the fields allocated by initialize.
END SUBROUTINE initialize_gpu SUBROUTINE cleanup()
IMPLICIT NONE DEALLOCATE( t, qv )
END SUBROUTINE cleanup END MODULE m_setup
● OpenACC 优化,改了 m_io.f90,m_parametrizations.f90,m_physics.f90,m_setup.f90。书上的优化 04 涉及算法改动,没有参与比较
! m_io.f90
!> Diagnostic output for the model (OpenACC variant).
module m_io
  use m_config, only: nout, nx, ny, nz
  use m_fields, only: qv
  implicit none
contains

  !> Print the domain mean of qv on every nout-th time step.
  !! ntstep: current time step; nothing is printed unless it is a multiple of nout.
  !! In OpenACC builds qv must already be present on the device.
  subroutine write_output(ntstep)
    integer, intent(in) :: ntstep
    integer, parameter :: dp = kind(1.0d0)   ! kind of the D0 literals used by this code
    integer :: i, j, k
    real(dp) :: qv_mean

    if (mod(ntstep, nout) /= 0) return       ! not an output step

    ! The sum is reduced on the device from the resident copy of qv; only the
    ! scalar result is needed on the host.
    !$acc data present(qv)
    qv_mean = 0.0_dp
    !$acc parallel
    ! NOTE(review): the collapse count is missing from the listing; 3 (all three
    ! tightly nested loops) is assumed - confirm.
    !$acc loop gang vector collapse(3) reduction(+:qv_mean)
    do k = 1, nz
      do j = 1, ny
        do i = 1, nx
          qv_mean = qv_mean + qv(i,j,k)
        end do
      end do
    end do
    !$acc end parallel
    !$acc end data

    ! Convert each extent before multiplying so the point count cannot overflow
    ! a default integer on large grids.
    qv_mean = qv_mean / (real(nx, dp) * real(ny, dp) * real(nz, dp))

    write(*,"(A,I6,A,ES18.8)") "Step: ", ntstep, ", mean(qv) =", qv_mean
  end subroutine write_output

end module m_io
! m_parametrizations.f90
! Two physical parameterization schemes, OpenACC variant. Both routines wrap
! their loops in "acc data present(t,qv,qc)", i.e. they expect the three arrays
! to be on the device already.
! NOTE(review): this listing is damaged - the digit after "REAL*", the exponent of
! the "1.0D-" literals, the integer part of most real constants, the lower loop
! bounds, the collapse count and the offset in "k-" are missing, and several
! statements share one line. Restore from the original m_parametrizations.f90.
MODULE m_parametrizations
IMPLICIT NONE REAL*, parameter :: cs1 = 1.0D-, cs2 = .02D0, cs3 = .2D0, cs4=.1D0, t0=.0D0
REAL*, parameter :: cm1 = 1.0D-, cm2=.0D0, cm3=.2D0, cm4=.0D0 CONTAINS
! Scheme 1: adds an exponential function of t to qv at every grid point, then
! sets qc to cs4 times the updated qv. The loop nest is collapsed and spread
! over gangs and vector lanes.
SUBROUTINE saturation_adjustment(npx, npy, nlev, t, qc, qv)
IMPLICIT NONE INTEGER, INTENT(IN) :: npx, npy, nlev
REAL*, INTENT(IN) :: t(:,:,:)
REAL*, INTENT(OUT) :: qc(:,:,:)
REAL*, INTENT(INOUT) :: qv(:,:,:)
INTEGER :: i, j, k !$acc data present(t,qv,qc)
!$acc parallel
!$acc loop gang vector collapse()
DO k = , nlev
DO j = , npy
DO i = , npx
qv(i,j,k) = qv(i,j,k) + cs1*EXP(cs2*( t(i,j,k) - t0 )/( t(i,j,k) - cs3) )
qc(i,j,k) = cs4 * qv(i,j,k)
END DO
END DO
END DO
!$acc end parallel
!$acc end data
! Scheme 2: qv at level k is rebuilt from qv at a lower level (the "k-" offset is
! missing in this listing), so the k loop is marked seq; within a level the j
! loop is distributed over gangs and the i loop over vector lanes.
END SUBROUTINE saturation_adjustment SUBROUTINE microphysics(npx, npy, nlev, t, qc, qv)
IMPLICIT NONE INTEGER, INTENT(IN) :: npx, npy, nlev
REAL*, INTENT(INOUT) :: t(:,:,:)
REAL*, INTENT(IN) :: qc(:,:,:)
REAL*, INTENT(INOUT) :: qv(:,:,:)
INTEGER :: i, j, k
!$acc data present(t,qv,qc)
!$acc parallel
!$acc loop seq
DO k = , nlev
!$acc loop gang
DO j = , npy
!$acc loop vector
DO i = , npx
qv(i, j, k) = qv(i,j,k-) + cm1*(t(i,j,k)-cm2)**cm3
t(i, j, k) = t(i, j, k)*( .0D0 - cm4*qc(i,j,k)+qv(i,j,k) )
END DO
END DO
END DO
!$acc end parallel
!$acc end data
END SUBROUTINE microphysics END MODULE m_parametrizations ! m_physics.f90
!> Runs the physical parameterizations for one time step (OpenACC variant).
!! The work array qc lives at module level so it is allocated, and created on
!! the device, once instead of on every call to physics.
module m_physics
  use m_config, only: nx, ny, nz
  use m_fields, only: qv, t
  use m_parametrizations, only: saturation_adjustment, microphysics
  implicit none

  ! NOTE(review): the kind digit after "REAL*" is missing from the listing;
  ! double precision (the kind of the code's D0 literals) is assumed - confirm.
  integer, parameter, private :: dp = kind(1.0d0)

  ! Cloud water content; allocated by init_physics, released by finalize_physics.
  real(dp), allocatable :: qc(:,:,:)

contains

  !> Apply both parameterization schemes, in order, to the module fields.
  !! init_physics must have been called first.
  subroutine physics()
    call saturation_adjustment(nx, ny, nz, t, qc, qv)
    call microphysics(nx, ny, nz, t, qc, qv)
  end subroutine physics

  !> Allocate qc on the host and create its device copy.
  !! A repeated call is a no-op rather than an "already allocated" run-time error.
  subroutine init_physics()
    if (allocated(qc)) return
    allocate(qc(nx,ny,nz))
    !$acc enter data create(qc)
  end subroutine init_physics

  !> Delete the device copy of qc and free the host array.
  !! Calling it without a prior init_physics is a no-op.
  subroutine finalize_physics()
    if (.not. allocated(qc)) return
    ! The device copy is removed first, while the host array still exists.
    !$acc exit data delete(qc)
    deallocate(qc)
  end subroutine finalize_physics

end module m_physics
! m_setup.f90
! Set-up and tear-down, OpenACC variant: in addition to the host allocation, the
! fields t and qv are created on the device, filled on the host and copied over
! with "update device"; the physics work array is set up via init_physics.
! NOTE(review): this listing is damaged - the value of itiminit, the lower loop
! bounds, the leading digits of the constants in the initial profiles, and the
! size/bounds/values used in initialize_gpu are missing, and several statements
! (including directives and preprocessor lines) share one line. Restore from the
! original m_setup.f90.
MODULE m_setup
USE m_config, ONLY: nstop, nout, nx, ny, nz
USE m_fields, ONLY: t,qv
USE m_timing, ONLY: init_timers, start_timer, end_timer
USE m_physics, ONLY: init_physics, finalize_physics
IMPLICIT NONE CONTAINS
! Reports the build flavour, initialises the timers, allocates t and qv with
! shape (nx,ny,nz) on host and device, fills them on the host, updates the
! device copies, calls initialize_gpu in OpenACC builds and then init_physics.
! The work is timed under slot itiminit.
SUBROUTINE initialize() ! initialise the timers and the device
IMPLICIT NONE INTEGER, PARAMETER :: itiminit =
INTEGER :: i, j, k #ifdef _OPENACC
WRITE(*,"(A)") "Running with OpenACC"
#else
WRITE(*,"(A)") "Running without OpenACC"
#endif WRITE(*,"(A)") "Initialize" CALL init_timers()
CALL start_timer( itiminit, "Initialization" )
ALLOCATE( t(nx,ny,nz), qv(nx,ny,nz) ) !$acc enter data create(t,qv)
DO k =, nz
DO j = , ny
DO i = , nx
t(i,j,k) = .0D0 * (.2D0 + .07D0 * COS(.2D0 * REAL(i+j+k) / REAL(nx+ny+nz)))
qv(i,j,k) = 1.0D- * (.1D0 + .13D0 * COS(.3D0 * REAL(i+j+k) / REAL(nx*ny*nz)))
END DO
END DO
END DO
!$acc update device(t,qv) #ifdef _OPENACC
CALL initialize_gpu()
#endif
CALL init_physics()
CALL end_timer( itiminit )
! Runs a small "acc parallel loop" that fills temp, then checks SUM(temp) and
! either reports success or stops with an error message.
END SUBROUTINE initialize SUBROUTINE initialize_gpu()
IMPLICIT NONE INTEGER :: temp()
INTEGER :: i !$acc parallel loop
DO i = ,
temp(i) =
END DO IF (SUM(temp) == ) THEN
WRITE(*,"(A)") "GPU initialized"
ELSE
WRITE(*,"(A,I4)") "Error: Problem encountered initializing the GPU"
STOP
END IF
! Deletes the device copies of t and qv, frees the host arrays, then lets
! finalize_physics release the physics work array.
END SUBROUTINE initialize_gpu SUBROUTINE cleanup()
IMPLICIT NONE !$acc exit data delete(t,qv)
DEALLOCATE( t, qv )
CALL finalize_physics()
END SUBROUTINE cleanup END MODULE m_setup
● 所有的输出结果。单独编译一个模式(而不使用默认的 makefile)时,在命令 pgf90 中要使用参数 -Mpreprocess,意思是将预编译器作用到 fortran 文件中,否则 m_setup.f90 中的 # 预编译命令会被当成错误
cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ make example_serial example_openmp example_openacc1 example_openacc2 example_openacc3 example_openacc4
make[]: Entering directory '/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13/example_serial'
compiling m_config.f90
compiling m_fields.f90
compiling m_io.f90
compiling m_parametrizations.f90
compiling m_physics.f90
compiling m_timing.f90
compiling m_setup.f90
compiling main.f90
make[]: Leaving directory '/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13/example_serial'
make[]: Entering directory '/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13/example_openmp' ... ! 类似上面的过程 make[]: Leaving directory '/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13/example_openacc4'
cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ example_serial/example_serial
Running without OpenACC
Initialize
Start of time loop
Step: , mean(qv) = 1.14302104E-04
Step: , mean(qv) = 1.34041461E-04
Step: , mean(qv) = 1.53710207E-04
Step: , mean(qv) = 1.73309068E-04
Step: , mean(qv) = 1.92838848E-04
End of time loop
----------------------------
Timers:
----------------------------
Initialization : 17.28 ms
Time loop : 978.08 ms
----------------------------
cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ example_openmp/example_openmp
Running without OpenACC
Running with OpenMP with threads
Initialize
Start of time loop
Step: , mean(qv) = 1.14302104E-04
Step: , mean(qv) = 1.34041461E-04
Step: , mean(qv) = 1.53710207E-04
Step: , mean(qv) = 1.73309068E-04
Step: , mean(qv) = 1.92838848E-04
End of time loop
----------------------------
Timers:
----------------------------
Initialization : 17.96 ms
Time loop : 898.92 ms
----------------------------
cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ example_openacc1/example_openacc1
Running with OpenACC
Initialize
GPU initialized
Start of time loop
Step: , mean(qv) = 1.14302104E-04
Step: , mean(qv) = 1.34041461E-04
Step: , mean(qv) = 1.53710207E-04
Step: , mean(qv) = 1.73309068E-04
Step: , mean(qv) = 1.92838848E-04
End of time loop
----------------------------
Timers:
----------------------------
Initialization : 191.11 ms
Time loop : 1044.35 ms
----------------------------
cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ example_openacc2/example_openacc2
Running with OpenACC
Initialize
GPU initialized
Start of time loop
Step: , mean(qv) = 1.14302104E-04
Step: , mean(qv) = 1.34041461E-04
Step: , mean(qv) = 1.53710207E-04
Step: , mean(qv) = 1.73309068E-04
Step: , mean(qv) = 1.92838848E-04
End of time loop
----------------------------
Timers:
----------------------------
Initialization : 176.72 ms
Time loop : 142.11 ms
----------------------------
cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ example_openacc3/example_openacc3
Running with OpenACC
Initialize
GPU initialized
Start of time loop
Step: , mean(qv) = 1.14302104E-04
Step: , mean(qv) = 1.34041461E-04
Step: , mean(qv) = 1.53710207E-04
Step: , mean(qv) = 1.73309068E-04
Step: , mean(qv) = 1.92838848E-04
End of time loop
----------------------------
Timers:
----------------------------
Initialization : 162.15 ms
Time loop : 121.77 ms
----------------------------
cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ example_openacc4/example_openacc4
Running with OpenACC
Initialize
GPU initialized
Start of time loop
Step: , mean(qv) = 1.14302104E-04
Step: , mean(qv) = 1.34041461E-04
Step: , mean(qv) = 1.53710207E-04
Step: , mean(qv) = 1.73309068E-04
Step: , mean(qv) = 1.92838848E-04
End of time loop
----------------------------
Timers:
----------------------------
Initialization : 152.47 ms
Time loop : 166.53 ms
----------------------------
● 所有的结果在 nvprof 中的图形。三张图分别为 “仅计算优化无数据优化”,“计算优化与数据优化”,“手工优化变量”



OpenACC 云水参数化方案的更多相关文章
- Linux云主机 监控方案浅析
1.为何需要监控 监控是运维工程师的眼睛,它可帮助运维工程师第一时间发现系统的问题. 对于服务器的整个生命周期,都要和监控打交道: 当有服务器上架,都需要加入比如CPU负载.内存.网络.磁盘等基础监控 ...
- 开源网站云查杀方案,搭建自己的云杀毒-搭建ClamAV服务器
开源网站云查杀方案,搭建自己的云杀毒 搭建ClamAV服务器 1 前言: 在上一篇我们已经演示了整个方案,传送门<开源网站云查杀方案,搭建自己的云杀毒>:https://ww ...
- Kube-OVN:大型银行技术团队推荐的金融级云原生网络方案
近日,由TWT社区主办的2021容器云职业技能大赛团队赛的冠军作品:<适用于大中型银行的云原生技术体系建设方案>中,Kube-OVN成为银行技术团队推荐的金融级云原生网络最佳实践.本文部分 ...
- xx云网络实施方案案例
由于xx云在我公司进行试用,对接我方存储,于是乎就负责网络实施方案,下面是具体方案介绍 ip分配 具体网络拓扑如下: 下面是两台交换机配置 IPMI交换机配置如下: [Quidway]di cu !S ...
- 理解 OpenStack 高可用(HA)(1):OpenStack 高可用和灾备方案 [OpenStack HA and DR]
本系列会分析OpenStack 的高可用性(HA)概念和解决方案: (1)OpenStack 高可用方案概述 (2)Neutron L3 Agent HA - VRRP (虚拟路由冗余协议) (3)N ...
- 实现跨云应用——基于DNS的负载均衡
“公有云可以作为传统IT资源的延展,能帮助客户应对不断变化的需求”——这是我们在向客户介绍公有云产品时经常说的一句话.我们来看一个具体的需求: 某客户有一个web站点,部署在自有的数据中心(on-pr ...
- HP PCS 云监控大数据解决方案
——把数据从分散统一集中到数据中心 基于HP分布式并行计算/存储技术构建的云监控系统即是通过“云高清摄像机”及IaaS和PaaS监控系统平台,根据用户所需(SaaS)将多路监控数据流传送给“云端”,除 ...
- OpenStack 虚拟机监控方案确定
Contents [hide] 1 监控方案调研过程 1.1 1. 虚拟机里内置监控模块 1.2 2. 通过libvirt获取虚拟机数据监控. 2 a.测试openstack的自待组件ceilomet ...
- 免费ERP之云实施
近日,普实渠道在AIO5软件免费一周年之际,推出了重磅的动作:启动AIO5云实施,推广小微企业免费ERP落地应用. 这无疑是推动客户免费应用ERP的重大里程碑. 当前,在中国小微企业信息化方面,应用情 ...
随机推荐
- Java8函数式编程学习笔记(初探)
编程语言的整个目的就在于操作值,要是按照历史上编程语言的传统,这些值被成为一等值,而编程语言中的其他结构也许有助于表示值的结构,但在程序执行期间不能传递,因此为二等值,比如方法和类等则是二等值,类可以 ...
- 洛谷 1192:台阶问题(递推,DP)
题目描述 有 N 级的台阶,你一开始在底部,每次可以向上迈最多 K 级台阶(最少 1 级),问到达第 N 级台阶有多少种不同方式. 输入输出格式 输入格式: 两个正整数N,K. 输出格式: 一个正整数 ...
- HTML5和CSS3阶段,我是如何学习的?
经过一个月的学习,我收获了许多,今天的测验是做一个企业中文网站,令我自己感到吃惊的是,我前前后后用了4个小时就完成了,这在一个月前根本不可能,因为对布局属性的理解还不够深刻,常常会在调试中浪费大量时间 ...
- ITelephony.aidl
在src下先建立包名为com.android.internal.telephony(右键src > new > package,create package-info.java打钩),然后 ...
- 使用pip install XX 命令时报错
在使用pip命令安装的时候,我遇到这样的报错: C:\Users\86962>pip install Appium-Python-Client Collecting Appium-Python- ...
- window.location.href跳转问题2
"window.location.href"."location.href"是本页面跳转 "parent.location.href"是上一 ...
- Django 数据库操作进阶F和Q操作
Model中的F F 的操作通常的应用场景在于:公司对于每个员工,都涨500的工资.这个时候F就可以作为查询条件 1 2 3 from django.db.models import F models ...
- Linux内核中的机制学习总结
一.驱动中的poll机制 1.简介:select()和poll()系统调用的本质一样,前者在 BSD UNIX 中引入的,后者在 System V 中引入的. 应用程序使用 select() 或 po ...
- 用vmware安装gho文件心得
在卡饭学到了不少知识,下面是我的一个心得分享,希望大家能用的上. 用vmware安装gho文件心得 方法1:diskgenius+ghostexp用vm新建一个空白硬盘虚拟机, 记住虚拟机文件的存储位 ...
- bat删除系统默认共享
在我们的系统中,有很多默认的共享是开启的,可以设置一个bat文件在每次开机的时候把共享删除. net share c$ /del net share d$ /del net share e$ /del ...