OpenACC 云水参数化方案
▶ 书上第十三章,用一系列步骤优化一个云水参数化方案。用于熟悉 Fortran 以及 OpenACC 在其下的表现
● 代码,文件较多,放在一起了
- ! main.f90: driver program -- initialize, run the timed time loop, print timers, clean up
- PROGRAM main
- USE m_config, ONLY: nstop
- USE m_physics, ONLY: physics
- USE m_io, ONLY: write_output
- USE m_setup, ONLY: initialize, cleanup
- USE m_timing, ONLY: start_timer, end_timer, print_timers
- IMPLICIT NONE
- INTEGER :: ntstep ! time-step counter of the main loop
- INTEGER, parameter :: itimloop = ! timer id of the time loop; NOTE(review): the literal value is missing in this listing
- CALL initialize() ! initialize the timers and the device
- WRITE(*,"(A)") "Start of time loop"
- CALL start_timer(itimloop, "Time loop")
- DO ntstep = , nstop ! computation; NOTE(review): the lower bound is missing in this listing
- CALL physics() ! run the physical parameterizations for this step
- CALL write_output( ntstep ) ! prints mean(qv) only on output steps
- END DO
- CALL end_timer( itimloop )
- WRITE(*,"(A)") "End of time loop"
- CALL print_timers()
- CALL cleanup() ! deallocate the fields
- END PROGRAM main
- ! m_config.f90: run configuration parameters (NOTE(review): all literal values are missing in this listing)
- MODULE m_config
- INTEGER, parameter :: nx = ! number of grid points in longitude
- INTEGER, parameter :: ny = ! number of grid points in latitude
- INTEGER, parameter :: nz = ! number of grid points in altitude
- INTEGER, parameter :: nstop = ! number of time steps
- INTEGER, parameter :: nout = ! output interval, in time steps
- END MODULE m_config
- ! m_fields.f90: field variables shared between modules
- MODULE m_fields
- REAL*, ALLOCATABLE :: qv(:,:,:) ! water vapor content
- REAL*, ALLOCATABLE :: t(:,:,:) ! temperature
- END MODULE m_fields
- ! m_io.f90: input/output routines
- MODULE m_io
- USE m_config, ONLY: nout, nx, ny, nz
- USE m_fields, ONLY: qv
- IMPLICIT NONE
- CONTAINS
- SUBROUTINE write_output(ntstep) ! print the mean of qv over the grid on output steps
- IMPLICIT NONE
- INTEGER, INTENT(IN) :: ntstep ! current time step
- INTEGER :: i, j, k
- REAL* :: qv_mean ! mean water vapor content (scalar)
- IF (MOD(ntstep, nout) /= ) RETURN ! no output on this time step
- qv_mean = .0D0 ! compute the mean and print it
- DO k = , nz
- DO j = , ny
- DO i = , nx
- qv_mean = qv_mean + qv(i,j,k)
- END DO
- END DO
- END DO
- qv_mean = qv_mean / REAL(nx * ny * nz, KIND(qv_mean)) ! turn the accumulated sum into a mean
- WRITE(*,"(A,I6,A,ES18.8)") "Step: ", ntstep, ", mean(qv) =", qv_mean
- END SUBROUTINE write_output
- END MODULE m_io
- ! m_parametrizations.f90: physical parameterization schemes
- MODULE m_parametrizations
- IMPLICIT NONE
- REAL*, parameter :: cs1 = 1.0D-, cs2 = .02D0, cs3 = .2D0, cs4=.1D0, t0=.0D0 ! constants used by saturation_adjustment
- REAL*, parameter :: cm1 = 1.0D-, cm2=.0D0, cm3=.2D0, cm4=.0D0 ! constants used by microphysics
- CONTAINS
- SUBROUTINE saturation_adjustment(npx, npy, nlev, t, qc, qv) ! parameterization scheme 1
- IMPLICIT NONE
- INTEGER, INTENT(IN) :: npx, npy, nlev ! input dimensions (upper bounds of the i, j, k loops)
- REAL*, INTENT(IN) :: t(:,:,:) ! temperature
- REAL*, INTENT(OUT) :: qc(:,:,:) ! cloud water content
- REAL*, INTENT(INOUT) :: qv(:,:,:) ! water vapor content
- INTEGER :: i, j, k
- DO k = , nlev
- DO j = , npy
- DO i = , npx
- qv(i,j,k) = qv(i,j,k) + cs1*EXP(cs2*( t(i,j,k) - t0 )/( t(i,j,k) - cs3) ) ! point-wise increment of qv computed from t
- qc(i,j,k) = cs4 * qv(i,j,k) ! qc is the updated qv scaled by cs4
- END DO
- END DO
- END DO
- END SUBROUTINE saturation_adjustment
- SUBROUTINE microphysics(npx, npy, nlev, t, qc, qv) ! parameterization scheme 2
- IMPLICIT NONE
- INTEGER, INTENT(IN) :: npx, npy, nlev ! input dimensions (upper bounds of the i, j, k loops)
- REAL*, INTENT(INOUT) :: t(:,:,:) ! temperature, updated in place
- REAL*, INTENT(IN) :: qc(:,:,:) ! cloud water content
- REAL*, INTENT(INOUT) :: qv(:,:,:) ! water vapor content, updated in place
- INTEGER :: i, j, k
- DO k = , nlev
- DO j = , npy
- DO i = , npx
- qv(i, j, k) = qv(i,j,k-) + cm1*(t(i,j,k)-cm2)**cm3 ! reads qv at a k-offset level; NOTE(review): the offset literal is missing in this listing
- t(i, j, k) = t(i, j, k)*( .0D0 - cm4*qc(i,j,k)+qv(i,j,k) ) ! rescale t using qc and the new qv
- END DO
- END DO
- END DO
- END SUBROUTINE microphysics
- END MODULE m_parametrizations
- ! m_physics.f90: execution of the parameterization schemes
- MODULE m_physics
- USE m_config, ONLY: nx, ny, nz
- USE m_fields, ONLY: qv, t
- USE m_parametrizations, ONLY: saturation_adjustment, microphysics
- IMPLICIT NONE
- CONTAINS
- SUBROUTINE physics() ! call both parameterizations in order on the module fields t and qv
- IMPLICIT NONE
- REAL* :: qc(nx,ny,nz) ! temporary variable for the cloud water content
- CALL saturation_adjustment(nx, ny, nz, t, qc, qv) ! first physical parameterization
- CALL microphysics(nx, ny, nz, t, qc, qv) ! second physical parameterization
- END SUBROUTINE physics
- END MODULE m_physics
- ! m_timing.f90: wall-clock timers based on SYSTEM_CLOCK
- MODULE m_timing
- IMPLICIT NONE
- INTEGER, PARAMETER :: ntimer= ! number of timers; NOTE(review): the literal value is missing in this listing
- REAL* :: rtimer(ntimer) ! measured time of each timer, in seconds (count difference / count rate)
- CHARACTER() :: timertag(ntimer) ! label of each timer; an empty label marks a timer that was never started
- INTEGER :: icountold(ntimer), & ! tick (start of timer section)
- icountrate, & ! countrate of SYSTEM_CLOCK()
- icountmax ! maximum counter value of SYSTEM_CLOCK()
- CONTAINS
- SUBROUTINE init_timers() ! reset all timers and query the clock rate
- IMPLICIT NONE
- rtimer(:) = .0D0
- timertag(:) = ""
- icountold(:) =
- CALL SYSTEM_CLOCK( COUNT_RATE=icountrate, COUNT_MAX=icountmax )
- END SUBROUTINE init_timers
- SUBROUTINE start_timer(id, tag) ! start timer number id and label it with tag
- IMPLICIT NONE
- INTEGER, INTENT(IN) :: id
- CHARACTER(*), INTENT(IN) :: tag
- IF (id < .OR. id > ntimer) THEN ! check that the timer id is in range
- WRITE(*,"(A,I4,A,I4)") "Error: timer id=", id, "exceeds maximum timer number", ntimer
- STOP
- END IF
- IF (LEN_TRIM(timertag(id)) /= ) THEN ! check whether this timer has already been started
- WRITE(*,"(A,I4)") "Error: timer already started previously, id:", id
- STOP
- END IF
- IF (LEN_TRIM(tag) == ) THEN ! check that the timer label is not empty
- WRITE(*,"(A,I4)") "Error: empty tag provided, id:", id
- STOP
- END IF
- timertag(id) = TRIM(tag) ! store the label
- !$acc wait
- CALL SYSTEM_CLOCK( COUNT=icountold(id) ) ! start timing
- END SUBROUTINE start_timer
- SUBROUTINE end_timer(id) ! stop timer number id and store the elapsed time
- IMPLICIT NONE
- INTEGER, INTENT(IN) :: id
- INTEGER :: icountnew
- IF (id < .OR. id > ntimer) THEN ! check that the timer id is in range
- WRITE(*,"(A,I4,A,I4)") "Error: timer id=", id, "exceed max timer number", ntimer
- STOP
- END IF
- IF (LEN_TRIM(timertag(id)) == ) THEN ! check that the timer has been started
- WRITE(*,"(A,I4)") "Error: Need to call start_timer before end_timing, id:", id
- STOP
- END IF
- !$acc wait
- CALL SYSTEM_CLOCK( COUNT=icountnew ) ! read the current count and compute the elapsed time
- rtimer(id) = ( REAL(icountnew - icountold(id), KIND(rtimer(id))) ) / REAL(icountrate, KIND(rtimer(id))) ! NOTE(review): counter wrap-around (icountmax) is not handled here
- END SUBROUTINE end_timer
- SUBROUTINE print_timers() ! print every timer that holds a positive time
- IMPLICIT NONE
- INTEGER :: id
- WRITE(*,"(A)") "----------------------------"
- WRITE(*,"(A)") "Timers:"
- WRITE(*,"(A)") "----------------------------"
- DO id = , ntimer
- IF ( rtimer(id) > .0D0 ) THEN
- WRITE(*,"(A15,A2,F8.2,A)") timertag(id), ": ", rtimer(id)*.0D3, " ms"
- END IF
- END DO
- WRITE(*,"(A)") "----------------------------"
- END SUBROUTINE print_timers
- END MODULE m_timing
- ! m_setup.f90: initialization and cleanup
- MODULE m_setup
- USE m_config, ONLY: nstop, nout, nx, ny, nz
- USE m_fields, ONLY: t,qv
- USE m_timing, ONLY: init_timers, start_timer, end_timer
- IMPLICIT NONE
- CONTAINS
- SUBROUTINE initialize() ! initialize the timers and the device
- IMPLICIT NONE
- INTEGER, PARAMETER :: itiminit = ! timer id; NOTE(review): the literal value is missing in this listing
- INTEGER :: i, j, k
- #ifdef _OPENACC
- WRITE(*,"(A)") "Running with OpenACC"
- #else
- WRITE(*,"(A)") "Running without OpenACC"
- #endif
- WRITE(*,"(A)") "Initialize"
- CALL init_timers()
- CALL start_timer( itiminit, "Initialization" )
- ALLOCATE( t(nx,ny,nz), qv(nx,ny,nz) )
- DO k =, nz
- DO j = , ny
- DO i = , nx
- t(i,j,k) = .0D0 * (.2D0 + .07D0 * COS(.2D0 * REAL(i+j+k) / REAL(nx+ny+nz))) ! initial temperature: cosine profile in i+j+k
- qv(i,j,k) = 1.0D- * (.1D0 + .13D0 * COS(.3D0 * REAL(i+j+k) / REAL(nx*ny*nz))) ! initial water vapor: cosine profile in i+j+k
- END DO
- END DO
- END DO
- #ifdef _OPENACC
- CALL initialize_gpu()
- #endif
- CALL end_timer( itiminit )
- END SUBROUTINE initialize
- SUBROUTINE initialize_gpu()! run a small kernel on the GPU to initialize it
- IMPLICIT NONE
- INTEGER :: temp()
- INTEGER :: i
- !$acc parallel loop
- DO i = ,
- temp(i) =
- END DO
- IF (SUM(temp) == ) THEN
- WRITE(*,"(A)") "GPU initialized"
- ELSE
- WRITE(*,"(A,I4)") "Error: Problem encountered initializing the GPU"
- STOP
- END IF
- END SUBROUTINE initialize_gpu
- SUBROUTINE cleanup()! free the memory of t and qv
- IMPLICIT NONE
- DEALLOCATE( t, qv )
- END SUBROUTINE cleanup
- END MODULE m_setup
● OpenMP 优化,改了 m_io.f90,m_parametrizations.f90,m_setup.f90
- ! m_io.f90: output routine with an OpenMP reduction
- MODULE m_io
- USE m_config, ONLY: nout, nx, ny, nz
- USE m_fields, ONLY: qv
- IMPLICIT NONE
- CONTAINS
- SUBROUTINE write_output(ntstep) ! print the mean of qv over the grid on output steps
- IMPLICIT NONE
- INTEGER, INTENT(IN) :: ntstep ! current time step
- INTEGER :: i, j, k
- REAL* :: qv_mean ! mean water vapor content (scalar)
- IF (MOD(ntstep, nout) /= ) RETURN ! no output on this time step
- qv_mean = .0D0
- DO k = , nz ! the k loop itself is not work-shared; the directive below applies to the j loop of each k
- !$OMP PARALLEL DO PRIVATE(i,j) SHARED(k,qv) REDUCTION(+:qv_mean)
- DO j = , ny
- DO i = , nx
- qv_mean = qv_mean + qv(i,j,k)
- END DO
- END DO
- END DO
- qv_mean = qv_mean / REAL(nx * ny * nz, KIND(qv_mean)) ! turn the accumulated sum into a mean
- WRITE(*,"(A,I6,A,ES18.8)") "Step: ", ntstep, ", mean(qv) =", qv_mean
- END SUBROUTINE write_output
- END MODULE m_io
- ! m_parametrizations.f90: parameterization schemes with OpenMP work-sharing on the j loop
- MODULE m_parametrizations
- IMPLICIT NONE
- REAL*, parameter :: cs1 = 1.0D-, cs2 = .02D0, cs3 = .2D0, cs4=.1D0, t0=.0D0 ! constants used by saturation_adjustment
- REAL*, parameter :: cm1 = 1.0D-, cm2=.0D0, cm3=.2D0, cm4=.0D0 ! constants used by microphysics
- CONTAINS
- SUBROUTINE saturation_adjustment(npx, npy, nlev, t, qc, qv) ! parameterization scheme 1
- IMPLICIT NONE
- INTEGER, INTENT(IN) :: npx, npy, nlev ! input dimensions (upper bounds of the i, j, k loops)
- REAL*, INTENT(IN) :: t(:,:,:) ! temperature
- REAL*, INTENT(OUT) :: qc(:,:,:) ! cloud water content
- REAL*, INTENT(INOUT) :: qv(:,:,:) ! water vapor content
- INTEGER :: i, j, k
- !$OMP PARALLEL
- DO k = , nlev ! inside the parallel region but without a DO directive: every thread runs the k loop
- !$OMP DO PRIVATE(i,j)
- DO j = , npy
- DO i = , npx
- qv(i,j,k) = qv(i,j,k) + cs1*EXP(cs2*( t(i,j,k) - t0 )/( t(i,j,k) - cs3) ) ! point-wise increment of qv computed from t
- qc(i,j,k) = cs4 * qv(i,j,k) ! qc is the updated qv scaled by cs4
- END DO
- END DO
- END DO
- !$OMP END PARALLEL
- END SUBROUTINE saturation_adjustment
- SUBROUTINE microphysics(npx, npy, nlev, t, qc, qv) ! parameterization scheme 2
- IMPLICIT NONE
- INTEGER, INTENT(IN) :: npx, npy, nlev ! input dimensions (upper bounds of the i, j, k loops)
- REAL*, INTENT(INOUT) :: t(:,:,:) ! temperature, updated in place
- REAL*, INTENT(IN) :: qc(:,:,:) ! cloud water content
- REAL*, INTENT(INOUT) :: qv(:,:,:) ! water vapor content, updated in place
- INTEGER :: i, j, k
- !$OMP PARALLEL
- DO k = , nlev ! inside the parallel region but without a DO directive: every thread runs the k loop
- !$OMP DO PRIVATE(i,j)
- DO j = , npy
- DO i = , npx
- qv(i, j, k) = qv(i,j,k-) + cm1*(t(i,j,k)-cm2)**cm3 ! reads qv at a k-offset level; NOTE(review): the offset literal is missing in this listing
- t(i, j, k) = t(i, j, k)*( .0D0 - cm4*qc(i,j,k)+qv(i,j,k) ) ! rescale t using qc and the new qv
- END DO
- END DO
- END DO
- !$OMP END PARALLEL
- END SUBROUTINE microphysics
- END MODULE m_parametrizations
- ! m_setup.f90: initialization and cleanup, with an OpenMP thread-count report
- MODULE m_setup
- USE m_config, ONLY: nstop, nout, nx, ny, nz
- USE m_fields, ONLY: t,qv
- USE m_timing, ONLY: init_timers, start_timer, end_timer
- IMPLICIT NONE
- CONTAINS
- SUBROUTINE initialize() ! initialize the timers and the device
- IMPLICIT NONE
- INTEGER, PARAMETER :: itiminit = ! timer ID
- INTEGER :: i, j, k ! loop indices
- INTEGER :: OMP_GET_NUM_THREADS, OMP_GET_THREAD_NUM ! result types of the OpenMP runtime functions called below
- #ifdef _OPENACC
- WRITE(*,"(A)") "Running with OpenACC"
- #else
- WRITE(*,"(A)") "Running without OpenACC"
- #ifdef _OPENMP
- !$OMP PARALLEL
- IF (OMP_GET_THREAD_NUM()==) THEN ! only one selected thread prints the message
- WRITE(*,"(A,I4,A)") "Running with OpenMP with ", OMP_GET_NUM_THREADS(), " threads"
- END IF
- !$OMP END PARALLEL
- #endif
- #endif
- WRITE(*,"(A)") "Initialize"
- CALL init_timers()
- CALL start_timer( itiminit, "Initialization" )
- ALLOCATE( t(nx,ny,nz), qv(nx,ny,nz) )
- DO k =, nz
- DO j = , ny
- DO i = , nx
- t(i,j,k) = .0D0 * (.2D0 + .07D0 * COS(.2D0 * REAL(i+j+k) / REAL(nx+ny+nz))) ! initial temperature: cosine profile in i+j+k
- qv(i,j,k) = 1.0D- * (.1D0 + .13D0 * COS(.3D0 * REAL(i+j+k) / REAL(nx*ny*nz))) ! initial water vapor: cosine profile in i+j+k
- END DO
- END DO
- END DO
- #ifdef _OPENACC
- CALL initialize_gpu()
- #endif
- CALL end_timer( itiminit )
- END SUBROUTINE initialize
- SUBROUTINE initialize_gpu() ! run a small kernel on the GPU and check its result
- IMPLICIT NONE
- INTEGER :: temp()
- INTEGER :: i
- !$acc parallel loop
- DO i = ,
- temp(i) =
- END DO
- IF (SUM(temp) == ) THEN
- WRITE(*,"(A)") "GPU initialized"
- ELSE
- WRITE(*,"(A,I4)") "Error: Problem encountered initializing the GPU"
- STOP
- END IF
- END SUBROUTINE initialize_gpu
- SUBROUTINE cleanup() ! free the memory of t and qv
- IMPLICIT NONE
- DEALLOCATE( t, qv )
- END SUBROUTINE cleanup
- END MODULE m_setup
● OpenACC 优化,改了 m_io.f90,m_parametrizations.f90,m_physics.f90,m_setup.f90。书上的优化 04 涉及算法改动,没有参与比较
- ! m_io.f90: output routine with the reduction offloaded through OpenACC
- MODULE m_io
- USE m_config, ONLY: nout, nx, ny, nz
- USE m_fields, ONLY: qv
- IMPLICIT NONE
- CONTAINS
- SUBROUTINE write_output(ntstep) ! print the mean of qv over the grid on output steps
- IMPLICIT NONE
- INTEGER, INTENT(IN) :: ntstep ! current time step
- INTEGER :: i, j, k
- REAL* :: qv_mean ! mean water vapor content (scalar)
- IF (MOD(ntstep, nout) /= ) RETURN ! no output on this time step
- !$acc data present(qv)
- qv_mean = .0D0
- !$acc parallel
- !$acc loop gang vector collapse() reduction(+:qv_mean)
- DO k = , nz
- DO j = , ny
- DO i = , nx
- qv_mean = qv_mean + qv(i,j,k)
- END DO
- END DO
- END DO
- !$acc end parallel
- !$acc end data
- qv_mean = qv_mean / REAL(nx * ny * nz, KIND(qv_mean)) ! turn the accumulated sum into a mean
- WRITE(*,"(A,I6,A,ES18.8)") "Step: ", ntstep, ", mean(qv) =", qv_mean
- END SUBROUTINE write_output
- END MODULE m_io
- ! m_parametrizations.f90: parameterization schemes offloaded through OpenACC
- MODULE m_parametrizations
- IMPLICIT NONE
- REAL*, parameter :: cs1 = 1.0D-, cs2 = .02D0, cs3 = .2D0, cs4=.1D0, t0=.0D0 ! constants used by saturation_adjustment
- REAL*, parameter :: cm1 = 1.0D-, cm2=.0D0, cm3=.2D0, cm4=.0D0 ! constants used by microphysics
- CONTAINS
- SUBROUTINE saturation_adjustment(npx, npy, nlev, t, qc, qv) ! parameterization scheme 1
- IMPLICIT NONE
- INTEGER, INTENT(IN) :: npx, npy, nlev ! input dimensions (upper bounds of the i, j, k loops)
- REAL*, INTENT(IN) :: t(:,:,:) ! temperature
- REAL*, INTENT(OUT) :: qc(:,:,:) ! cloud water content
- REAL*, INTENT(INOUT) :: qv(:,:,:) ! water vapor content
- INTEGER :: i, j, k
- !$acc data present(t,qv,qc)
- !$acc parallel
- !$acc loop gang vector collapse()
- DO k = , nlev
- DO j = , npy
- DO i = , npx
- qv(i,j,k) = qv(i,j,k) + cs1*EXP(cs2*( t(i,j,k) - t0 )/( t(i,j,k) - cs3) ) ! point-wise increment of qv computed from t
- qc(i,j,k) = cs4 * qv(i,j,k) ! qc is the updated qv scaled by cs4
- END DO
- END DO
- END DO
- !$acc end parallel
- !$acc end data
- END SUBROUTINE saturation_adjustment
- SUBROUTINE microphysics(npx, npy, nlev, t, qc, qv) ! parameterization scheme 2
- IMPLICIT NONE
- INTEGER, INTENT(IN) :: npx, npy, nlev ! input dimensions (upper bounds of the i, j, k loops)
- REAL*, INTENT(INOUT) :: t(:,:,:) ! temperature, updated in place
- REAL*, INTENT(IN) :: qc(:,:,:) ! cloud water content
- REAL*, INTENT(INOUT) :: qv(:,:,:) ! water vapor content, updated in place
- INTEGER :: i, j, k
- !$acc data present(t,qv,qc)
- !$acc parallel
- !$acc loop seq
- DO k = , nlev ! marked seq above; the body reads qv at a k-offset level
- !$acc loop gang
- DO j = , npy
- !$acc loop vector
- DO i = , npx
- qv(i, j, k) = qv(i,j,k-) + cm1*(t(i,j,k)-cm2)**cm3 ! NOTE(review): the offset literal after "k-" is missing in this listing
- t(i, j, k) = t(i, j, k)*( .0D0 - cm4*qc(i,j,k)+qv(i,j,k) ) ! rescale t using qc and the new qv
- END DO
- END DO
- END DO
- !$acc end parallel
- !$acc end data
- END SUBROUTINE microphysics
- END MODULE m_parametrizations
- ! m_physics.f90: runs the parameterization schemes; qc is kept at module level here
- MODULE m_physics
- USE m_config, ONLY: nx, ny, nz
- USE m_fields, ONLY: qv, t
- USE m_parametrizations, ONLY: saturation_adjustment, microphysics
- IMPLICIT NONE
- REAL*, ALLOCATABLE :: qc(:,:,:) ! declared up front; allocated by init_physics and released by finalize_physics
- CONTAINS
- SUBROUTINE physics() ! call both parameterizations in order; init_physics must have allocated qc first
- IMPLICIT NONE
- CALL saturation_adjustment(nx, ny, nz, t, qc, qv)
- CALL microphysics(nx, ny, nz, t, qc, qv)
- END SUBROUTINE physics
- SUBROUTINE init_physics() ! allocate qc on the host, then create it on the device
- IMPLICIT NONE
- ALLOCATE( qc(nx,ny,nz) )
- !$acc enter data create(qc)
- END SUBROUTINE init_physics
- SUBROUTINE finalize_physics() ! delete qc from the device, then deallocate it on the host
- IMPLICIT NONE
- !$acc exit data delete(qc)
- DEALLOCATE(qc)
- END SUBROUTINE finalize_physics
- END MODULE m_physics
- ! m_setup.f90: initialization and cleanup, including OpenACC device data management
- MODULE m_setup
- USE m_config, ONLY: nstop, nout, nx, ny, nz
- USE m_fields, ONLY: t,qv
- USE m_timing, ONLY: init_timers, start_timer, end_timer
- USE m_physics, ONLY: init_physics, finalize_physics
- IMPLICIT NONE
- CONTAINS
- SUBROUTINE initialize() ! initialize the timers and the device
- IMPLICIT NONE
- INTEGER, PARAMETER :: itiminit = ! timer id; NOTE(review): the literal value is missing in this listing
- INTEGER :: i, j, k
- #ifdef _OPENACC
- WRITE(*,"(A)") "Running with OpenACC"
- #else
- WRITE(*,"(A)") "Running without OpenACC"
- #endif
- WRITE(*,"(A)") "Initialize"
- CALL init_timers()
- CALL start_timer( itiminit, "Initialization" )
- ALLOCATE( t(nx,ny,nz), qv(nx,ny,nz) )
- !$acc enter data create(t,qv)
- DO k =, nz ! t and qv are filled by this host loop nest; the update directive after it refreshes the device copies
- DO j = , ny
- DO i = , nx
- t(i,j,k) = .0D0 * (.2D0 + .07D0 * COS(.2D0 * REAL(i+j+k) / REAL(nx+ny+nz))) ! initial temperature: cosine profile in i+j+k
- qv(i,j,k) = 1.0D- * (.1D0 + .13D0 * COS(.3D0 * REAL(i+j+k) / REAL(nx*ny*nz))) ! initial water vapor: cosine profile in i+j+k
- END DO
- END DO
- END DO
- !$acc update device(t,qv)
- #ifdef _OPENACC
- CALL initialize_gpu()
- #endif
- CALL init_physics() ! allocates qc and creates it on the device
- CALL end_timer( itiminit )
- END SUBROUTINE initialize
- SUBROUTINE initialize_gpu() ! run a small kernel on the GPU and check its result
- IMPLICIT NONE
- INTEGER :: temp()
- INTEGER :: i
- !$acc parallel loop
- DO i = ,
- temp(i) =
- END DO
- IF (SUM(temp) == ) THEN
- WRITE(*,"(A)") "GPU initialized"
- ELSE
- WRITE(*,"(A,I4)") "Error: Problem encountered initializing the GPU"
- STOP
- END IF
- END SUBROUTINE initialize_gpu
- SUBROUTINE cleanup() ! delete t and qv from the device, free them on the host, then release qc
- IMPLICIT NONE
- !$acc exit data delete(t,qv)
- DEALLOCATE( t, qv )
- CALL finalize_physics() ! deletes qc from the device and deallocates it
- END SUBROUTINE cleanup
- END MODULE m_setup
● 所有的输出结果。单独编译一个模式(而不使用默认的 makefile)时,在命令 pgf90 中要使用参数 -Mpreprocess,意思是让预处理器处理 Fortran 源文件,否则 m_setup.f90 中的 # 预处理指令会被当成错误
- cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ make example_serial example_openmp example_openacc1 example_openacc2 example_openacc3 example_openacc4
- make[]: Entering directory '/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13/example_serial'
- compiling m_config.f90
- compiling m_fields.f90
- compiling m_io.f90
- compiling m_parametrizations.f90
- compiling m_physics.f90
- compiling m_timing.f90
- compiling m_setup.f90
- compiling main.f90
- make[]: Leaving directory '/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13/example_serial'
- make[]: Entering directory '/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13/example_openmp'
- ... ! 类似上面的过程
- make[]: Leaving directory '/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13/example_openacc4'
- cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ example_serial/example_serial
- Running without OpenACC
- Initialize
- Start of time loop
- Step: , mean(qv) = 1.14302104E-04
- Step: , mean(qv) = 1.34041461E-04
- Step: , mean(qv) = 1.53710207E-04
- Step: , mean(qv) = 1.73309068E-04
- Step: , mean(qv) = 1.92838848E-04
- End of time loop
- ----------------------------
- Timers:
- ----------------------------
- Initialization : 17.28 ms
- Time loop : 978.08 ms
- ----------------------------
- cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ example_openmp/example_openmp
- Running without OpenACC
- Running with OpenMP with threads
- Initialize
- Start of time loop
- Step: , mean(qv) = 1.14302104E-04
- Step: , mean(qv) = 1.34041461E-04
- Step: , mean(qv) = 1.53710207E-04
- Step: , mean(qv) = 1.73309068E-04
- Step: , mean(qv) = 1.92838848E-04
- End of time loop
- ----------------------------
- Timers:
- ----------------------------
- Initialization : 17.96 ms
- Time loop : 898.92 ms
- ----------------------------
- cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ example_openacc1/example_openacc1
- Running with OpenACC
- Initialize
- GPU initialized
- Start of time loop
- Step: , mean(qv) = 1.14302104E-04
- Step: , mean(qv) = 1.34041461E-04
- Step: , mean(qv) = 1.53710207E-04
- Step: , mean(qv) = 1.73309068E-04
- Step: , mean(qv) = 1.92838848E-04
- End of time loop
- ----------------------------
- Timers:
- ----------------------------
- Initialization : 191.11 ms
- Time loop : 1044.35 ms
- ----------------------------
- cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ example_openacc2/example_openacc2
- Running with OpenACC
- Initialize
- GPU initialized
- Start of time loop
- Step: , mean(qv) = 1.14302104E-04
- Step: , mean(qv) = 1.34041461E-04
- Step: , mean(qv) = 1.53710207E-04
- Step: , mean(qv) = 1.73309068E-04
- Step: , mean(qv) = 1.92838848E-04
- End of time loop
- ----------------------------
- Timers:
- ----------------------------
- Initialization : 176.72 ms
- Time loop : 142.11 ms
- ----------------------------
- cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ example_openacc3/example_openacc3
- Running with OpenACC
- Initialize
- GPU initialized
- Start of time loop
- Step: , mean(qv) = 1.14302104E-04
- Step: , mean(qv) = 1.34041461E-04
- Step: , mean(qv) = 1.53710207E-04
- Step: , mean(qv) = 1.73309068E-04
- Step: , mean(qv) = 1.92838848E-04
- End of time loop
- ----------------------------
- Timers:
- ----------------------------
- Initialization : 162.15 ms
- Time loop : 121.77 ms
- ----------------------------
- cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ example_openacc4/example_openacc4
- Running with OpenACC
- Initialize
- GPU initialized
- Start of time loop
- Step: , mean(qv) = 1.14302104E-04
- Step: , mean(qv) = 1.34041461E-04
- Step: , mean(qv) = 1.53710207E-04
- Step: , mean(qv) = 1.73309068E-04
- Step: , mean(qv) = 1.92838848E-04
- End of time loop
- ----------------------------
- Timers:
- ----------------------------
- Initialization : 152.47 ms
- Time loop : 166.53 ms
- ----------------------------
● 所有的结果在 nvprof 中的图形。三张图分别为 “仅计算优化无数据优化”,“计算优化与数据优化”,“手工优化变量”
OpenACC 云水参数化方案的更多相关文章
- Linux云主机 监控方案浅析
1.为何需要监控 监控是运维工程师的眼睛,它可帮助运维工程师第一时间发现系统的问题. 对于服务器的整个生命周期,都要和监控打交道: 当有服务器上架,都需要加入比如CPU负载.内存.网络.磁盘等基础监控 ...
- 开源网站云查杀方案,搭建自己的云杀毒-搭建ClamAV服务器
开源网站云查杀方案,搭建自己的云杀毒 搭建ClamAV服务器 1 前言: 在上一篇我们已经演示了整个方案,传送门<开源网站云查杀方案,搭建自己的云杀毒>:https://ww ...
- Kube-OVN:大型银行技术团队推荐的金融级云原生网络方案
近日,由TWT社区主办的2021容器云职业技能大赛团队赛的冠军作品:<适用于大中型银行的云原生技术体系建设方案>中,Kube-OVN成为银行技术团队推荐的金融级云原生网络最佳实践.本文部分 ...
- xx云网络实施方案案例
由于xx云在我公司进行试用,对接我方存储,于是乎就负责网络实施方案,下面是具体方案介绍 ip分配 具体网络拓扑如下: 下面是两台交换机配置 IPMI交换机配置如下: [Quidway]di cu !S ...
- 理解 OpenStack 高可用(HA)(1):OpenStack 高可用和灾备方案 [OpenStack HA and DR]
本系列会分析OpenStack 的高可用性(HA)概念和解决方案: (1)OpenStack 高可用方案概述 (2)Neutron L3 Agent HA - VRRP (虚拟路由冗余协议) (3)N ...
- 实现跨云应用——基于DNS的负载均衡
“公有云可以作为传统IT资源的延展,能帮助客户应对不断变化的需求”——这是我们在向客户介绍公有云产品时经常说的一句话.我们来看一个具体的需求: 某客户有一个web站点,部署在自有的数据中心(on-pr ...
- HP PCS 云监控大数据解决方案
——把数据从分散统一集中到数据中心 基于HP分布式并行计算/存储技术构建的云监控系统即是通过“云高清摄像机”及IaaS和PaaS监控系统平台,根据用户所需(SaaS)将多路监控数据流传送给“云端”,除 ...
- OpenStack 虚拟机监控方案确定
Contents [hide] 1 监控方案调研过程 1.1 1. 虚拟机里内置监控模块 1.2 2. 通过libvirt获取虚拟机数据监控. 2 a.测试openstack的自待组件ceilomet ...
- 免费ERP之云实施
近日,普实渠道在AIO5软件免费一周年之际,推出了重磅的动作:启动AIO5云实施,推广小微企业免费ERP落地应用. 这无疑是推动客户免费应用ERP的重大里程碑. 当前,在中国小微企业信息化方面,应用情 ...
随机推荐
- BZOJ4767: 两双手【组合数学+容斥原理】
Description 老W是个棋艺高超的棋手,他最喜欢的棋子是马,更具体地,他更加喜欢马所行走的方式.老W下棋时觉得无聊,便决定加强马所行走的方式,更具体地,他有两双手,其中一双手能让马从(u,v) ...
- List和json数组的转换(赋源代码)
public class a11111111 { //参数obj可以是 json对象,字符串, list public static void fun(Object obj){ JSONArray d ...
- $.ajax()方法详解--极快瑞中的阿贾克斯函数
ajax() 函数和那些abs()函数 sin()这些函数都是底层实现好了,封装好了,可以直接拿来用的 使用者只需要传入参数,然后就可以得到相应的结果 $.ajax({name:value, name ...
- restheart 基本使用
restheart 是一个方便基于mongodb的restapi 开发框架 参考项目 https://github.com/rongfengliang/restheart-docker-compose ...
- Robots.txt 编写
搜索引擎Robots协议,是放置在网站根目录下robots.txt文本文件,在文件中可以设定搜索引擎蜘蛛爬行规则.设置搜索引擎蜘蛛Spider抓取内容规则.下面Seoer惜缘举例robots写法规则与 ...
- 解决在sass中使用calc不能包含变量的问题。
今天写sass的时候,发现在sass中使用calc,如果calc中包含一个变量,不会产生效果,看代码: .app-inner { display: flex; height: calc(100% - ...
- Python网络爬虫-xpath模块
一.正解解析 单字符: . : 除换行以外所有字符 [] :[aoe] [a-w] 匹配集合中任意一个字符 \d :数字 [0-9] \D : 非数字 \w :数字.字母.下划线.中文 \W : 非\ ...
- nginx和apache最核心的区别在于apache是同步多进程模型,一个连接对应一个进程;nginx是异步的,多个连接(万级别)可以对应一个进程
nginx和apache的一些优缺点比较,摘自网络,加自己的一些整理. nginx相对于apache的优点: 1.轻量级,同样是web 服务,比apache 占用更少的内存及资源 2.抗并发,ngin ...
- gc之六--Minor GC、Major GC、Full GC以及Mixed GC之间的区别
目录: GC之一--GC 的算法分析.垃圾收集器.内存分配策略介绍 GC之二--GC日志分析(jdk1.8)整理中 GC之三--GC 触发Full GC执行的情况及应对策略 gc之四--Minor G ...
- Tensorflow笔记——神经网络图像识别(五)手写数字识别