Skip to content

Commit e4b5bd4

Browse files
committed
Merge branch 'framework/acc_mpas_reconstruct_2d' into develop (PR #1289)
This merge adds OpenACC directives to mpas_reconstruct_2d so it can execute on GPUs. Timing for the OpenACC data transfers in this routine is captured in the log file by a new timer: mpas_reconstruct_2d [ACC_data_xfer]. Note two points: - The use of the sin and cos functions on the GPU causes answer differences from previous GPU results. - The time-invariant fields in this routine (coeffs_reconstruct, nEdgesOnCell, edgesOnCell, latCell, and lonCell) are handled in both mpas_reconstruct_2d and in mpas_atm_dynamics_{init,finalize}, since mpas_reconstruct_2d is called once before the first call to mpas_atm_dynamics_init. * framework/acc_mpas_reconstruct_2d: Initial OpenACC port of mpas_vector_reconstruct_2d Prepare for OpenACC porting in mpas_reconstruct_2d
2 parents 9f79b36 + 607d858 commit e4b5bd4

2 files changed

Lines changed: 100 additions & 21 deletions

File tree

src/core_atmosphere/dynamics/mpas_atm_time_integration.F

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,9 @@ subroutine mpas_atm_dynamics_init(domain)
245245
real (kind=RKIND), dimension(:), pointer :: specZoneMaskCell
246246
real (kind=RKIND), dimension(:), pointer :: meshScalingRegionalCell
247247
real (kind=RKIND), dimension(:), pointer :: meshScalingRegionalEdge
248+
real (kind=RKIND), dimension(:), pointer :: latCell
249+
real (kind=RKIND), dimension(:), pointer :: lonCell
250+
real (kind=RKIND), dimension(:,:,:), pointer :: coeffs_reconstruct
248251
#endif
249252

250253

@@ -403,6 +406,15 @@ subroutine mpas_atm_dynamics_init(domain)
403406

404407
call mpas_pool_get_array(mesh, 'meshScalingRegionalEdge', meshScalingRegionalEdge)
405408
!$acc enter data copyin(meshScalingRegionalEdge)
409+
410+
call mpas_pool_get_array(mesh, 'latCell', latCell)
411+
!$acc enter data copyin(latCell)
412+
413+
call mpas_pool_get_array(mesh, 'lonCell', lonCell)
414+
!$acc enter data copyin(lonCell)
415+
416+
call mpas_pool_get_array(mesh, 'coeffs_reconstruct', coeffs_reconstruct)
417+
!$acc enter data copyin(coeffs_reconstruct)
406418
#endif
407419

408420
end subroutine mpas_atm_dynamics_init
@@ -484,6 +496,9 @@ subroutine mpas_atm_dynamics_finalize(domain)
484496
real (kind=RKIND), dimension(:), pointer :: specZoneMaskCell
485497
real (kind=RKIND), dimension(:), pointer :: meshScalingRegionalCell
486498
real (kind=RKIND), dimension(:), pointer :: meshScalingRegionalEdge
499+
real (kind=RKIND), dimension(:), pointer :: latCell
500+
real (kind=RKIND), dimension(:), pointer :: lonCell
501+
real (kind=RKIND), dimension(:,:,:), pointer :: coeffs_reconstruct
487502
#endif
488503

489504

@@ -642,6 +657,15 @@ subroutine mpas_atm_dynamics_finalize(domain)
642657

643658
call mpas_pool_get_array(mesh, 'meshScalingRegionalEdge', meshScalingRegionalEdge)
644659
!$acc exit data delete(meshScalingRegionalEdge)
660+
661+
call mpas_pool_get_array(mesh, 'latCell', latCell)
662+
!$acc exit data delete(latCell)
663+
664+
call mpas_pool_get_array(mesh, 'lonCell', lonCell)
665+
!$acc exit data delete(lonCell)
666+
667+
call mpas_pool_get_array(mesh, 'coeffs_reconstruct', coeffs_reconstruct)
668+
!$acc exit data delete(coeffs_reconstruct)
645669
#endif
646670

647671
end subroutine mpas_atm_dynamics_finalize

src/operators/mpas_vector_reconstruction.F

Lines changed: 76 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,16 @@ module mpas_vector_reconstruction
2424
use mpas_rbf_interpolation
2525
use mpas_vector_operations
2626

27+
#ifdef MPAS_OPENACC
28+
! For use in regions ported with OpenACC to track in-function transfers
29+
use mpas_timer, only : mpas_timer_start, mpas_timer_stop
30+
#define MPAS_ACC_TIMER_START(X) call mpas_timer_start(X)
31+
#define MPAS_ACC_TIMER_STOP(X) call mpas_timer_stop(X)
32+
#else
33+
#define MPAS_ACC_TIMER_START(X)
34+
#define MPAS_ACC_TIMER_STOP(X)
35+
#endif
36+
2737
implicit none
2838

2939
public :: mpas_init_reconstruct, mpas_reconstruct
@@ -207,10 +217,11 @@ subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uRecon
207217

208218
! temporary arrays needed in the compute procedure
209219
logical :: includeHalosLocal
210-
integer, pointer :: nCells
220+
integer, pointer :: nCells_ptr, nVertLevels_ptr
221+
integer :: nCells, nVertLevels
211222
integer, dimension(:,:), pointer :: edgesOnCell
212223
integer, dimension(:), pointer :: nEdgesOnCell
213-
integer :: iCell,iEdge, i
224+
integer :: iCell,iEdge, i, k
214225
real(kind=RKIND), dimension(:), pointer :: latCell, lonCell
215226

216227
real (kind=RKIND), dimension(:,:,:), pointer :: coeffs_reconstruct
@@ -233,64 +244,108 @@ subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uRecon
233244
call mpas_pool_get_array(meshPool, 'edgesOnCell', edgesOnCell)
234245

235246
if ( includeHalosLocal ) then
236-
call mpas_pool_get_dimension(meshPool, 'nCells', nCells)
247+
call mpas_pool_get_dimension(meshPool, 'nCells', nCells_ptr)
237248
else
238-
call mpas_pool_get_dimension(meshPool, 'nCellsSolve', nCells)
249+
call mpas_pool_get_dimension(meshPool, 'nCellsSolve', nCells_ptr)
239250
end if
251+
call mpas_pool_get_dimension(meshPool, 'nVertLevels', nVertLevels_ptr)
252+
! Dereference scalar (single-value) pointers to ensure OpenACC copies the value pointed to implicitly
253+
nCells = nCells_ptr
254+
nVertLevels = nVertLevels_ptr
240255

241256
call mpas_pool_get_array(meshPool, 'latCell', latCell)
242257
call mpas_pool_get_array(meshPool, 'lonCell', lonCell)
243258

244259
call mpas_pool_get_config(meshPool, 'on_a_sphere', on_a_sphere)
245260

261+
MPAS_ACC_TIMER_START('mpas_reconstruct_2d [ACC_data_xfer]')
262+
! Only use sections needed, nCells may be all cells or only non-halo cells
263+
!$acc enter data copyin(coeffs_reconstruct(:,:,1:nCells),nEdgesOnCell(1:nCells), &
264+
!$acc edgesOnCell(:,1:nCells),latCell(1:nCells),lonCell(1:nCells))
265+
!$acc enter data copyin(u(:,:))
266+
!$acc enter data create(uReconstructX(:,1:nCells),uReconstructY(:,1:nCells), &
267+
!$acc uReconstructZ(:,1:nCells),uReconstructZonal(:,1:nCells), &
268+
!$acc uReconstructMeridional(:,1:nCells))
269+
MPAS_ACC_TIMER_STOP('mpas_reconstruct_2d [ACC_data_xfer]')
270+
246271
! loop over cell centers
247272
!$omp do schedule(runtime)
273+
!$acc parallel default(present)
274+
!$acc loop gang
248275
do iCell = 1, nCells
249276
! initialize the reconstructed vectors
250-
uReconstructX(:,iCell) = 0.0
251-
uReconstructY(:,iCell) = 0.0
252-
uReconstructZ(:,iCell) = 0.0
277+
!$acc loop vector
278+
do k = 1, nVertLevels
279+
uReconstructX(k,iCell) = 0.0
280+
uReconstructY(k,iCell) = 0.0
281+
uReconstructZ(k,iCell) = 0.0
282+
end do
253283

254284
! a more efficient reconstruction where rbf_values*matrix_reconstruct
255285
! has been precomputed in coeffs_reconstruct
256-
do i=1,nEdgesOnCell(iCell)
286+
!$acc loop seq
287+
do i = 1, nEdgesOnCell(iCell)
257288
iEdge = edgesOnCell(i,iCell)
258-
uReconstructX(:,iCell) = uReconstructX(:,iCell) &
259-
+ coeffs_reconstruct(1,i,iCell) * u(:,iEdge)
260-
uReconstructY(:,iCell) = uReconstructY(:,iCell) &
261-
+ coeffs_reconstruct(2,i,iCell) * u(:,iEdge)
262-
uReconstructZ(:,iCell) = uReconstructZ(:,iCell) &
263-
+ coeffs_reconstruct(3,i,iCell) * u(:,iEdge)
289+
!$acc loop vector
290+
do k = 1, nVertLevels
291+
uReconstructX(k,iCell) = uReconstructX(k,iCell) &
292+
+ coeffs_reconstruct(1,i,iCell) * u(k,iEdge)
293+
uReconstructY(k,iCell) = uReconstructY(k,iCell) &
294+
+ coeffs_reconstruct(2,i,iCell) * u(k,iEdge)
295+
uReconstructZ(k,iCell) = uReconstructZ(k,iCell) &
296+
+ coeffs_reconstruct(3,i,iCell) * u(k,iEdge)
297+
end do
264298

265299
enddo
266300
enddo ! iCell
301+
!$acc end parallel
267302
!$omp end do
268303

269304
call mpas_threading_barrier()
270305

271306
if (on_a_sphere) then
272307
!$omp do schedule(runtime)
308+
!$acc parallel default(present)
309+
!$acc loop gang
273310
do iCell = 1, nCells
274311
clat = cos(latCell(iCell))
275312
slat = sin(latCell(iCell))
276313
clon = cos(lonCell(iCell))
277314
slon = sin(lonCell(iCell))
278-
uReconstructZonal(:,iCell) = -uReconstructX(:,iCell)*slon + &
279-
uReconstructY(:,iCell)*clon
280-
uReconstructMeridional(:,iCell) = -(uReconstructX(:,iCell)*clon &
281-
+ uReconstructY(:,iCell)*slon)*slat &
282-
+ uReconstructZ(:,iCell)*clat
315+
!$acc loop vector
316+
do k = 1, nVertLevels
317+
uReconstructZonal(k,iCell) = -uReconstructX(k,iCell)*slon + &
318+
uReconstructY(k,iCell)*clon
319+
uReconstructMeridional(k,iCell) = -(uReconstructX(k,iCell)*clon &
320+
+ uReconstructY(k,iCell)*slon)*slat &
321+
+ uReconstructZ(k,iCell)*clat
322+
end do
283323
end do
324+
!$acc end parallel
284325
!$omp end do
285326
else
286327
!$omp do schedule(runtime)
328+
!$acc parallel default(present)
329+
!$acc loop gang vector collapse(2)
287330
do iCell = 1, nCells
288-
uReconstructZonal (:,iCell) = uReconstructX(:,iCell)
289-
uReconstructMeridional(:,iCell) = uReconstructY(:,iCell)
331+
do k = 1, nVertLevels
332+
uReconstructZonal (k,iCell) = uReconstructX(k,iCell)
333+
uReconstructMeridional(k,iCell) = uReconstructY(k,iCell)
334+
end do
290335
end do
336+
!$acc end parallel
291337
!$omp end do
292338
end if
293339

340+
MPAS_ACC_TIMER_START('mpas_reconstruct_2d [ACC_data_xfer]')
341+
!$acc exit data delete(coeffs_reconstruct(:,:,1:nCells),nEdgesOnCell(1:nCells), &
342+
!$acc edgesOnCell(:,1:nCells),latCell(1:nCells),lonCell(1:nCells))
343+
!$acc exit data delete(u(:,:))
344+
!$acc exit data copyout(uReconstructX(:,1:nCells),uReconstructY(:,1:nCells), &
345+
!$acc uReconstructZ(:,1:nCells), uReconstructZonal(:,1:nCells), &
346+
!$acc uReconstructMeridional(:,1:nCells))
347+
MPAS_ACC_TIMER_STOP('mpas_reconstruct_2d [ACC_data_xfer]')
348+
294349
end subroutine mpas_reconstruct_2d!}}}
295350

296351

0 commit comments

Comments
 (0)