#include <fms_platform.h>

  use mpp_mod, only : mpp_init, mpp_exit, mpp_pe, mpp_npes, mpp_root_pe, stdout
  use mpp_mod, only : mpp_clock_id, mpp_clock_begin, mpp_clock_end, mpp_sync, mpp_malloc
  use mpp_mod, only : mpp_declare_pelist, mpp_set_current_pelist, mpp_set_stack_size
  use mpp_mod, only : mpp_gsm_malloc, mpp_gsm_free
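  ! Problem size, work arrays, and timing/bookkeeping variables used by the tests below.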
  integer, parameter :: n = 1048576
  real, allocatable, dimension(:) :: a, b, c
  real, allocatable, dimension(:) :: d
  integer(LONG_KIND) :: locd
  integer :: tick, tick0, ticks_per_sec, id
  integer :: pe, npes, root, i, j, k, l, m, n2, istat
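  ! Enlarge mpp_mod's internal stack before running the communication tests.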
  call mpp_set_stack_size(3145746)

  call test_gather(npes,pe,root,out_unit)
  call test_gatherv(npes,pe,root,out_unit)
  call test_gather2dv(npes,pe,root,out_unit)
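  ! Baseline timing: the 'Random number' clock brackets the random-number fill of array a.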
  call system_clock( count_rate=ticks_per_sec )
  allocate( a(n), b(n) )
  id = mpp_clock_id( 'Random number' )
  call mpp_clock_begin(id)
  call mpp_clock_end  (id)
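  ! Ring exchange: each PE puts a to PE pe-i and gets b from PE pe+i (modulo npes),
  ! timing mpp_transmit over a range of message lengths l.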
  print *, 'Time mpp_transmit for various lengths...'
  print *, 'For comparison, times for shmem_get and shmem_put are also provided.'
  id = mpp_clock_id( 'mpp_transmit' )
  call mpp_clock_begin(id)
  call system_clock(tick0)
  call mpp_transmit( put_data=a(1), plen=l, to_pe=modulo(pe+npes-i,npes), &
                     get_data=b(1), glen=l, from_pe=modulo(pe+i,npes) )
  call system_clock(tick)
  dt = real(tick-tick0)/(npes*ticks_per_sec)
  dt = max( dt, epsilon(dt) )
  if( pe.EQ.root ) write( out_unit, '(/a,i8,f13.6,f8.2)' ) &
       'MPP_TRANSMIT length, time, bw(Mb/s)=', l, dt, l*8e-6/dt
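  ! Time a length-n mpp_sum across all PEs and report the effective bandwidth.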
  print '(/a)', 'Time mpp_sum...'
  call system_clock(tick0)
  call system_clock(tick)
  dt = real(tick-tick0)/ticks_per_sec
  dt = max( dt, epsilon(dt) )
  if( pe.EQ.root ) write( out_unit, '(a,2i6,f9.1,i8,f13.6,f8.2/)' ) &
       'mpp_sum: pe, npes, sum(pe+1), length, time, bw(Mb/s)=', pe, npes, a(1), n, dt, n*8e-6/dt
  call mpp_clock_end(id)
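  ! mpp_max: every PE holds pe+1; after the reduction each PE should report the global maximum.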
  print *, 'Test mpp_max...'
  print *, 'pe, pe+1 =', pe, a(1)
  print *, 'pe, max(pe+1)=', pe, a(1)
  call flush(out_unit,istat)
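  ! Pelist tests: broadcast, sum, and max restricted to PEs 0..npes-2 (the last PE is excluded).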
  if( pe.EQ.root )print *, 'Test of pelists: bcast, sum and max using PEs 0...npes-2 (excluding last PE)'
  call mpp_declare_pelist( (/(i,i=0,npes-2)/) )
  if( pe.NE.npes-1 )call mpp_broadcast( a, n, npes-2, (/(i,i=0,npes-2)/) )
  print *, 'bcast(npes-1) from 0 to npes-2=', pe, a(1)
  if( pe.NE.npes-1 )then
      call mpp_set_current_pelist( (/(i,i=0,npes-2)/) )
      id = mpp_clock_id( 'Partial mpp_sum' )
      call mpp_clock_begin(id)
      call mpp_sum( a(1:1000), 1000, (/(i,i=0,npes-2)/) )
      call mpp_clock_end  (id)
  if( pe.EQ.root )print *, 'sum(pe+1) from 0 to npes-2=', a(1)
  if( pe.NE.npes-1 )call mpp_max( a(1), (/(i,i=0,npes-2)/) )
  if( pe.EQ.root )print *, 'max(pe+1) from 0 to npes-2=', a(1)
  call mpp_set_current_pelist()
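  ! Cray-pointer section: a broadcast-style mpp_transmit from root to all PEs, checksum
  ! comparison of whole vs. distributed arrays, and global shared-memory (gsm) allocation of d.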
#ifdef use_CRI_pointers
  if( modulo(n,npes).EQ.0 )then
      if( pe.EQ.root )call random_number(a(1:n2))
      call mpp_transmit( put_data=a(1), plen=n2, to_pe=all_pes, &
                         get_data=a(1), glen=n2, from_pe=root )
      call mpp_sync_self ()
      print *, 'Test mpp_chksum...'
      print *, 'This test shows that a whole array and a distributed array give identical checksums.'
      print *, 'chksum(a(1:1024))=', mpp_chksum(a(1:n2),(/pe/))
  call mpp_gsm_malloc( locd, sizeof(d) )
  call random_number(d)
  call mpp_gsm_free( locd )
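  ! test_broadcast: broadcast a small character array from the root PE and verify that
  ! non-root PEs receive exactly the root's text.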
  subroutine test_broadcast()
     integer, parameter :: ARRAYSIZE  = 3
     integer, parameter :: STRINGSIZE = 256
     character(len=STRINGSIZE), dimension(ARRAYSIZE) :: textA, textB

     textA(1) = "This is line 1 "
     textA(2) = "Here comes the line 2 "
     textA(3) = "Finally is line 3 "
     if(mpp_pe() .NE. mpp_root_pe()) then
     if(mpp_pe() == mpp_root_pe()) then
        if(textA(n) .NE. textB(n)) call mpp_error(FATAL, "test_broadcast: on root_pe, textA should equal textB")
        if(textA(n) == textB(n)) call mpp_error(FATAL, "test_broadcast: on root_pe, textA should not equal textB")
     if(textA(n) .NE. textB(n)) call mpp_error(FATAL, "test_broadcast: after broadcast, textA should equal textB")
     write(out_unit,*) "==> NOTE from test_broadcast: The test is successful"
  end subroutine test_broadcast
  subroutine test_gather(npes,pe,root,out_unit)
     integer, intent(in) :: npes, pe, root, out_unit
     integer :: pelist(npes)

     write(out_unit,*) "Minimum of 3 ranks required. Not testing gather; too few ranks."

     if(int(rdata(i)) /= pelist(i)) then
        write(6,*) "Gathered data ",int(rdata(i))," NE reference ",pelist(i)," at i=",i
        call mpp_error(FATAL, "Test gather uniform vector with global pelist failed")

     write(out_unit,*) "Test gather uniform vector with global pelist successful"

     if(any(pe == pelist(2:npes))) call mpp_gather((/val/),rdata(2:npes),pelist(2:npes))
     if(pe == pelist(2)) then
        if(int(rdata(i)) /= pelist(i)) then
           write(6,*) "Gathered data ",int(rdata(i))," NE reference ",pelist(i)," at i=",i
           call mpp_error(FATAL, "Test gather uniform vector with reduced pelist failed")

     write(out_unit,*) "Test gather uniform vector with reduced pelist successful"
  end subroutine test_gather
  subroutine test_gatherv(npes,pe,root,out_unit)
     integer, intent(in) :: npes, pe, root, out_unit
     integer :: pelist(npes), rsize(npes)
     integer :: i, j, k, dsize, ssize
     real, allocatable :: sdata(:), rdata(:), ref(:)

     write(out_unit,*) "Minimum of 3 ranks required. Not testing gatherV; too few ranks."
     elseif(npes > 9999) then
        write(out_unit,*) "Maximum of 9999 ranks supported. Not testing gatherV; too many ranks."

     allocate(sdata(ssize))
     sdata(i) = pe + 0.0001*i
     allocate(rdata(dsize),ref(dsize))
     ref(k) = pelist(j) + 0.0001*i

     if(rdata(k) /= ref(k)) then
        write(6,*) "Gathered data ",rdata(k)," NE reference ",ref(k)," at k=",k
        call mpp_error(FATAL, "Test gatherV global pelist failed")

     write(out_unit,*) "Test gatherV with global pelist successful"

     if(any(pe == pelist(2:npes))) call mpp_gather(sdata,ssize,rdata(2:),rsize(2:),pelist(2:npes))
     if(pe == pelist(2)) then
        if(rdata(k) /= ref(k)) then
           write(6,*) "Gathered data ",rdata(k)," NE reference ",ref(k)," at k=",k
           call mpp_error(FATAL, "Test gatherV with reduced pelist failed")

     write(out_unit,*) "Test gatherV with reduced pelist successful"
     deallocate(sdata,rdata,ref)
  end subroutine test_gatherv
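  ! test_gather2DV: gather 2-D slabs (10000*k + pe + 0.0001*i) with per-PE receive sizes,
  ! first over the global pelist and then over a reversed pelist.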
  subroutine test_gather2dv(npes,pe,root,out_unit)
     integer, intent(in) :: npes, pe, root, out_unit
     integer :: pelist(npes), rsize(npes)
     integer :: pelist2(npes), rsize2(npes)
     integer :: i, j, k, l, nz, ssize, nelems
     real, allocatable, dimension(:,:) :: data, cdata, sbuff, rbuff
     real, allocatable :: ref(:,:)
     integer, parameter :: KSIZE = 10
     real :: sbuff1D(size(sbuff))
     real :: rbuff1D(size(rbuff))
     pointer(sptr,sbuff1D); pointer(rptr,rbuff1D)
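     ! sbuff1D/rbuff1D are Cray-pointer aliases of the 2-D send/receive buffers; once
     ! sptr/rptr are pointed at sbuff/rbuff below, mpp_gather can treat them as contiguous 1-D data.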
     write(out_unit,*) "Minimum of 3 ranks required. Not testing gather2DV; too few ranks."
     elseif(npes > 9999) then
        write(out_unit,*) "Maximum of 9999 ranks supported. Not testing gather2DV; too many ranks."

     allocate(data(ssize,ksize))
     do k=1,ksize; do i=1,ssize
        data(i,k) = 10000.0*k + pe + 0.0001*i

     nelems = sum(rsize(:))
     allocate(rbuff(nz,nelems)); rbuff = -1.0
     allocate(ref(nelems,nz), cdata(nelems,nz))
     ref = 0.0; cdata = 0.0
     ref(l,k) = 10000.0*k + pelist(j) + 0.0001*i

     allocate(sbuff(nz,ssize))
     sbuff(i,j) = data(j,i)
     sptr = loc(sbuff); rptr = loc(rbuff)
     cdata(i,j) = rbuff(j,i)

     if(cdata(i,j) /= ref(i,j)) then
        write(6,*) "Gathered data ",cdata(i,j)," NE reference ",ref(i,j)," at i,j=",i,j
        call mpp_error(FATAL, "Test gather2DV global pelist failed")

     write(out_unit,*) "Test gather2DV with global pelist successful"

     pelist2(i) = pelist(npes-i+1)
     rsize2(i) = rsize(npes-i+1)
     ref = 0.0; cdata = 0.0
     if(pe == pelist2(1)) then
        ref(l,k) = 10000.0*k + pelist2(j) + 0.0001*i
     if(pe == pelist2(1)) then
        cdata(i,j) = rbuff(j,i)

     if(cdata(i,j) /= ref(i,j)) then
        write(6,*) "Gathered data ",cdata(i,j)," NE reference ",ref(i,j)," at i,j=",i,j
        call mpp_error(FATAL, "Test gather2DV with reversed pelist failed")

     write(out_unit,*) "Test gather2DV with reversed pelist successful"
  end subroutine test_gather2dv
  subroutine test_shared_pointers(locd,n)
     integer(LONG_KIND), intent(in) :: locd

     print *, 'TEST_SHARED_POINTERS: pe, locd=', pe, locd
     print *, 'TEST_SHARED_POINTERS: pe, sum(d)=', pe, sum(dd)
  end subroutine test_shared_pointers

#endif /* test_mpp */