FV3 Bundle
memutils.F90
Go to the documentation of this file.
1 !***********************************************************************
2 !* GNU Lesser General Public License
3 !*
4 !* This file is part of the GFDL Flexible Modeling System (FMS).
5 !*
6 !* FMS is free software: you can redistribute it and/or modify it under
7 !* the terms of the GNU Lesser General Public License as published by
8 !* the Free Software Foundation, either version 3 of the License, or (at
9 !* your option) any later version.
10 !*
11 !* FMS is distributed in the hope that it will be useful, but WITHOUT
12 !* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 !* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 !* for more details.
15 !*
16 !* You should have received a copy of the GNU Lesser General Public
17 !* License along with FMS. If not, see <http://www.gnu.org/licenses/>.
18 !***********************************************************************
20 !Author: Balaji (V.Balaji@noaa.gov)
21 !Various operations for memory management
22 !these currently include efficient methods for memory-to-memory copy
23 !including strided data and arbitrary gather-scatter vectors
24 !also various memory and cache inquiry operators
25  implicit none
26  private
27 #ifdef _CRAYT3E
28  integer :: pe, shmem_my_pe
29 #endif
30 
33 
34  logical :: memutils_initialized=.false.
35 
36  interface memcpy
37  module procedure memcpy_r8
38  module procedure memcpy_r8_gather
39  module procedure memcpy_r8_scatter
40  module procedure memcpy_r8_gather_scatter
41  end interface
42 
44  public :: print_memuse_stats
45 #ifdef _CRAY
46  public :: hplen
47 #endif
48 #ifdef _CRAYT90
49  public :: stklen
50 #endif
51  logical, private :: print_memory_usage=.false.
52  contains
53 
54  subroutine memutils_init(print_flag)
55 !initialize memutils module
56 !currently sets default cache characteristics
57 !(will provide overrides later)
58 !also sets pe to my_pe on t3e
59  logical, optional :: print_flag
60 #ifdef _CRAYT3E
61 !all sizes in bytes
63  l1_cache_size = 8192
66  l2_cache_size = 98304
68 #else
69 !defaults
71  l1_cache_size = 1
74  l2_cache_size = 1
76 #endif
77 #ifdef _CRAYT3E
78  pe = shmem_my_pe()
79 #endif
80  if( PRESENT(print_flag) )print_memory_usage = print_flag
81  memutils_initialized = .true.
82  return
83  end subroutine memutils_init
84 
85 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
86 ! !
87 !MEMCPY routines: <nelems> real*8 words are copied from RHS to LHS !
88 ! Either side can have constant stride (lhs_stride, rhs_stride) !
89 ! or indexed by a gather/scatter array (lhs_indx, rhs_indx) !
90 ! index arrays are 0-based (i.e C-like not fortran-like: this is !
91 ! for compatibility with the SHMEM_IXGET/PUT routines) !
92 ! !
93 ! EXAMPLES: !
94 ! !
95 !Replace !
96 ! a(0:n-1) = b(0:n-1) !
97 !with !
98 ! call memcpy(a,b,n) !
99 ! !
100 !Replace !
101 ! a(0:2*n-1:2) = b(0:3*n-1:3) !
102 !with !
103 ! call memcpy(a,b,dim,n,2,3) !dim.GE.3*n !
104 ! !
105 !Replace !
106 ! a(0:n-1) = b(indx(1:n)) !
107 !with !
108 ! call memcpy(a,b,dim,n,1,indx) !dim.GE.max(indx) !
109 ! !
110 !Replace !
111 ! a(indx(1:n)) = b(0:n-1) !
112 !with !
113 ! call memcpy(a,b,dim,n,indx,1) !dim.GE.max(indx) !
114 ! !
115 !Replace !
116 ! a(indxa(1:n)) = b(indxb(1:n)) !
117 !with !
118 ! call memcpy(a,b,dim,n,indx,indxb) !dim.GE.max(indxa,indxb) !
119 ! !
120 ! There are no error checks!!! (routines are built for speed) !
121 ! Specifically there is no bounds-checking: if the stride or !
122 ! indexing causes you to exceed <dim> you will have done a !
123 ! potentially unsafe memory load !
124 ! !
125 !T3E: we use the shmem routines on-processor to effect the transfer !
126 ! via the (faster) E-registers !
127 ! !
128 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
129  subroutine memcpy_r8( lhs, rhs, dim, nelems, lhs_stride, rhs_stride )
130 !base routine: handles constant stride memcpy
131 !default strides are of course 1
132  integer, intent(in) :: dim
133  real(kind=8), dimension(0:dim-1), intent(in) :: rhs
134  real(kind=8), dimension(0:dim-1), intent(out) :: lhs
135  integer, intent(in), optional :: nelems, lhs_stride, rhs_stride
136  integer :: n, rs, ls
137 
138 !defaults
139  n = dim
140  ls = 1
141  rs = 1
142  if( PRESENT(nelems) )then
143  n = nelems
144 !only check for stride if nelems is present
145  if( PRESENT(lhs_stride) )ls = lhs_stride
146  if( PRESENT(rhs_stride) )rs = rhs_stride
147  endif
148  if( ls.EQ.1 .AND. rs.EQ.1 )then
149 #ifdef _CRAYT3E
150  call shmem_get( lhs(0), rhs(0), n, pe )
151 #else
152  lhs(0:n-1) = rhs(0:n-1)
153 #endif
154  else
155 #ifdef _CRAYT3E
156  call shmem_iget( lhs(0), rhs(0), ls, rs, n, pe )
157 #else
158  lhs(0:n*ls-1:ls) = rhs(0:n*rs-1:rs)
159 #endif
160  endif
161  return
162  end subroutine memcpy_r8
163 
164  subroutine memcpy_r8_gather( lhs, rhs, dim, nelems, lhs_stride, rhs_indx )
165 !memcpy routine with gather: copies nelems words from rhs(indx(:)) to lhs(:)
166  integer, intent(in) :: dim, nelems, lhs_stride
167  real(kind=8), dimension(0:dim-1), intent(in) :: rhs
168  real(kind=8), dimension(0:dim-1), intent(out) :: lhs
169  integer, intent(in), dimension(nelems) :: rhs_indx
170 #ifdef _CRAYT3E
171 !dir$ CACHE_BYPASS lhs, rhs, rhs_indx
172  real(kind=8), dimension(nelems) :: tmp
173 
174  if( lhs_stride.EQ.1 )then
175  call shmem_ixget( lhs(0), rhs(0), rhs_indx, nelems, pe )
176  else
177  call shmem_ixget( tmp, rhs(0), rhs_indx, nelems, pe )
178  call shmem_iget( lhs(0), tmp, lhs_stride, 1, nelems, pe )
179  endif
180 #else
181  lhs(0:nelems*lhs_stride-1:lhs_stride) = rhs(rhs_indx(1:nelems))
182 #endif
183  return
184  end subroutine memcpy_r8_gather
185 
186  subroutine memcpy_r8_scatter( lhs, rhs, dim, nelems, lhs_indx, rhs_stride )
187 !memcpy routine with scatter: copies nelems words from rhs(:) to lhs(indx(:))
188  integer, intent(in) :: dim, nelems, rhs_stride
189  real(kind=8), dimension(0:dim-1), intent(in) :: rhs
190  real(kind=8), dimension(0:dim-1), intent(out) :: lhs
191  integer, intent(in), dimension(nelems) :: lhs_indx
192 #ifdef _CRAYT3E
193 !dir$ CACHE_BYPASS lhs, rhs, lhs_indx
194  real(kind=8), dimension(nelems) :: tmp
195 
196  if( rhs_stride.EQ.1 )then
197  call shmem_ixput( lhs(0), rhs(0), lhs_indx, nelems, pe )
198  else
199  call shmem_iget( tmp, rhs(0), rhs_stride, 1, nelems, pe )
200  call shmem_ixput( lhs(0), tmp, lhs_indx, nelems, pe )
201  endif
202  call shmem_quiet !required to ensure completion of put
203 #else
204  lhs(lhs_indx(1:nelems)) = rhs(0:nelems*rhs_stride-1:rhs_stride)
205 #endif
206  return
207  end subroutine memcpy_r8_scatter
208 
209  subroutine memcpy_r8_gather_scatter( lhs, rhs, dim, nelems, lhs_indx, rhs_indx )
210 !memcpy routine with gather/scatter: copies nelems words from rhs(indx(:)) to lhs(indx(:))
211  integer, intent(in) :: dim, nelems
212  real(kind=8), dimension(0:dim-1), intent(in) :: rhs
213  real(kind=8), dimension(0:dim-1), intent(out) :: lhs
214  integer, intent(in), dimension(nelems) :: lhs_indx, rhs_indx
215 #ifdef _CRAYT3E
216 !dir$ CACHE_BYPASS lhs, rhs, lhs_indx, rhs_indx
217  real(kind=8), dimension(nelems) :: tmp
218 
219  call shmem_ixget( tmp, rhs(0), rhs_indx, nelems, pe )
220  call shmem_ixput( lhs(0), tmp, lhs_indx, nelems, pe )
221  call shmem_quiet !required to ensure completion of put
222 #else
223  lhs(lhs_indx(1:nelems)) = rhs(rhs_indx(1:nelems))
224 #endif
225  return
226  end subroutine memcpy_r8_gather_scatter
227 
228 #ifdef _CRAY
229  integer function hplen( hpalloc, hplargest, hpshrink, hpgrow, hpfirst, hplast )
230 !using IHPSTAT calls from SR-2165 v2.0 p535
231 !with no arguments returns heap length (in words on PVP, bytes on t3e)
232  integer, intent(out), optional :: hpalloc, hplargest, hpshrink, hpgrow, hpfirst, hplast
233  integer :: IHPSTAT
234 
235  hplen = ihpstat(1) !Heap length
236  if( present(hpalloc ) )hpalloc = ihpstat( 4) !Blocks allocated
237  if( present(hplargest) )hplargest = ihpstat(10) !Largest free block size
238  if( present(hpshrink ) )hpshrink = ihpstat(11) !Amount heap can shrink
239  if( present(hpgrow ) )hpgrow = ihpstat(12) !Amount heap can grow
240  if( present(hpfirst ) )hpfirst = ihpstat(13) !First word address
241  if( present(hplast ) )hplast = ihpstat(14) !Last word address
242  return
243  end function hplen
244 #endif /* _CRAY */
245 
246 #ifdef _CRAYT90
247  integer function stklen( stkhiwm, stknumber, stktotal, stkmost, stkgrew, stkgtimes )
248 !using STKSTAT(3C) struct
249  integer, optional, intent(out) :: stkhiwm, stknumber, stktotal, stkmost, stkgrew, stkgtimes
250  integer :: istat(20)
251 
252  call stkstat(istat)
253  stklen = istat(1) !Stack length
254  if( present(stkhiwm ) )stkhiwm = istat(2) !stack hiwatermark
255  if( present(stknumber) )stknumber = istat(3) !current #stacks
256  if( present(stktotal ) )stktotal = istat(4) !total #stacks
257  if( present(stkmost ) )stkmost = istat(5) !most #stacks at one time
258  if( present(stkgrew ) )stkgrew = istat(6) !#stacks that grew
259  if( present(stkgtimes) )stkgtimes = istat(7) !#times stack grew
260  return
261  end function stklen
262 #endif /* _CRAYT90 */
263 
264 !cache utilities: need to write version for other argument types
265  function get_l1_cache_line(a)
266  integer(kind=8) :: get_l1_cache_line
267  real, intent(in) :: a
268  integer(kind=8) :: i
269  i = loc(a)
271  end function get_l1_cache_line
272 
273  function get_l2_cache_line(a)
274  integer(kind=8) :: get_l2_cache_line
275  real, intent(in) :: a
276  integer(kind=8) :: i
277  i = loc(a)
279  end function get_l2_cache_line
280 
281  subroutine print_memuse_stats( text, unit, always )
282  use mpp_mod, only: mpp_pe, mpp_root_pe, mpp_npes, mpp_min, mpp_max, mpp_sum, stderr
283  character(len=*), intent(in) :: text
284  integer, intent(in), optional :: unit
285  logical, intent(in), optional :: always
286  real :: m, mmin, mmax, mavg, mstd
287  integer :: mu
288 !memuse is an external function: works on SGI
289 !use #ifdef to generate equivalent on other platforms.
290  integer :: memuse !default integer OK?
291  character(len=8) :: walldate
292  character(len=10) :: walltime
293  character(len=5) :: wallzone
294  integer :: wallvalues(8)
295 
296  if( PRESENT(always) )then
297  if( .NOT.always )return
298  else
299  if( .NOT.print_memory_usage )return
300  end if
301  mu = stderr(); if( PRESENT(unit) )mu = unit
302 #if defined(__sgi) || defined(__aix) || defined(__SX) || defined(__APPLE__)
303  m = memuse()*1e-3
304 #else
305  call mem_dump(m)
306 #endif
307  mmin = m; call mpp_min(mmin)
308  mmax = m; call mpp_max(mmax)
309  mavg = m; call mpp_sum(mavg); mavg = mavg/mpp_npes()
310  mstd = (m-mavg)**2; call mpp_sum(mstd); mstd = sqrt( mstd/mpp_npes() )
311  if( mpp_pe().EQ.mpp_root_pe() ) then
312  call date_and_time(walldate, walltime, wallzone, wallvalues)
313  write( mu,'(a84,4es11.3)' ) trim(walldate)//' '//trim(walltime)//&
314  ': Memuse(MB) at '//trim(text)//'=', mmin, mmax, mstd, mavg
315  endif
316  return
317  end subroutine print_memuse_stats
318 
319 !#######################################################################
320 
321 subroutine mem_dump ( memuse )
322 use mpp_mod, only : stdout
323 use mpp_io_mod, only : mpp_open, mpp_close, mpp_ascii, mpp_rdonly, &
324  mpp_sequential, mpp_single
325 
326 real, intent(out) :: memuse
327 
328 ! This routine returns the memory usage on Linux systems.
329 ! It does this by querying a system file (file_name below).
330 ! It is intended for use by print_memuse_stats above.
331 
332 character(len=32) :: file_name = '/proc/self/status'
333 character(len=32) :: string
334 integer, save :: mem_unit = -1
335 real :: multiplier
336 
337  memuse = 0.0
338  multiplier = 1.0
339 
340  if(mem_unit == -1) then
341  call mpp_open ( mem_unit, file_name, &
342  form=mpp_ascii, action=mpp_rdonly, &
343  access=mpp_sequential, threading=mpp_single )
344  else
345  rewind(mem_unit)
346  endif
347 
348  do; read (mem_unit,'(a)', end=10) string
349  if ( index( string, 'VmHWM:' ) == 1 ) then
350  read (string(7:len_trim(string)-2),*) memuse
351  exit
352  endif
353  enddo
354 
355  if (trim(string(len_trim(string)-1:)) == "kB" ) &
356  multiplier = 1.0/1024. ! Convert from kB to MB
357 
358 !10 call mpp_close ( mem_unit )
359 10 memuse = memuse * multiplier
360 
361  return
362 end subroutine mem_dump
363 
364 end module memutils_mod
subroutine, public print_memuse_stats(text, unit, always)
Definition: memutils.F90:282
integer(kind=8) function, public get_l1_cache_line(a)
Definition: memutils.F90:266
integer(kind=8) l1_cache_size
Definition: memutils.F90:31
subroutine memcpy_r8(lhs, rhs, dim, nelems, lhs_stride, rhs_stride)
Definition: memutils.F90:130
integer(kind=8) l1_cache_line_size
Definition: memutils.F90:31
integer(kind=8) l2_cache_line_size
Definition: memutils.F90:32
Definition: mpp.F90:39
subroutine, public memutils_init(print_flag)
Definition: memutils.F90:55
integer(kind=8) l2_associativity
Definition: memutils.F90:32
integer(kind=8) function, public get_l2_cache_line(a)
Definition: memutils.F90:274
logical memutils_initialized
Definition: memutils.F90:34
integer(kind=8) l1_associativity
Definition: memutils.F90:31
subroutine memcpy_r8_scatter(lhs, rhs, dim, nelems, lhs_indx, rhs_stride)
Definition: memutils.F90:187
subroutine mem_dump(memuse)
Definition: memutils.F90:322
integer(kind=8) l2_cache_size
Definition: memutils.F90:32
subroutine memcpy_r8_gather(lhs, rhs, dim, nelems, lhs_stride, rhs_indx)
Definition: memutils.F90:165
logical, private print_memory_usage
Definition: memutils.F90:51
subroutine memcpy_r8_gather_scatter(lhs, rhs, dim, nelems, lhs_indx, rhs_indx)
Definition: memutils.F90:210