Improved (but not ideal) timing routines
This commit is contained in:
parent
da2ec75352
commit
32cc862966
@ -3,7 +3,7 @@ c
|
||||
subroutine crsgd(ierr,k,nstr,ecg,ncg,ewt,nwt,mmax,icdep,
|
||||
* nun,imin,imax,a,ia,ja,iu,ip,icg,ifg,
|
||||
* b,ib,jb,ipmn,ipmx,iv,xp,yp,
|
||||
* ndimu,ndimp,ndima,ndimb)
|
||||
* ndimu,ndimp,ndima,ndimb,coarsen_cpu,RPdef_cpu)
|
||||
c
|
||||
c---------------------------------------------------------------------
|
||||
c
|
||||
@ -210,6 +210,11 @@ c
|
||||
dimension iarr(10)
|
||||
c
|
||||
dimension icdep(10,10)
|
||||
|
||||
integer*4 coarsen_cpu
|
||||
integer*4 cpu_ticks
|
||||
integer*4 cpu_old
|
||||
integer*4 RPdef_cpu
|
||||
c
|
||||
c---------------------------------------------------------------------
|
||||
c
|
||||
@ -229,6 +234,11 @@ c
|
||||
|
||||
call idec(nwt,5,ndigit,iarr)
|
||||
iwts =iarr(1)
|
||||
cveh
|
||||
c start timer for coarse gridpoint selection (added to coarsen_cpu)
|
||||
cveh
|
||||
call amg_cpuclock(cpu_ticks)
|
||||
cpu_old = cpu_ticks
|
||||
c
|
||||
c=== > set array for coupled/dependent coarsening
|
||||
c
|
||||
@ -284,6 +294,10 @@ c=== > test for grid coarse enough
|
||||
c
|
||||
call gtest(k,mmax,imin,imax,icg,ifg,
|
||||
* ndimu,ndimp,ndima,ndimb)
|
||||
cveh
|
||||
call amg_cpuclock(cpu_ticks)
|
||||
coarsen_cpu = coarsen_cpu + (cpu_ticks-cpu_old)
|
||||
cveh
|
||||
c
|
||||
c=== > return if coarsest level
|
||||
c
|
||||
@ -291,6 +305,9 @@ c
|
||||
c
|
||||
c=== > load f-rows of b with strong c-connections
|
||||
c
|
||||
call amg_cpuclock(cpu_ticks)
|
||||
cpu_old = cpu_ticks
|
||||
|
||||
call bloadf(ierr,k,imin,imax,a,ia,ja,icg,b,ib,jb,
|
||||
* ndimu,ndimp,ndima,ndimb)
|
||||
if (ierr .ne. 0) return
|
||||
@ -311,5 +328,7 @@ c
|
||||
call rstdf0(k,imin,imax,icg,b,ib,jb,
|
||||
* ndimu,ndimp,ndima,ndimb)
|
||||
c
|
||||
call amg_cpuclock(cpu_ticks)
|
||||
RPdef_cpu = RPdef_cpu + (cpu_ticks-cpu_old)
|
||||
return
|
||||
end
|
||||
|
||||
@ -23,6 +23,7 @@ f90amg.c
|
||||
f90amg_params.c
|
||||
f90amg_setup.c
|
||||
f90amg_solve.c
|
||||
f90amg_timing.c
|
||||
fortran.h
|
||||
general.h
|
||||
headers.h
|
||||
|
||||
@ -38,6 +38,12 @@ c
|
||||
dimension icdep(10,10)
|
||||
c
|
||||
character*(*) lfname
|
||||
|
||||
integer*4 coarsen_cpu
|
||||
integer*4 opdef_cpu
|
||||
integer*4 RPdef_cpu
|
||||
integer*4 cpu_ticks
|
||||
integer*4 cpu_old
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c initialize the error flag to zero
|
||||
@ -80,6 +86,10 @@ c
|
||||
c
|
||||
c=== > coarsen problem
|
||||
c
|
||||
coarsen_cpu = 0
|
||||
opdef_cpu = 0
|
||||
RPdef_cpu = 0
|
||||
|
||||
20 k=k+1
|
||||
c
|
||||
c => choose coarse grid and define interpolation
|
||||
@ -87,7 +97,7 @@ c
|
||||
call crsgd(ierr,k-1,nstr,ecg,ncg,ewt,nwt,levels,icdep,
|
||||
* nun,imin,imax,a,ia,ja,iu,ip,icg,ifg,
|
||||
* b,ib,jb,ipmn,ipmx,iv,xp,yp,
|
||||
* ndimu,ndimp,ndima,ndimb)
|
||||
* ndimu,ndimp,ndima,ndimb,coarsen_cpu,RPdef_cpu)
|
||||
|
||||
if (ierr .ne. 0) return
|
||||
c
|
||||
@ -97,6 +107,9 @@ c
|
||||
c
|
||||
c => compute coarse grid matrix
|
||||
c
|
||||
call amg_cpuclock(cpu_ticks)
|
||||
cpu_old = cpu_ticks
|
||||
|
||||
call opdfn(ierr,k,levels,ndima,
|
||||
* imin,imax,a,ia,ja,icg,ifg,b,ib,jb)
|
||||
if (ierr .ne. 0) return
|
||||
@ -106,10 +119,14 @@ c
|
||||
call trunc(k,imin,imax,a,ia,ja)
|
||||
|
||||
call symm(k,isymm,imin,imax,a,ia,ja,icg,ifg)
|
||||
|
||||
call amg_cpuclock(cpu_ticks)
|
||||
opdef_cpu = opdef_cpu + (cpu_ticks-cpu_old)
|
||||
c
|
||||
go to 20
|
||||
c
|
||||
30 continue
|
||||
c
|
||||
c
|
||||
c compute & print statistics after coarsening
|
||||
|
||||
@ -118,6 +135,16 @@ c compute & print statistics after coarsening
|
||||
* nun,imin,imax,a,ia,ja,iu,ip,icg,ifg,
|
||||
* b,ib,jb,ipmn,ipmx,iv,xp,yp)
|
||||
|
||||
write (9,100) dfloat(coarsen_cpu)/100.0,
|
||||
* dfloat(opdef_cpu)/100.0,
|
||||
* dfloat(RPdef_cpu)/100.0
|
||||
|
||||
100 format(//'CPU Times for Setup Phase:',
|
||||
* /5x, ' Coarse Gridpoint Selection: ', F10.4,
|
||||
* /5x, ' Defining coarse-grid operator: ', F10.4,
|
||||
* /5x, 'Defining intergrid transfer operators: ', F10.4,//)
|
||||
|
||||
|
||||
close(9)
|
||||
endif
|
||||
|
||||
|
||||
@ -3,7 +3,7 @@ c
|
||||
subroutine crsgd(ierr,k,nstr,ecg,ncg,ewt,nwt,mmax,icdep,
|
||||
* nun,imin,imax,a,ia,ja,iu,ip,icg,ifg,
|
||||
* b,ib,jb,ipmn,ipmx,iv,xp,yp,
|
||||
* ndimu,ndimp,ndima,ndimb)
|
||||
* ndimu,ndimp,ndima,ndimb,coarsen_cpu,RPdef_cpu)
|
||||
c
|
||||
c---------------------------------------------------------------------
|
||||
c
|
||||
@ -210,6 +210,11 @@ c
|
||||
dimension iarr(10)
|
||||
c
|
||||
dimension icdep(10,10)
|
||||
|
||||
integer*4 coarsen_cpu
|
||||
integer*4 cpu_ticks
|
||||
integer*4 cpu_old
|
||||
integer*4 RPdef_cpu
|
||||
c
|
||||
c---------------------------------------------------------------------
|
||||
c
|
||||
@ -229,6 +234,11 @@ c
|
||||
|
||||
call idec(nwt,5,ndigit,iarr)
|
||||
iwts =iarr(1)
|
||||
cveh
|
||||
c start timer for coarse gridpoint selection (added to coarsen_cpu)
|
||||
cveh
|
||||
call amg_cpuclock(cpu_ticks)
|
||||
cpu_old = cpu_ticks
|
||||
c
|
||||
c=== > set array for coupled/dependent coarsening
|
||||
c
|
||||
@ -284,6 +294,10 @@ c=== > test for grid coarse enough
|
||||
c
|
||||
call gtest(k,mmax,imin,imax,icg,ifg,
|
||||
* ndimu,ndimp,ndima,ndimb)
|
||||
cveh
|
||||
call amg_cpuclock(cpu_ticks)
|
||||
coarsen_cpu = coarsen_cpu + (cpu_ticks-cpu_old)
|
||||
cveh
|
||||
c
|
||||
c=== > return if coarsest level
|
||||
c
|
||||
@ -291,6 +305,9 @@ c
|
||||
c
|
||||
c=== > load f-rows of b with strong c-connections
|
||||
c
|
||||
call amg_cpuclock(cpu_ticks)
|
||||
cpu_old = cpu_ticks
|
||||
|
||||
call bloadf(ierr,k,imin,imax,a,ia,ja,icg,b,ib,jb,
|
||||
* ndimu,ndimp,ndima,ndimb)
|
||||
if (ierr .ne. 0) return
|
||||
@ -311,5 +328,7 @@ c
|
||||
call rstdf0(k,imin,imax,icg,b,ib,jb,
|
||||
* ndimu,ndimp,ndima,ndimb)
|
||||
c
|
||||
call amg_cpuclock(cpu_ticks)
|
||||
RPdef_cpu = RPdef_cpu + (cpu_ticks-cpu_old)
|
||||
return
|
||||
end
|
||||
|
||||
@ -23,6 +23,7 @@ f90amg.c
|
||||
f90amg_params.c
|
||||
f90amg_setup.c
|
||||
f90amg_solve.c
|
||||
f90amg_timing.c
|
||||
fortran.h
|
||||
general.h
|
||||
headers.h
|
||||
|
||||
@ -38,6 +38,12 @@ c
|
||||
dimension icdep(10,10)
|
||||
c
|
||||
character*(*) lfname
|
||||
|
||||
integer*4 coarsen_cpu
|
||||
integer*4 opdef_cpu
|
||||
integer*4 RPdef_cpu
|
||||
integer*4 cpu_ticks
|
||||
integer*4 cpu_old
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c initialize the error flag to zero
|
||||
@ -80,6 +86,10 @@ c
|
||||
c
|
||||
c=== > coarsen problem
|
||||
c
|
||||
coarsen_cpu = 0
|
||||
opdef_cpu = 0
|
||||
RPdef_cpu = 0
|
||||
|
||||
20 k=k+1
|
||||
c
|
||||
c => choose coarse grid and define interpolation
|
||||
@ -87,7 +97,7 @@ c
|
||||
call crsgd(ierr,k-1,nstr,ecg,ncg,ewt,nwt,levels,icdep,
|
||||
* nun,imin,imax,a,ia,ja,iu,ip,icg,ifg,
|
||||
* b,ib,jb,ipmn,ipmx,iv,xp,yp,
|
||||
* ndimu,ndimp,ndima,ndimb)
|
||||
* ndimu,ndimp,ndima,ndimb,coarsen_cpu,RPdef_cpu)
|
||||
|
||||
if (ierr .ne. 0) return
|
||||
c
|
||||
@ -97,6 +107,9 @@ c
|
||||
c
|
||||
c => compute coarse grid matrix
|
||||
c
|
||||
call amg_cpuclock(cpu_ticks)
|
||||
cpu_old = cpu_ticks
|
||||
|
||||
call opdfn(ierr,k,levels,ndima,
|
||||
* imin,imax,a,ia,ja,icg,ifg,b,ib,jb)
|
||||
if (ierr .ne. 0) return
|
||||
@ -106,10 +119,14 @@ c
|
||||
call trunc(k,imin,imax,a,ia,ja)
|
||||
|
||||
call symm(k,isymm,imin,imax,a,ia,ja,icg,ifg)
|
||||
|
||||
call amg_cpuclock(cpu_ticks)
|
||||
opdef_cpu = opdef_cpu + (cpu_ticks-cpu_old)
|
||||
c
|
||||
go to 20
|
||||
c
|
||||
30 continue
|
||||
c
|
||||
c
|
||||
c compute & print statistics after coarsening
|
||||
|
||||
@ -118,6 +135,16 @@ c compute & print statistics after coarsening
|
||||
* nun,imin,imax,a,ia,ja,iu,ip,icg,ifg,
|
||||
* b,ib,jb,ipmn,ipmx,iv,xp,yp)
|
||||
|
||||
write (9,100) dfloat(coarsen_cpu)/100.0,
|
||||
* dfloat(opdef_cpu)/100.0,
|
||||
* dfloat(RPdef_cpu)/100.0
|
||||
|
||||
100 format(//'CPU Times for Setup Phase:',
|
||||
* /5x, ' Coarse Gridpoint Selection: ', F10.4,
|
||||
* /5x, ' Defining coarse-grid operator: ', F10.4,
|
||||
* /5x, 'Defining intergrid transfer operators: ', F10.4,//)
|
||||
|
||||
|
||||
close(9)
|
||||
endif
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user