Improved (but not ideal) timing routines

1997-09-30 16:55:34 +00:00 · 1997-09-30 16:55:34 +00:00 · 32cc862966
commit 32cc862966
parent da2ec75352
6 changed files with 98 additions and 4 deletions
--- a/seq_linear_solvers/amg/amg/crsgd.f
+++ b/seq_linear_solvers/amg/amg/crsgd.f
@@ -3,7 +3,7 @@ c
      subroutine crsgd(ierr,k,nstr,ecg,ncg,ewt,nwt,mmax,icdep,
     *     nun,imin,imax,a,ia,ja,iu,ip,icg,ifg,
     *     b,ib,jb,ipmn,ipmx,iv,xp,yp,
-     *     ndimu,ndimp,ndima,ndimb)
+     *     ndimu,ndimp,ndima,ndimb,coarsen_cpu,RPdef_cpu)
 c     
 c---------------------------------------------------------------------
 c     
@@ -210,6 +210,11 @@ c
      dimension iarr(10)
 c     
      dimension icdep(10,10)
+
+      integer*4 coarsen_cpu
+      integer*4 cpu_ticks
+      integer*4 cpu_old
+      integer*4 RPdef_cpu
 c     
 c---------------------------------------------------------------------
 c     
@@ -229,6 +234,11 @@ c

      call idec(nwt,5,ndigit,iarr)
      iwts   =iarr(1)
+cveh
+c     record start time for the coarsening (coarsen_cpu) timer
+cveh
+      call amg_cpuclock(cpu_ticks)
+      cpu_old = cpu_ticks
 c     
 c===  > set array for coupled/dependent coarsening
 c     
@@ -284,6 +294,10 @@ c===  > test for grid coarse enough
 c     
      call gtest(k,mmax,imin,imax,icg,ifg,
     *     ndimu,ndimp,ndima,ndimb)
+cveh
+      call amg_cpuclock(cpu_ticks)
+      coarsen_cpu = coarsen_cpu + (cpu_ticks-cpu_old)
+cveh
 c     
 c===  > return if coarsest level
 c     
@@ -291,6 +305,9 @@ c
 c     
 c===  > load f-rows of b with strong c-connections
 c     
+      call amg_cpuclock(cpu_ticks)
+      cpu_old = cpu_ticks
+
      call bloadf(ierr,k,imin,imax,a,ia,ja,icg,b,ib,jb,
     *     ndimu,ndimp,ndima,ndimb)
      if (ierr .ne. 0) return
@@ -311,5 +328,7 @@ c
      call rstdf0(k,imin,imax,icg,b,ib,jb,
     *     ndimu,ndimp,ndima,ndimb)
 c     
+      call amg_cpuclock(cpu_ticks)
+      RPdef_cpu = RPdef_cpu + (cpu_ticks-cpu_old)
      return
      end
--- a/seq_linear_solvers/amg/amg/rcs_files
+++ b/seq_linear_solvers/amg/amg/rcs_files
@@ -23,6 +23,7 @@ f90amg.c
 f90amg_params.c
 f90amg_setup.c
 f90amg_solve.c
+f90amg_timing.c
 fortran.h
 general.h
 headers.h
--- a/seq_linear_solvers/amg/amg/setup.f
+++ b/seq_linear_solvers/amg/amg/setup.f
@@ -38,6 +38,12 @@ c
      dimension icdep(10,10)
 c     
      character*(*)  lfname
+    
+      integer*4 coarsen_cpu
+      integer*4 opdef_cpu
+      integer*4 RPdef_cpu
+      integer*4 cpu_ticks
+      integer*4 cpu_old

 c---------------------------------------------------------------------
 c     initialize the error flag to zero
@@ -80,6 +86,10 @@ c
 c     
 c===  > coarsen problem
 c     
+      coarsen_cpu = 0
+      opdef_cpu = 0
+      RPdef_cpu = 0
+
 20   k=k+1
 c     
 c     =>   choose coarse grid and define interpolation
@@ -87,7 +97,7 @@ c
      call crsgd(ierr,k-1,nstr,ecg,ncg,ewt,nwt,levels,icdep,
     *     nun,imin,imax,a,ia,ja,iu,ip,icg,ifg,
     *     b,ib,jb,ipmn,ipmx,iv,xp,yp,
-     *     ndimu,ndimp,ndima,ndimb)
+     *     ndimu,ndimp,ndima,ndimb,coarsen_cpu,RPdef_cpu)

      if (ierr .ne. 0) return
 c     
@@ -97,6 +107,9 @@ c
 c     
 c     =>   compute coarse grid matrix
 c     
+      call amg_cpuclock(cpu_ticks)
+      cpu_old = cpu_ticks
+
      call opdfn(ierr,k,levels,ndima,
     *     imin,imax,a,ia,ja,icg,ifg,b,ib,jb)
      if (ierr .ne. 0) return
@@ -106,10 +119,14 @@ c
      call trunc(k,imin,imax,a,ia,ja)

      call symm(k,isymm,imin,imax,a,ia,ja,icg,ifg)
+
+      call amg_cpuclock(cpu_ticks)
+      opdef_cpu = opdef_cpu + (cpu_ticks-cpu_old)
 c     
      go to 20
 c     
 30   continue
+c
 c     
 c     compute & print statistics after coarsening
    
@@ -118,6 +135,16 @@ c     compute & print statistics after coarsening
     *        nun,imin,imax,a,ia,ja,iu,ip,icg,ifg,
     *        b,ib,jb,ipmn,ipmx,iv,xp,yp)

+      write (9,100) dfloat(coarsen_cpu)/100.0,
+     *              dfloat(opdef_cpu)/100.0,
+     *              dfloat(RPdef_cpu)/100.0
+
+100   format(//'CPU Times for Setup Phase:',
+     *       /5x, '           Coarse Gridpoint Selection: ', F10.4,
+     *       /5x, '        Defining coarse-grid operator: ', F10.4,
+     *       /5x, 'Defining intergrid transfer operators: ', F10.4,//)
+
+
          close(9)
      endif

--- a/seq_ls/amg/amg/crsgd.f
+++ b/seq_ls/amg/amg/crsgd.f
@@ -3,7 +3,7 @@ c
      subroutine crsgd(ierr,k,nstr,ecg,ncg,ewt,nwt,mmax,icdep,
     *     nun,imin,imax,a,ia,ja,iu,ip,icg,ifg,
     *     b,ib,jb,ipmn,ipmx,iv,xp,yp,
-     *     ndimu,ndimp,ndima,ndimb)
+     *     ndimu,ndimp,ndima,ndimb,coarsen_cpu,RPdef_cpu)
 c     
 c---------------------------------------------------------------------
 c     
@@ -210,6 +210,11 @@ c
      dimension iarr(10)
 c     
      dimension icdep(10,10)
+
+      integer*4 coarsen_cpu
+      integer*4 cpu_ticks
+      integer*4 cpu_old
+      integer*4 RPdef_cpu
 c     
 c---------------------------------------------------------------------
 c     
@@ -229,6 +234,11 @@ c

      call idec(nwt,5,ndigit,iarr)
      iwts   =iarr(1)
+cveh
+c     record start time for the coarsening (coarsen_cpu) timer
+cveh
+      call amg_cpuclock(cpu_ticks)
+      cpu_old = cpu_ticks
 c     
 c===  > set array for coupled/dependent coarsening
 c     
@@ -284,6 +294,10 @@ c===  > test for grid coarse enough
 c     
      call gtest(k,mmax,imin,imax,icg,ifg,
     *     ndimu,ndimp,ndima,ndimb)
+cveh
+      call amg_cpuclock(cpu_ticks)
+      coarsen_cpu = coarsen_cpu + (cpu_ticks-cpu_old)
+cveh
 c     
 c===  > return if coarsest level
 c     
@@ -291,6 +305,9 @@ c
 c     
 c===  > load f-rows of b with strong c-connections
 c     
+      call amg_cpuclock(cpu_ticks)
+      cpu_old = cpu_ticks
+
      call bloadf(ierr,k,imin,imax,a,ia,ja,icg,b,ib,jb,
     *     ndimu,ndimp,ndima,ndimb)
      if (ierr .ne. 0) return
@@ -311,5 +328,7 @@ c
      call rstdf0(k,imin,imax,icg,b,ib,jb,
     *     ndimu,ndimp,ndima,ndimb)
 c     
+      call amg_cpuclock(cpu_ticks)
+      RPdef_cpu = RPdef_cpu + (cpu_ticks-cpu_old)
      return
      end
--- a/seq_ls/amg/amg/rcs_files
+++ b/seq_ls/amg/amg/rcs_files
@@ -23,6 +23,7 @@ f90amg.c
 f90amg_params.c
 f90amg_setup.c
 f90amg_solve.c
+f90amg_timing.c
 fortran.h
 general.h
 headers.h
--- a/seq_ls/amg/amg/setup.f
+++ b/seq_ls/amg/amg/setup.f
@@ -38,6 +38,12 @@ c
      dimension icdep(10,10)
 c     
      character*(*)  lfname
+    
+      integer*4 coarsen_cpu
+      integer*4 opdef_cpu
+      integer*4 RPdef_cpu
+      integer*4 cpu_ticks
+      integer*4 cpu_old

 c---------------------------------------------------------------------
 c     initialize the error flag to zero
@@ -80,6 +86,10 @@ c
 c     
 c===  > coarsen problem
 c     
+      coarsen_cpu = 0
+      opdef_cpu = 0
+      RPdef_cpu = 0
+
 20   k=k+1
 c     
 c     =>   choose coarse grid and define interpolation
@@ -87,7 +97,7 @@ c
      call crsgd(ierr,k-1,nstr,ecg,ncg,ewt,nwt,levels,icdep,
     *     nun,imin,imax,a,ia,ja,iu,ip,icg,ifg,
     *     b,ib,jb,ipmn,ipmx,iv,xp,yp,
-     *     ndimu,ndimp,ndima,ndimb)
+     *     ndimu,ndimp,ndima,ndimb,coarsen_cpu,RPdef_cpu)

      if (ierr .ne. 0) return
 c     
@@ -97,6 +107,9 @@ c
 c     
 c     =>   compute coarse grid matrix
 c     
+      call amg_cpuclock(cpu_ticks)
+      cpu_old = cpu_ticks
+
      call opdfn(ierr,k,levels,ndima,
     *     imin,imax,a,ia,ja,icg,ifg,b,ib,jb)
      if (ierr .ne. 0) return
@@ -106,10 +119,14 @@ c
      call trunc(k,imin,imax,a,ia,ja)

      call symm(k,isymm,imin,imax,a,ia,ja,icg,ifg)
+
+      call amg_cpuclock(cpu_ticks)
+      opdef_cpu = opdef_cpu + (cpu_ticks-cpu_old)
 c     
      go to 20
 c     
 30   continue
+c
 c     
 c     compute & print statistics after coarsening
    
@@ -118,6 +135,16 @@ c     compute & print statistics after coarsening
     *        nun,imin,imax,a,ia,ja,iu,ip,icg,ifg,
     *        b,ib,jb,ipmn,ipmx,iv,xp,yp)

+      write (9,100) dfloat(coarsen_cpu)/100.0,
+     *              dfloat(opdef_cpu)/100.0,
+     *              dfloat(RPdef_cpu)/100.0
+
+100   format(//'CPU Times for Setup Phase:',
+     *       /5x, '           Coarse Gridpoint Selection: ', F10.4,
+     *       /5x, '        Defining coarse-grid operator: ', F10.4,
+     *       /5x, 'Defining intergrid transfer operators: ', F10.4,//)
+
+
          close(9)
      endif