hypre/utilities/timing.c
falgout e3181f26b1 Added 64 bit feature using HYPRE_Int (see tracker [issue489] for details).
Changed MPI routines to hypre_MPI routines.
Added hypre_printf, etc. routines.
Added AUTOTEST tests to look for 'int' and 'MPI_' calls.
Added a new approach for the Fortran interface (not implemented everywhere yet).
2010-12-20 19:27:44 +00:00

632 lines
18 KiB
C

/*BHEADER**********************************************************************
* Copyright (c) 2008, Lawrence Livermore National Security, LLC.
* Produced at the Lawrence Livermore National Laboratory.
* This file is part of HYPRE. See file COPYRIGHT for details.
*
* HYPRE is free software; you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License (as published by the Free
* Software Foundation) version 2.1 dated February 1999.
*
* $Revision$
***********************************************************************EHEADER*/
/******************************************************************************
*
* Routines for doing timing.
*
*****************************************************************************/
#define HYPRE_TIMING_GLOBALS
#include "_hypre_utilities.h"
#include "timing.h"
/*-------------------------------------------------------
 * Timing macros
 *
 * hypre_StartTiming() subtracts the current wall-clock and CPU readings
 * from the running counters; hypre_StopTiming() adds them back, so the
 * counters accumulate elapsed time between a Stop and the preceding Start.
 *
 * Both macros are wrapped in do { } while (0) so that each one expands to
 * exactly one statement.  Without the wrapper only the first of the two
 * updates would be controlled by an unbraced if/else/for.
 *
 * hypre_global_timing_ref(index, field) selects a field of the global
 * timing structure.  The index is ignored unless HYPRE_USE_PTHREADS is
 * defined, in which case it selects one entry of the array.
 *-------------------------------------------------------*/

#define hypre_StartTiming() \
   do { \
      hypre_TimingWallCount -= time_getWallclockSeconds(); \
      hypre_TimingCPUCount  -= time_getCPUSeconds(); \
   } while (0)

#define hypre_StopTiming() \
   do { \
      hypre_TimingWallCount += time_getWallclockSeconds(); \
      hypre_TimingCPUCount  += time_getCPUSeconds(); \
   } while (0)

#ifndef HYPRE_USE_PTHREADS
#define hypre_global_timing_ref(index,field) hypre_global_timing->field
#else
#define hypre_global_timing_ref(index,field) \
hypre_global_timing[index].field
#endif
/*--------------------------------------------------------------------------
* hypre_InitializeTiming
*--------------------------------------------------------------------------*/
HYPRE_Int
hypre_InitializeTiming( const char *name )
{
HYPRE_Int time_index;
double *old_wall_time;
double *old_cpu_time;
double *old_flops;
char **old_name;
HYPRE_Int *old_state;
HYPRE_Int *old_num_regs;
HYPRE_Int new_name;
HYPRE_Int i;
#ifdef HYPRE_USE_PTHREADS
HYPRE_Int threadid = hypre_GetThreadID();
#endif
/*-------------------------------------------------------
* Allocate global TimingType structure if needed
*-------------------------------------------------------*/
if (hypre_global_timing == NULL)
{
#ifndef HYPRE_USE_PTHREADS
hypre_global_timing = hypre_CTAlloc(hypre_TimingType, 1);
#else
hypre_global_timing = hypre_CTAlloc(hypre_TimingType,
hypre_NumThreads + 1);
#endif
}
/*-------------------------------------------------------
* Check to see if name has already been registered
*-------------------------------------------------------*/
new_name = 1;
for (i = 0; i < (hypre_global_timing_ref(threadid, size)); i++)
{
if (hypre_TimingNumRegs(i) > 0)
{
if (strcmp(name, hypre_TimingName(i)) == 0)
{
new_name = 0;
time_index = i;
hypre_TimingNumRegs(time_index) ++;
break;
}
}
}
if (new_name)
{
for (i = 0; i < hypre_global_timing_ref(threadid ,size); i++)
{
if (hypre_TimingNumRegs(i) == 0)
{
break;
}
}
time_index = i;
}
/*-------------------------------------------------------
* Register the new timing name
*-------------------------------------------------------*/
if (new_name)
{
if (time_index == (hypre_global_timing_ref(threadid, size)))
{
old_wall_time = (hypre_global_timing_ref(threadid, wall_time));
old_cpu_time = (hypre_global_timing_ref(threadid, cpu_time));
old_flops = (hypre_global_timing_ref(threadid, flops));
old_name = (hypre_global_timing_ref(threadid, name));
old_state = (hypre_global_timing_ref(threadid, state));
old_num_regs = (hypre_global_timing_ref(threadid, num_regs));
(hypre_global_timing_ref(threadid, wall_time)) =
hypre_CTAlloc(double, (time_index+1));
(hypre_global_timing_ref(threadid, cpu_time)) =
hypre_CTAlloc(double, (time_index+1));
(hypre_global_timing_ref(threadid, flops)) =
hypre_CTAlloc(double, (time_index+1));
(hypre_global_timing_ref(threadid, name)) =
hypre_CTAlloc(char *, (time_index+1));
(hypre_global_timing_ref(threadid, state)) =
hypre_CTAlloc(HYPRE_Int, (time_index+1));
(hypre_global_timing_ref(threadid, num_regs)) =
hypre_CTAlloc(HYPRE_Int, (time_index+1));
(hypre_global_timing_ref(threadid, size)) ++;
for (i = 0; i < time_index; i++)
{
hypre_TimingWallTime(i) = old_wall_time[i];
hypre_TimingCPUTime(i) = old_cpu_time[i];
hypre_TimingFLOPS(i) = old_flops[i];
hypre_TimingName(i) = old_name[i];
hypre_TimingState(i) = old_state[i];
hypre_TimingNumRegs(i) = old_num_regs[i];
}
hypre_TFree(old_wall_time);
hypre_TFree(old_cpu_time);
hypre_TFree(old_flops);
hypre_TFree(old_name);
hypre_TFree(old_state);
hypre_TFree(old_num_regs);
}
hypre_TimingName(time_index) = hypre_CTAlloc(char, 80);
strncpy(hypre_TimingName(time_index), name, 79);
hypre_TimingState(time_index) = 0;
hypre_TimingNumRegs(time_index) = 1;
(hypre_global_timing_ref(threadid, num_names)) ++;
}
return time_index;
}
/*--------------------------------------------------------------------------
 * hypre_FinalizeTiming
 *
 * Drops one registration of the timer in slot 'time_index'.  When the
 * last registration of a slot goes away its name string is released and
 * the count of live names is decremented.  Once no live names remain the
 * whole global timing structure is released and reset to NULL.
 *
 * A negative or too-large index, or an index whose slot is already
 * unregistered, leaves the name bookkeeping untouched.  Previously a
 * repeated finalize of the same slot decremented the live-name count a
 * second time, which could tear down the global structure while other
 * timers were still registered.
 *
 * Always returns 0.
 *
 * NOTE: in the threaded build the hypre_Timing* accessor macros appear to
 * pick up the local variable 'threadid' implicitly.
 *--------------------------------------------------------------------------*/

HYPRE_Int
hypre_FinalizeTiming( HYPRE_Int time_index )
{
   HYPRE_Int  ierr = 0;
#ifdef HYPRE_USE_PTHREADS
   HYPRE_Int  i;
   HYPRE_Int  threadid = hypre_GetThreadID();
   HYPRE_Int  free_global_timing;
#endif

   if (hypre_global_timing == NULL)
      return ierr;

   if ((time_index >= 0) &&
       (time_index < (hypre_global_timing_ref(threadid, size))))
   {
      /* only a slot that is still registered may give up a registration */
      if (hypre_TimingNumRegs(time_index) > 0)
      {
         hypre_TimingNumRegs(time_index) --;

         /* last registration gone: release the name exactly once */
         if (hypre_TimingNumRegs(time_index) == 0)
         {
            hypre_TFree(hypre_TimingName(time_index));
            (hypre_global_timing_ref(threadid, num_names)) --;
         }
      }
   }

#ifdef HYPRE_USE_PTHREADS

   /* the global structure may go only when no thread has a live name */
   free_global_timing = 1;
   for (i = 0; i <= hypre_NumThreads; i++)
   {
      if (hypre_global_timing_ref(i, num_names))
      {
         free_global_timing = 0;
         break;
      }
   }

   if (free_global_timing)
   {
      pthread_mutex_lock(&time_mtx);
      /* re-check under the lock: another thread may have freed it already */
      if (hypre_global_timing)
      {
         for (i = 0; i <= hypre_NumThreads; i++)
         {
            hypre_TFree(hypre_global_timing_ref(i, wall_time));
            hypre_TFree(hypre_global_timing_ref(i, cpu_time));
            hypre_TFree(hypre_global_timing_ref(i, flops));
            hypre_TFree(hypre_global_timing_ref(i, name));
            hypre_TFree(hypre_global_timing_ref(i, state));
            hypre_TFree(hypre_global_timing_ref(i, num_regs));
         }
         hypre_TFree(hypre_global_timing);
         hypre_global_timing = NULL;
      }
      pthread_mutex_unlock(&time_mtx);
   }

#else

   if ((hypre_global_timing -> num_names) == 0)
   {
      /*
       * There is a single structure in this build and the ref macro
       * ignores its index, so each array is released once rather than
       * once per slot.
       */
      hypre_TFree(hypre_global_timing_ref(0, wall_time));
      hypre_TFree(hypre_global_timing_ref(0, cpu_time));
      hypre_TFree(hypre_global_timing_ref(0, flops));
      hypre_TFree(hypre_global_timing_ref(0, name));
      hypre_TFree(hypre_global_timing_ref(0, state));
      hypre_TFree(hypre_global_timing_ref(0, num_regs));

      hypre_TFree(hypre_global_timing);
      hypre_global_timing = NULL;
   }

#endif

   return ierr;
}
/*--------------------------------------------------------------------------
 * hypre_IncFLOPCount
 *
 * Adds 'inc' floating-point operations to the running FLOP counter.
 * In the threaded build, every thread whose id differs from
 * hypre_NumThreads also adds the amount to the shared all-threads
 * counter while holding time_mtx.
 *
 * Does nothing when timing has not been initialized.  Always returns 0.
 *--------------------------------------------------------------------------*/

HYPRE_Int
hypre_IncFLOPCount( HYPRE_Int inc )
{
   HYPRE_Int  ierr = 0;
#ifdef HYPRE_USE_PTHREADS
   HYPRE_Int  threadid = hypre_GetThreadID();
#endif

   if (hypre_global_timing != NULL)
   {
      hypre_TimingFLOPCount += (double) (inc);

#ifdef HYPRE_USE_PTHREADS
      if (threadid != hypre_NumThreads)
      {
         pthread_mutex_lock(&time_mtx);
         hypre_TimingAllFLOPS += (double) (inc);
         pthread_mutex_unlock(&time_mtx);
      }
#endif
   }

   return ierr;
}
/*--------------------------------------------------------------------------
 * hypre_BeginTiming
 *
 * Starts (or re-enters) the timer in slot 'time_index'.
 *
 * The slot's state is a nesting depth.  Only the outermost Begin, when
 * the depth is zero, records a starting point: the running wall, CPU and
 * FLOP counters are subtracted from the slot's totals, so that the
 * matching hypre_EndTiming can add the later counter values and leave the
 * difference.  The depth is then incremented.
 *
 * Does nothing when timing has not been initialized.  Always returns 0.
 *--------------------------------------------------------------------------*/

HYPRE_Int
hypre_BeginTiming( HYPRE_Int time_index )
{
   HYPRE_Int  ierr = 0;
#ifdef HYPRE_USE_PTHREADS
   HYPRE_Int  threadid = hypre_GetThreadID();
#endif

   if (hypre_global_timing != NULL)
   {
      if (hypre_TimingState(time_index) == 0)
      {
         /* bring the running counters up to date before sampling them */
         hypre_StopTiming();

         hypre_TimingWallTime(time_index) -= hypre_TimingWallCount;
         hypre_TimingCPUTime(time_index)  -= hypre_TimingCPUCount;

#ifdef HYPRE_USE_PTHREADS
         /* the thread with id hypre_NumThreads uses the all-threads total */
         if (threadid == hypre_NumThreads)
         {
            hypre_TimingFLOPS(time_index) -= hypre_TimingAllFLOPS;
         }
         else
         {
            hypre_TimingFLOPS(time_index) -= hypre_TimingFLOPCount;
         }
#else
         hypre_TimingFLOPS(time_index) -= hypre_TimingFLOPCount;
#endif

         hypre_StartTiming();
      }

      hypre_TimingState(time_index) ++;
   }

   return ierr;
}
/*--------------------------------------------------------------------------
 * hypre_EndTiming
 *
 * Leaves the timer in slot 'time_index'.
 *
 * The slot's nesting depth is decremented first.  When it reaches zero
 * the running wall, CPU and FLOP counters are added to the slot's totals,
 * completing the interval opened by the matching hypre_BeginTiming.
 *
 * Does nothing when timing has not been initialized.  Always returns 0.
 *--------------------------------------------------------------------------*/

HYPRE_Int
hypre_EndTiming( HYPRE_Int time_index )
{
   HYPRE_Int  ierr = 0;
#ifdef HYPRE_USE_PTHREADS
   HYPRE_Int  threadid = hypre_GetThreadID();
#endif

   if (hypre_global_timing != NULL)
   {
      hypre_TimingState(time_index) --;

      if (hypre_TimingState(time_index) == 0)
      {
         /* bring the running counters up to date before sampling them */
         hypre_StopTiming();

         hypre_TimingWallTime(time_index) += hypre_TimingWallCount;
         hypre_TimingCPUTime(time_index)  += hypre_TimingCPUCount;

#ifdef HYPRE_USE_PTHREADS
         /* the thread with id hypre_NumThreads uses the all-threads total */
         if (threadid == hypre_NumThreads)
         {
            hypre_TimingFLOPS(time_index) += hypre_TimingAllFLOPS;
         }
         else
         {
            hypre_TimingFLOPS(time_index) += hypre_TimingFLOPCount;
         }
#else
         hypre_TimingFLOPS(time_index) += hypre_TimingFLOPCount;
#endif

         hypre_StartTiming();
      }
   }

   return ierr;
}
/*--------------------------------------------------------------------------
 * hypre_ClearTiming
 *
 * Resets the accumulated wall time, CPU time and FLOP total of every slot
 * to zero.  Names, registration counts and nesting state are left alone.
 *
 * Does nothing when timing has not been initialized.  Always returns 0.
 *--------------------------------------------------------------------------*/

HYPRE_Int
hypre_ClearTiming( )
{
   HYPRE_Int  ierr = 0;
   HYPRE_Int  slot = 0;
#ifdef HYPRE_USE_PTHREADS
   HYPRE_Int  threadid = hypre_GetThreadID();
#endif

   if (hypre_global_timing != NULL)
   {
      while (slot < (hypre_global_timing_ref(threadid,size)))
      {
         hypre_TimingWallTime(slot) = 0.0;
         hypre_TimingCPUTime(slot)  = 0.0;
         hypre_TimingFLOPS(slot)    = 0.0;
         slot++;
      }
   }

   return ierr;
}
/*--------------------------------------------------------------------------
* hypre_PrintTiming
*--------------------------------------------------------------------------*/
#ifndef HYPRE_USE_PTHREADS /* non-threaded version of hypre_PrintTiming */
/*
 * Non-threaded hypre_PrintTiming.
 *
 * Prints 'heading' followed by one report per registered timer.  For each
 * timer the wall and CPU times are reduced with a MAX over 'comm'; rank 0
 * then prints the reduced times and the MFLOPS rates obtained by dividing
 * its own FLOP total for the slot by each reduced time (0 when the time is
 * zero).
 *
 * The reductions are collective and are issued only for slots whose
 * registration count is positive.
 *
 * Does nothing when timing has not been initialized.  Always returns 0.
 */
HYPRE_Int
hypre_PrintTiming( const char *heading,
                   MPI_Comm    comm )
{
   HYPRE_Int  ierr = 0;
   HYPRE_Int  myrank;
   HYPRE_Int  slot;

   double     my_wall;
   double     my_cpu;
   double     max_wall;
   double     max_cpu;
   double     rate;

   if (hypre_global_timing == NULL)
   {
      return ierr;
   }

   hypre_MPI_Comm_rank(comm, &myrank );

   /* heading, printed by rank 0 only */
   if (myrank == 0)
   {
      hypre_printf("=============================================\n");
      hypre_printf("%s:\n", heading);
      hypre_printf("=============================================\n");
   }

   for (slot = 0; slot < (hypre_global_timing -> size); slot++)
   {
      /* skip slots that are not currently registered */
      if (hypre_TimingNumRegs(slot) <= 0)
      {
         continue;
      }

      my_wall = hypre_TimingWallTime(slot);
      my_cpu  = hypre_TimingCPUTime(slot);

      hypre_MPI_Allreduce(&my_wall, &max_wall, 1,
                          hypre_MPI_DOUBLE, hypre_MPI_MAX, comm);
      hypre_MPI_Allreduce(&my_cpu, &max_cpu, 1,
                          hypre_MPI_DOUBLE, hypre_MPI_MAX, comm);

      /* only rank 0 reports */
      if (myrank != 0)
      {
         continue;
      }

      hypre_printf("%s:\n", hypre_TimingName(slot));

      /* wall clock figures */
      hypre_printf(" wall clock time = %f seconds\n", max_wall);
      rate = (max_wall != 0.0) ?
             (hypre_TimingFLOPS(slot) / max_wall / 1.0E6) : 0.0;
      hypre_printf(" wall MFLOPS = %f\n", rate);

      /* CPU clock figures */
      hypre_printf(" cpu clock time = %f seconds\n", max_cpu);
      rate = (max_cpu != 0.0) ?
             (hypre_TimingFLOPS(slot) / max_cpu / 1.0E6) : 0.0;
      hypre_printf(" cpu MFLOPS = %f\n\n", rate);
   }

   return ierr;
}
#else /* threaded version of hypre_PrintTiming */
#ifdef hypre_MPI_Comm_rank
#undef hypre_MPI_Comm_rank
#endif
#ifdef hypre_MPI_Allreduce
#undef hypre_MPI_Allreduce
#endif
/*
 * Threaded hypre_PrintTiming.
 *
 * Only the thread whose id equals hypre_NumThreads prints; every other
 * thread returns immediately.  Seven rows are reported:
 *
 *   row 0     - named after slot 0 of the calling thread;
 *   rows 1..6 - the fixed names "SMG", "SMGRelax", "SMGResidual",
 *               "CyclicReduction", "SMGIntAdd" and "SMGRestrict".
 *
 * For each row the name is looked up in thread 0's table.  When found,
 * the per-thread times at that index are combined with a MAX over threads
 * 0 .. hypre_NumThreads-1 and the FLOP totals are summed.  If the calling
 * thread itself has a slot with the row number, that slot's time and
 * FLOPs are added on top.  The times are then MAX-reduced over 'comm' and
 * printed by rank 0.
 *
 * Fixes relative to the previous implementation:
 *   - the name buffer holds 80 bytes, the size of a registered name, and
 *     the copy is bounded (it used to be 32 bytes filled by strcpy);
 *   - 'index' always starts at -1 and is never used as an array index
 *     while negative;
 *   - row 0 is skipped when the calling thread has no registered slot 0,
 *     instead of dereferencing a missing name;
 *   - only registered slots of thread 0 are compared by name.
 *
 * NOTE: the hypre_Timing* accessor macros appear to pick up the local
 * variable 'threadid' implicitly; it is assigned before each use to
 * select whose table is read.
 * NOTE(review): the per-thread loops assume every worker thread has at
 * least index+1 slots, i.e. that workers register the same timers in the
 * same order as thread 0 - confirm against callers.
 *
 * Always returns 0.
 */
HYPRE_Int
hypre_PrintTiming( const char *heading,
                   MPI_Comm    comm )
{
   /* names reported in rows 1..6, in order */
   static const char *const fixed_names[6] =
   {
      "SMG",
      "SMGRelax",
      "SMGResidual",
      "CyclicReduction",
      "SMGIntAdd",
      "SMGRestrict"
   };

   HYPRE_Int  ierr = 0;

   double     local_wall_time;
   double     local_cpu_time;
   double     wall_time;
   double     cpu_time;
   double     wall_mflops;
   double     cpu_mflops;

   HYPRE_Int  i, j, index;
   HYPRE_Int  myrank;
   HYPRE_Int  my_thread = hypre_GetThreadID();
   HYPRE_Int  threadid;
   HYPRE_Int  num_regs;
   HYPRE_Int  have_own_slot;

   char       target_name[80];

   if (my_thread != hypre_NumThreads)
   {
      return ierr;
   }

   if (hypre_global_timing == NULL)
   {
      return ierr;
   }

   hypre_MPI_Comm_rank(comm, &myrank );

   /* print heading */
   if (myrank == 0)
   {
      hypre_printf("=============================================\n");
      hypre_printf("%s:\n", heading);
      hypre_printf("=============================================\n");
   }

   for (i = 0; i < 7; i++)
   {
      /* choose the name reported in this row */
      if (i == 0)
      {
         threadid = my_thread;
         if ((hypre_global_timing[threadid].size < 1) ||
             (hypre_TimingNumRegs(0) < 1))
         {
            /* nothing registered in the caller's slot 0: no row to print */
            continue;
         }
         strncpy(target_name, hypre_TimingName(0), sizeof(target_name) - 1);
         target_name[sizeof(target_name) - 1] = '\0';
      }
      else
      {
         strcpy(target_name, fixed_names[i - 1]);
      }

      /* look the name up among thread 0's registered slots */
      threadid = 0;
      index = -1;
      for (j = 0; j < hypre_global_timing[threadid].size; j++)
      {
         if ((hypre_TimingNumRegs(j) > 0) &&
             (strcmp(target_name, hypre_TimingName(j)) == 0))
         {
            index = j;
            break;
         }
      }

      /* does the calling thread itself own a slot with this row number? */
      have_own_slot = (i < hypre_global_timing[my_thread].size);

      if (have_own_slot)
      {
         threadid = my_thread;
         num_regs = hypre_TimingNumRegs(i);
      }
      else if (index >= 0)
      {
         /* threadid is still 0 here */
         num_regs = hypre_TimingNumRegs(index);
      }
      else
      {
         num_regs = 0;
      }

      if (num_regs > 0)
      {
         local_wall_time = 0.0;
         local_cpu_time  = 0.0;

         /* slowest worker thread determines the local time */
         if (index >= 0)
         {
            for (threadid = 0; threadid < hypre_NumThreads; threadid++)
            {
               local_wall_time =
                  hypre_max(local_wall_time, hypre_TimingWallTime(index));
               local_cpu_time =
                  hypre_max(local_cpu_time, hypre_TimingCPUTime(index));
            }
         }

         if (have_own_slot)
         {
            threadid = my_thread;
            local_wall_time += hypre_TimingWallTime(i);
            local_cpu_time  += hypre_TimingCPUTime(i);
         }

         hypre_MPI_Allreduce(&local_wall_time, &wall_time, 1,
                             hypre_MPI_DOUBLE, hypre_MPI_MAX, comm);
         hypre_MPI_Allreduce(&local_cpu_time, &cpu_time, 1,
                             hypre_MPI_DOUBLE, hypre_MPI_MAX, comm);

         if (myrank == 0)
         {
            hypre_printf("%s:\n", target_name);

            /* print wall clock info */
            hypre_printf(" wall clock time = %f seconds\n", wall_time);
            wall_mflops = 0.0;
            if (wall_time)
            {
               if (index >= 0)
               {
                  for (threadid = 0; threadid < hypre_NumThreads; threadid++)
                  {
                     wall_mflops +=
                        hypre_TimingFLOPS(index) / wall_time / 1.0E6;
                  }
               }
               if (have_own_slot)
               {
                  threadid = my_thread;
                  wall_mflops += hypre_TimingFLOPS(i) / wall_time / 1.0E6;
               }
            }
            hypre_printf(" wall MFLOPS = %f\n", wall_mflops);

            /* print CPU clock info */
            hypre_printf(" cpu clock time = %f seconds\n", cpu_time);
            cpu_mflops = 0.0;
            if (cpu_time)
            {
               if (index >= 0)
               {
                  for (threadid = 0; threadid < hypre_NumThreads; threadid++)
                  {
                     cpu_mflops +=
                        hypre_TimingFLOPS(index) / cpu_time / 1.0E6;
                  }
               }
               if (have_own_slot)
               {
                  threadid = my_thread;
                  cpu_mflops += hypre_TimingFLOPS(i) / cpu_time / 1.0E6;
               }
            }
            hypre_printf(" cpu MFLOPS = %f\n\n", cpu_mflops);
         }
      }
   }

   return ierr;
}
#endif