Skip to content

Commit

Permalink
number of flops changed to 1608 from 1320
Browse files Browse the repository at this point in the history
  • Loading branch information
urbach committed Oct 5, 2012
1 parent 2a5affa commit 2cdaaa5
Show file tree
Hide file tree
Showing 7 changed files with 23 additions and 19 deletions.
8 changes: 4 additions & 4 deletions GPU/mixed_solve.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2775,13 +2775,13 @@ void benchmark(spinor * const Q,MixedsolveParameter<RealT>& mixedsolveParameter)
assert((stop = clock())!=-1);
timeelapsed = (double) (stop-start)/CLOCKS_PER_SEC;
// x2 because 2x Hopping per iteration
double benchres = 1320.0*2*(VOLUME/2)* 1000 / timeelapsed / 1.0e9;
double benchres = 1608.0*2*(VOLUME/2)* 1000 / timeelapsed / 1.0e9;
printf("Benchmark: %f Gflops\n", benchres);
#else
stop = MPI_Wtime();
timeelapsed = (double) (stop-start);
// x2 because 2x Hopping per iteration
double benchres = 1320.0*2*(g_nproc*VOLUME/2)* 1000 / timeelapsed / 1.0e9;
double benchres = 1608.0*2*(g_nproc*VOLUME/2)* 1000 / timeelapsed / 1.0e9;
if (g_proc_id == 0) {
printf("Benchmark: %f Gflops\n", benchres);
}
Expand Down Expand Up @@ -2894,13 +2894,13 @@ void benchmark2(spinor * const Q,MixedsolveParameter<RealT>& mixedsolveParameter
assert((stop = clock())!=-1);
timeelapsed = (double) (stop-start)/CLOCKS_PER_SEC;
// x8 because 8x Hopping per iteration
double benchres = 1320.0*8*(VOLUME/2)* 100 / timeelapsed / 1.0e9;
double benchres = 1608.0*8*(VOLUME/2)* 100 / timeelapsed / 1.0e9;
printf("Benchmark: %f Gflops\n", benchres);
#else
stop = MPI_Wtime();
timeelapsed = (double) (stop-start);
// x8 because 8x Hopping per iteration
double benchres = 1320.0*8*(g_nproc*VOLUME/2)* 100 / timeelapsed / 1.0e9;
double benchres = 1608.0*8*(g_nproc*VOLUME/2)* 100 / timeelapsed / 1.0e9;
if (g_proc_id == 0) {
printf("Benchmark: %f Gflops\n", benchres);
}
Expand Down
2 changes: 1 addition & 1 deletion GPU/mixed_solve_eo_nd.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -3486,7 +3486,7 @@ extern "C" int mixedsolve_eo_nd (spinor * P_up, spinor * P_dn,
#ifdef ALGORITHM_BENCHMARK
double effectiveflops; // will be used to count the "effective" flops (from the algorithmic perspective)
// double hoppingflops = 1488.0;
double hoppingflops = 1320.0;
double hoppingflops = 1608.0;
double matrixflops = 2 * ( 2 * ( (2*hoppingflops+12+3) + (2*hoppingflops+3) + (12+2) + 12 ) );
#ifdef MPI
double allflops; // flops added for all processes
Expand Down
12 changes: 6 additions & 6 deletions benchmark.c
Original file line number Diff line number Diff line change
Expand Up @@ -315,9 +315,9 @@ int main(int argc,char *argv[])
if(g_proc_id==0) {
printf("# The following result is just to make sure that the calculation is not optimized away: %e\n", antioptaway);
printf("# Total compute time %e sec, variance of the time %e sec. (%d iterations).\n", sdt, sqdt, j_max);
printf("# Communication switched on:\n# (%d Mflops [%d bit arithmetic])\n", (int)(1320.0f/sdt),(int)sizeof(spinor)/3);
printf("# Communication switched on:\n# (%d Mflops [%d bit arithmetic])\n", (int)(1608.0f/sdt),(int)sizeof(spinor)/3);
#ifdef OMP
printf("# Mflops per OpenMP thread ~ %d\n",(int)(1320.0f/(omp_num_threads*sdt)));
printf("# Mflops per OpenMP thread ~ %d\n",(int)(1608.0f/(omp_num_threads*sdt)));
#endif
printf("\n");
fflush(stdout);
Expand Down Expand Up @@ -345,9 +345,9 @@ int main(int argc,char *argv[])
dt=1.0e6f*dt/((double)(k_max*j_max*(VOLUME)));
if(g_proc_id==0) {
printf("# The following result is printed just to make sure that the calculation is not optimized away: %e\n",antioptaway);
printf("# Communication switched off: \n# (%d Mflops [%d bit arithmetic])\n", (int)(1320.0f/dt),(int)sizeof(spinor)/3);
printf("# Communication switched off: \n# (%d Mflops [%d bit arithmetic])\n", (int)(1608.0f/dt),(int)sizeof(spinor)/3);
#ifdef OMP
printf("# Mflops per OpenMP thread ~ %d\n",(int)(1320.0f/(omp_num_threads*dt)));
printf("# Mflops per OpenMP thread ~ %d\n",(int)(1608.0f/(omp_num_threads*dt)));
#endif
printf("\n");
fflush(stdout);
Expand Down Expand Up @@ -411,9 +411,9 @@ int main(int argc,char *argv[])
if(g_proc_id==0) {
printf("# The following result is just to make sure that the calculation is not optimized away: %e\n", antioptaway);
printf("# Total compute time %e sec, variance of the time %e sec. (%d iterations).\n", sdt, sqdt, j_max);
printf("\n# (%d Mflops [%d bit arithmetic])\n", (int)(1392.0f/sdt),(int)sizeof(spinor)/3);
printf("\n# (%d Mflops [%d bit arithmetic])\n", (int)(1680.0f/sdt),(int)sizeof(spinor)/3);
#ifdef OMP
printf("# Mflops per OpenMP thread ~ %d\n",(int)(1320.0f/(omp_num_threads*sdt)));
printf("# Mflops per OpenMP thread ~ %d\n",(int)(1680.0f/(omp_num_threads*sdt)));
#endif
printf("\n");
fflush(stdout);
Expand Down
3 changes: 0 additions & 3 deletions benchmark.input

This file was deleted.

4 changes: 2 additions & 2 deletions linsolve.c
Original file line number Diff line number Diff line change
Expand Up @@ -146,8 +146,8 @@ int solve_cg(spinor * const k, spinor * const l, double eps_sq, const int rel_pr
}
etime = gettime();
/* 2 A + 2 Nc Ns + N_Count ( 2 A + 10 Nc Ns ) */
/* 2*1320.0 because the linalg is over VOLUME/2 */
flops = (2*(2*1320.0+2*3*4) + 2*3*4 + iteration*(2.*(2*1320.0+2*3*4) + 10*3*4))*VOLUME/2/1.0e6f;
/* 2*1608.0 because the linalg is over VOLUME/2 */
flops = (2*(2*1608.0+2*3*4) + 2*3*4 + iteration*(2.*(2*1608.0+2*3*4) + 10*3*4))*VOLUME/2/1.0e6f;
if(g_proc_id==0 && g_debug_level > 0) {
printf("CG: iter: %d eps_sq: %1.4e t/s: %1.4e\n", iteration, eps_sq, etime-atime);
printf("CG: flopcount: t/s: %1.4e mflops_local: %.1f mflops: %.1f\n",
Expand Down
7 changes: 7 additions & 0 deletions sample-input/benchmark.input
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
T=48
L=24
NrXProcs = 1
NrYProcs = 1
NrZProcs = 1

OMPNumThreads = 1
6 changes: 3 additions & 3 deletions solver/cg_her.c
Original file line number Diff line number Diff line change
Expand Up @@ -127,11 +127,11 @@ int cg_her(spinor * const P, spinor * const Q, const int max_iter,
etime = gettime();
g_sloppy_precision = save_sloppy;
/* 2 A + 2 Nc Ns + N_Count ( 2 A + 10 Nc Ns ) */
/* 2*1320.0 because the linalg is over VOLUME/2 */
flops = (2*(2*1320.0+2*3*4) + 2*3*4 + iteration*(2.*(2*1320.0+2*3*4) + 10*3*4))*N/1.0e6f;
/* 2*1608.0 because the linalg is over VOLUME/2 */
flops = (2*(2*1608.0+2*3*4) + 2*3*4 + iteration*(2.*(2*1608.0+2*3*4) + 10*3*4))*N/1.0e6f;
if(g_debug_level > 0 && g_proc_id == 0 && N != VOLUME) {
printf("# CG: iter: %d eps_sq: %1.4e t/s: %1.4e\n", iteration, eps_sq, etime-atime);
printf("# CG: flopcount (for tmWilson only): t/s: %1.4e mflops_local: %.1f mflops: %.1f\n",
printf("# CG: flopcount (for e/o tmWilson only): t/s: %1.4e mflops_local: %.1f mflops: %.1f\n",
etime-atime, flops/(etime-atime), g_nproc*flops/(etime-atime));
}
finalize_solver(solver_field, nr_sf);
Expand Down

0 comments on commit 2cdaaa5

Please sign in to comment.