Skip to content

Commit

Permalink
Do not use bgq_dispatch if BGQ_REPLACE=0 for invert,hmc_tm,benchmark
Browse files Browse the repository at this point in the history
  • Loading branch information
Meinersbur committed Jan 14, 2013
1 parent 19ffb6a commit 53b6126
Show file tree
Hide file tree
Showing 9 changed files with 165 additions and 134 deletions.
4 changes: 4 additions & 0 deletions benchmark.c
Original file line number Diff line number Diff line change
Expand Up @@ -463,5 +463,9 @@ static int main_benchmark(int argc,char *argv[])


/*
 * Program entry point for the benchmark executable.
 *
 * When BGQ_REPLACE is non-zero, main_benchmark is handed to
 * bgq_parallel_mainlike together with the command-line arguments;
 * otherwise main_benchmark is called directly.
 *
 * Returns the int produced by whichever path was compiled in, so that it
 * becomes the process exit status.
 */
int main(int argc, char *argv[]) {
#if BGQ_REPLACE
	return bgq_parallel_mainlike(&main_benchmark, argc, argv);
#else
	/* Propagate the status: falling off the end of main() is an implicit
	 * "return 0", which would report success even if main_benchmark failed. */
	return main_benchmark(argc, argv);
#endif
}
4 changes: 4 additions & 0 deletions bgq/bgq_HoppingMatrix.c
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,11 @@ void bgq_HoppingMatrix(bool isOdd, bgq_weylfield_controlblock *targetfield, bgq_


if(g_update_gauge_copy) {
#if BGQ_REPLACE
update_backward_gauge(g_gauge_field);
#else
bgq_gaugefield_transferfrom(g_gauge_field);
#endif
}


Expand Down
10 changes: 7 additions & 3 deletions bgq/bgq_dispatch.c
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,12 @@ int bgq_parallel(bgq_master_func master_func, void *master_arg) {
// We use OpenMP only to start the threads
// Overhead of using OpenMP is too large
#pragma omp parallel
{
{ //*((char*)NULL)=0;
size_t tid = omp_get_thread_num(); // Or
if (tid == 0) {
//int threads = omp_get_num_threads();
//master_print("Entered parallel control with %d threads\n", threads);
}

// Start workers
if (tid != 0) {
Expand Down Expand Up @@ -136,7 +140,7 @@ void bgq_worker() {


//assert((tid != 0) && "This function is for non-master threads only");
//size_t count = 0;
//size_t count = 0;
while (true) {
// Wait until every thread did its work
// This doesn't need to be a barrier, waiting for submission of some work from the master is ok too
Expand Down Expand Up @@ -177,7 +181,7 @@ void bgq_worker() {
//printf("%*sCALL: tid=%u seq=%u\n", (int)tid*20, "",(int)tid, (int)g_bgq_dispatch_seq);
assert(g_bgq_dispatch_func);
void *arg = g_bgq_dispatch_arg;
g_bgq_dispatch_func(arg, tid, threads); //TODO: Shuffle tid to load-balance work?
g_bgq_dispatch_func(arg, tid, threads); //TODO: Shuffle tid to loadbalance work?
}

if (tid==0) {
Expand Down
112 changes: 1 addition & 111 deletions bgqbench.c
Original file line number Diff line number Diff line change
Expand Up @@ -1450,119 +1450,9 @@ int main(int argc, char *argv[]) {
/* BEGIN MK */
assert(even_odd_flag);
exec_bench(j_max, k_max);
return 0;
/* END MK */

while (sdt < 30.) {
#ifdef MPI
MPI_Barrier(MPI_COMM_WORLD);
#endif
t1 = gettime();
antioptaway = 0.0;
for (j = 0; j < j_max; j++) {
for (k = 0; k < k_max; k++) {
Hopping_Matrix(0, g_spinor_field[k + k_max], g_spinor_field[k]);
Hopping_Matrix(1, g_spinor_field[k], g_spinor_field[k + k_max]);
antioptaway += creal(g_spinor_field[2 * k_max][0].s0.c0);
}
}
t2 = gettime();
dt = t2 - t1;
#ifdef MPI
MPI_Allreduce (&dt, &sdt, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
#else
sdt = dt;
#endif
qdt = dt * dt;
#ifdef MPI
MPI_Allreduce (&qdt, &sqdt, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
#else
sqdt = qdt;
#endif
sdt = sdt / ((double) g_nproc);
sqdt = sqrt(sqdt / g_nproc - sdt * sdt);
j_max *= 2;
}
j_max = j_max / 2;
dts = dt;
sdt = 1.0e6f * sdt / ((double) (k_max * j_max * (VOLUME)));
sqdt = 1.0e6f * sqdt / ((double) (k_max * j_max * (VOLUME)));

if (g_proc_id == 0) {
printf("# The following result is just to make sure that the calculation is not optimized away: %e\n", antioptaway);
printf("# Total compute time %e sec, variance of the time %e sec. (%d iterations).\n", sdt, sqdt, j_max);
printf("# Communication switched on:\n# (%d Mflops [%d bit arithmetic])\n", (int) (1608.0f / sdt), (int) sizeof(spinor) / 3);
#ifdef OMP
printf("# Mflops per OpenMP thread ~ %d\n",(int)(1608.0f/(omp_num_threads*sdt)));
#endif
printf("\n");
fflush(stdout);
}

#ifdef MPI
/* isolated computation */
t1 = gettime();
antioptaway=0.0;
for (j=0;j<j_max;j++) {
for (k=0;k<k_max;k++) {
Hopping_Matrix_nocom(0, g_spinor_field[k+k_max], g_spinor_field[k]);
Hopping_Matrix_nocom(1, g_spinor_field[2*k_max], g_spinor_field[k+k_max]);
antioptaway += creal(g_spinor_field[2*k_max][0].s0.c0);
}
}
t2 = gettime();
dt2 = t2-t1;
/* compute the bandwidth */
dt=dts-dt2;
MPI_Allreduce (&dt, &sdt, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
sdt=sdt/((double)g_nproc);
MPI_Allreduce (&dt2, &dt, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
dt=dt/((double)g_nproc);
dt=1.0e6f*dt/((double)(k_max*j_max*(VOLUME)));
if(g_proc_id==0) {
printf("# The following result is printed just to make sure that the calculation is not optimized away: %e\n",antioptaway);
printf("# Communication switched off: \n# (%d Mflops [%d bit arithmetic])\n", (int)(1608.0f/dt),(int)sizeof(spinor)/3);
#ifdef OMP
printf("# Mflops per OpenMP thread ~ %d\n",(int)(1608.0f/(omp_num_threads*dt)));
#endif
printf("\n");
fflush(stdout);
}
sdt=sdt/((double)k_max);
sdt=sdt/((double)j_max);
sdt=sdt/((double)(2*SLICE));
if(g_proc_id==0) {
printf("# The size of the package is %d bytes.\n",(SLICE)*192);
#ifdef _USE_HALFSPINOR
printf("# The bandwidth is %5.2f + %5.2f MB/sec\n", 192./sdt/1024/1024, 192./sdt/1024./1024);
#else
printf("# The bandwidth is %5.2f + %5.2f MB/sec\n", 2.*192./sdt/1024/1024, 2.*192./sdt/1024./1024);
#endif
}
#endif
fflush(stdout);

#ifdef HAVE_LIBLEMON
if(g_proc_id==0) {
printf("# Performing parallel IO test ...\n");
}
xlfInfo = construct_paramsXlfInfo(0.5, 0);
write_gauge_field( "conf.test", 64, xlfInfo);
free(xlfInfo);
if(g_proc_id==0) {
printf("# done ...\n");
}
#endif

#ifdef MPI
MPI_Finalize();
#endif
#ifdef OMP
free_omp_accumulators();
#endif
free_gauge_field();
free_geometry_indices();
free_spinor_field();
free_moment_field();
return (0);
return 0;
}
4 changes: 2 additions & 2 deletions build-bgq.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@ CPPFLAGS="${CPPFLAGS} -DBGQ_QPX=1"
CPPFLAGS="${CPPFLAGS} -DXLC=1"
#CPPFLAGS="${CPPFLAGS} -DBGQ=1"
CPPFLAGS="${CPPFLAGS} -DPAPI=1"
CPPFLAGS="${CPPFLAGS} -DBGQ_UNVECTORIZE=1"
CPPFLAGS="${CPPFLAGS} -DBGQ_UNVECTORIZE=0"
#CPPFLAGS="${CPPFLAGS} -DBGQ_COORDCHECK=1"
CPPFLAGS="${CPPFLAGS} -DBGQ_REPLACE=1"
CPPFLAGS="${CPPFLAGS} -DBGQ_REPLACE=0"


CFLAGS=""
Expand Down
4 changes: 4 additions & 0 deletions hmc_tm.c
Original file line number Diff line number Diff line change
Expand Up @@ -615,5 +615,9 @@ int main_hmc(int argc,char *argv[]) {


/*
 * Program entry point for the hmc_tm executable.
 *
 * When BGQ_REPLACE is non-zero, main_hmc is handed to bgq_parallel_mainlike
 * together with the command-line arguments; otherwise main_hmc is called
 * directly.
 *
 * Returns the int produced by whichever path was compiled in, so that it
 * becomes the process exit status.
 */
int main(int argc, char *argv[]) {
#if BGQ_REPLACE
	return bgq_parallel_mainlike(&main_hmc, argc, argv);
#else
	/* Propagate the status: falling off the end of main() is an implicit
	 * "return 0", which would report success even if main_hmc failed. */
	return main_hmc(argc, argv);
#endif
}
4 changes: 4 additions & 0 deletions invert.c
Original file line number Diff line number Diff line change
Expand Up @@ -586,5 +586,9 @@ static int main_invert(int argc, char *argv[])


/*
 * Program entry point for the invert executable.
 *
 * When BGQ_REPLACE is non-zero, main_invert is handed to
 * bgq_parallel_mainlike together with the command-line arguments;
 * otherwise main_invert is called directly.
 *
 * Returns the int produced by whichever path was compiled in, so that it
 * becomes the process exit status.
 */
int main(int argc, char *argv[]) {
#if BGQ_REPLACE
	return bgq_parallel_mainlike(&main_invert, argc, argv);
#else
	/* Propagate the status: falling off the end of main() is an implicit
	 * "return 0", which would report success even if main_invert failed. */
	return main_invert(argc, argv);
#endif
}
Loading

0 comments on commit 53b6126

Please sign in to comment.