Skip to content

Commit

Permalink
Base version for BG/Q that is actually working (with disappointing per…
Browse files Browse the repository at this point in the history
…formance)
  • Loading branch information
Meinersbur committed Nov 12, 2012
1 parent 8b37a8d commit 4c2dc40
Show file tree
Hide file tree
Showing 16 changed files with 315 additions and 273 deletions.
2 changes: 2 additions & 0 deletions .cproject
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
<listOptionValue builtIn="false" value="BGQ_QPX=1"/>
<listOptionValue builtIn="false" value="SPI=1"/>
<listOptionValue builtIn="false" value="MPI=1"/>
<listOptionValue builtIn="false" value="PAPI=1"/>
</option>
<option id="gnu.cpp.compiler.option.include.paths.99280228" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="/usr/include/openmpi"/>
Expand All @@ -38,6 +39,7 @@
<listOptionValue builtIn="false" value="BGQ_QPX=1"/>
<listOptionValue builtIn="false" value="SPI=1"/>
<listOptionValue builtIn="false" value="MPI=1"/>
<listOptionValue builtIn="false" value="PAPI=1"/>
</option>
<option id="gnu.c.compiler.option.include.paths.318363256" name="Include paths (-I)" superClass="gnu.c.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="/usr/include/openmpi"/>
Expand Down
2 changes: 1 addition & 1 deletion DirectPut.c
Original file line number Diff line number Diff line change
Expand Up @@ -410,7 +410,7 @@ int msg_InjFifoInit ( msg_InjFifoHandle_t *injFifoHandlePtr,
return rc;
}

if (g_proc_id==0) printf( "HW freespace=%lx\n", MUSPI_getHwFreeSpace(MUSPI_IdToInjFifo(fifoIds[i],&info->subgroup[subgroupId])) );
//if (g_proc_id==0) printf( "HW freespace=%lx\n", MUSPI_getHwFreeSpace(MUSPI_IdToInjFifo(fifoIds[i],&info->subgroup[subgroupId])) );
}

// Activate the fifos.
Expand Down
33 changes: 22 additions & 11 deletions bgq/bgq_HoppingMatrix.c
Original file line number Diff line number Diff line change
Expand Up @@ -215,12 +215,11 @@ static void bgq_HoppingMatrix_worker_body(void *argptr, size_t tid, size_t threa
}



static inline void bgq_HoppingMatrix_worker_readFulllayout(void * restrict arg, size_t tid, size_t threads, bool kamul, bool readFulllayout) {
static inline void bgq_HoppingMatrix_worker(void * restrict arg, size_t tid, size_t threads, bool kamul, bool readFulllayout) {
bgq_HoppingMatrix_workload *work = arg;
bool isOdd = work->isOdd_src;
bgq_weylfield_controlblock *spinorfield = work->spinorfield;
bgq_weylfield_controlblock *targetfield = work->targetfield;
bgq_weylfield_controlblock * restrict spinorfield = work->spinorfield;
bgq_weylfield_controlblock * restrict targetfield = work->targetfield;
ucoord ic_begin = work->ic_begin;
ucoord ic_end = work->ic_end;
bool noprefetchstream = work->noprefetchstream;
Expand Down Expand Up @@ -260,14 +259,27 @@ static inline void bgq_HoppingMatrix_worker_readFulllayout(void * restrict arg,

bgq_gaugesite *gaugesite = &g_bgq_gaugefield_fromCollapsed[isOdd][ic];
bgq_weyl_ptr_t *destptrs = &targetfield->sendptr[ic];
#if 0
bgq_weyl_ptr_t destptrsx = {
&targetfield->sec_collapsed->d[TUP],
&targetfield->sec_collapsed->d[TDOWN],
&targetfield->sec_collapsed->d[XUP],
&targetfield->sec_collapsed->d[XDOWN],
&targetfield->sec_collapsed->d[YUP],
&targetfield->sec_collapsed->d[YDOWN],
&targetfield->sec_collapsed->d[ZUP],
&targetfield->sec_collapsed->d[ZDOWN]
};
bgq_weyl_ptr_t * restrict destptrs = &destptrsx;
#endif

//TODO: prefetching
//TODO: Check inlining
bgq_su3_spinor_decl(spinor);
if (readFulllayout) {
bgq_spinorsite *spinorsite = &spinorfield->sec_fullspinor[ic];
assert(spinorsite->s[1][0][0]!=0);
bgq_su3_spinor_prefetch_double(&spinorfield->sec_fullspinor[ic+1]); // TODO: This prefetch is too early
//bgq_su3_spinor_prefetch_double(&spinorfield->sec_fullspinor[ic+1]); // TODO: This prefetch is too early
bgq_HoppingMatrix_loadFulllayout(spinor, spinorsite, t1, t2, x, y, z);
} else {
bgq_weylsite *weylsite = &spinorfield->sec_collapsed[ic];
Expand All @@ -279,17 +291,16 @@ static inline void bgq_HoppingMatrix_worker_readFulllayout(void * restrict arg,
}

// Wrapper with the fixed (arg, tid, threads) signature: forwards its arguments
// unchanged to the generic worker with kamul=false and readFulllayout=true.
// NOTE(review): this text is a rendered diff; the two call lines in the body look
// like the pre-rename and post-rename versions of one statement, not two
// separate calls -- confirm against the real file.
static void bgq_HoppingMatrix_nokamul_worker_readFulllayout(void *arg, size_t tid, size_t threads) {
bgq_HoppingMatrix_worker_readFulllayout(arg,tid,threads,false,true);
bgq_HoppingMatrix_worker(arg,tid,threads,false,true);
}

// Wrapper with the fixed (arg, tid, threads) signature: forwards its arguments
// unchanged to the generic worker with kamul=true and readFulllayout=true.
// NOTE(review): this text is a rendered diff; the two call lines in the body look
// like the pre-rename and post-rename versions of one statement, not two
// separate calls -- confirm against the real file.
static void bgq_HoppingMatrix_kamul_worker_readFulllayout(void *arg, size_t tid, size_t threads) {
bgq_HoppingMatrix_worker_readFulllayout(arg,tid,threads,true,true);
bgq_HoppingMatrix_worker(arg,tid,threads,true,true);
}
// Wrapper with the fixed (arg, tid, threads) signature: forwards its arguments
// unchanged to the generic worker with kamul=false and readFulllayout=false
// (the worker then takes its non-full-layout branch).
// NOTE(review): this text is a rendered diff; the two call lines in the body look
// like the pre-rename and post-rename versions of one statement, not two
// separate calls -- confirm against the real file.
static void bgq_HoppingMatrix_nokamul_worker_readWeyllayout(void *arg, size_t tid, size_t threads) {
bgq_HoppingMatrix_worker_readFulllayout(arg,tid,threads,false,false);
bgq_HoppingMatrix_worker(arg,tid,threads,false,false);
}
// Wrapper with the fixed (arg, tid, threads) signature: forwards its arguments
// unchanged to the generic worker with kamul=true and readFulllayout=false.
// Its address is handed to bgq_master_call() from bgq_HoppingMatrix().
// NOTE(review): this text is a rendered diff; the two call lines in the body look
// like the pre-rename and post-rename versions of one statement, not two
// separate calls -- confirm against the real file.
static void bgq_HoppingMatrix_kamul_worker_readWeyllayout(void *arg, size_t tid, size_t threads) {
bgq_HoppingMatrix_worker_readFulllayout(arg,tid,threads,true,false);
bgq_HoppingMatrix_worker(arg,tid,threads,true,false);
}


Expand Down Expand Up @@ -764,7 +775,7 @@ void bgq_HoppingMatrix(bool isOdd, bgq_weylfield_controlblock *targetfield, bgq_
bgq_master_call(&bgq_HoppingMatrix_kamul_worker_readWeyllayout, &work_body);
}

if (!COMM_X) {
if (!COMM_T) {
// Copy the data from HALO_T into the required locations
bgq_master_sync();
static bgq_work_datamove work_datamovet;
Expand Down
17 changes: 9 additions & 8 deletions bgq/bgq_HoppingMatrix.h
Original file line number Diff line number Diff line change
Expand Up @@ -440,38 +440,39 @@ EXTERN_INLINE void bgq_HoppingMatrix_compute_storeWeyllayout_alldir_raw(bgq_weyl
//TODO: There is some spilling that can probably be avoided

bgq_su3_matrix_prefetch_double(&gaugesite->su3[TDOWN]);
bgq_prefetch(&targetptrs->d[TDOWN]);
//bgq_prefetch(&targetptrs->d[TDOWN]);
bgq_HoppingMatrix_compute_storeWeyllayout_tup(targetptrs, gaugesite, spinor, t1, t2, x, y, z, qka0,qka1,qka2,qka3,kamul);

bgq_su3_matrix_prefetch_double(&gaugesite->su3[XUP]);
bgq_prefetch(&targetptrs->d[XUP]);
//bgq_prefetch(&targetptrs->d[XUP]);
bgq_HoppingMatrix_compute_storeWeyllayout_tdown(targetptrs, gaugesite, spinor, t1, t2, x, y, z, qka0,qka1,qka2,qka3,kamul);

bgq_su3_matrix_prefetch_double(&gaugesite->su3[XDOWN]);
bgq_prefetch(&targetptrs->d[XDOWN]);
//bgq_prefetch(&targetptrs->d[XDOWN]);
bgq_HoppingMatrix_compute_storeWeyllayout_xup(targetptrs, gaugesite, spinor, t1, t2, x, y, z, qka0,qka1,qka2,qka3,kamul);

bgq_su3_matrix_prefetch_double(&gaugesite->su3[YUP]);
bgq_prefetch(&targetptrs->d[YUP]);
//bgq_prefetch(&targetptrs->d[YUP]);
bgq_HoppingMatrix_compute_storeWeyllayout_xdown(targetptrs, gaugesite, spinor, t1, t2, x, y, z, qka0,qka1,qka2,qka3,kamul);

bgq_su3_matrix_prefetch_double(&gaugesite->su3[YDOWN]);
bgq_prefetch(&targetptrs->d[TDOWN]);
//bgq_prefetch(&targetptrs->d[TDOWN]);
bgq_HoppingMatrix_compute_storeWeyllayout_yup(targetptrs, gaugesite, spinor, t1, t2, x, y, z, qka0,qka1,qka2,qka3,kamul);

bgq_su3_matrix_prefetch_double(&gaugesite->su3[ZUP]);
bgq_prefetch(&targetptrs->d[ZUP]);
//bgq_prefetch(&targetptrs->d[ZUP]);
bgq_HoppingMatrix_compute_storeWeyllayout_ydown(targetptrs, gaugesite, spinor, t1, t2, x, y, z, qka0,qka1,qka2,qka3,kamul);

bgq_su3_matrix_prefetch_double(&gaugesite->su3[ZDOWN]);
bgq_prefetch(&targetptrs->d[ZDOWN]);
//bgq_prefetch(&targetptrs->d[ZDOWN]);
bgq_HoppingMatrix_compute_storeWeyllayout_zup(targetptrs, gaugesite, spinor, t1, t2, x, y, z, qka0,qka1,qka2,qka3,kamul);

bgq_su3_matrix_prefetch_double(&gaugesite->su3[TUP]);
bgq_prefetch(&targetptrs->d[TUP]);
//bgq_prefetch(&targetptrs->d[TUP]);
bgq_HoppingMatrix_compute_storeWeyllayout_zdown(targetptrs, gaugesite, spinor, t1, t2, x, y, z, qka0,qka1,qka2,qka3,kamul);
}


#define bgq_HoppingMatrix_compute_storeWeyllayout(targetptrs,gaugesite,spinor,t1,t2,x,y,z,kamul) bgq_HoppingMatrix_compute_storeWeyllayout_raw(targetptrs,gaugesite,bgq_su3_spinor_vars(spinor),t1,t2,x,y,z,kamul)
EXTERN_INLINE void bgq_HoppingMatrix_compute_storeWeyllayout_raw(bgq_weyl_ptr_t *targetptrs, bgq_gaugesite *gaugesite, bgq_su3_spinor_params(spinor), ucoord t1, ucoord t2, ucoord x, ucoord y, ucoord z, bool kamul)
{
Expand Down
43 changes: 21 additions & 22 deletions bgq/bgq_comm.c
Original file line number Diff line number Diff line change
Expand Up @@ -1606,20 +1606,19 @@ static unsigned mySpirank;
static inline unsigned bgq_abcde2spirank(Personality_t *pers, uint8_t a, uint8_t b,uint8_t c,uint8_t d,uint8_t e) {
assert(pers);
torus_t tdims = {
pers->Network_Config.Anodes,
pers->Network_Config.Bnodes,
pers->Network_Config.Cnodes,
pers->Network_Config.Dnodes,
pers->Network_Config.Enodes
};
torus_t dims =
{
pers->Network_Config.Anodes,
pers->Network_Config.Bnodes,
pers->Network_Config.Cnodes,
pers->Network_Config.Dnodes,
pers->Network_Config.Enodes
};
pers->Network_Config.Anodes,
pers->Network_Config.Bnodes,
pers->Network_Config.Cnodes,
pers->Network_Config.Dnodes,
pers->Network_Config.Enodes
};
torus_t dims = {
pers->Network_Config.Anodes,
pers->Network_Config.Bnodes,
pers->Network_Config.Cnodes,
pers->Network_Config.Dnodes,
pers->Network_Config.Enodes
};

unsigned numNodes = tdims.a * tdims.b * tdims.c * tdims.d * tdims.e;
unsigned result = ((((a)*dims.b + b)*dims.c + c)*dims.d + d)*dims.e + e;
Expand Down Expand Up @@ -1671,7 +1670,7 @@ static void setup_destinations(Personality_t *pers) {
MUSPI_SetUpDestination(&nb2dest[cd].dest, nb.a, nb.b, nb.c, nb.d, nb.e);
nb2dest[cd].hintsABCD = 0;
nb2dest[cd].hintsE = 0;
printf("node %d: %d(%d,%d,%d,%d,%d)-%d->%d(%d,%d,%d,%d,%d)\n", g_proc_id, mySpirank, tcoords.a, tcoords.b, tcoords.c, tcoords.e, tcoords.d, cd, nbrank, nb.a, nb.b, nb.c, nb.d, nb.e);
//printf("node %d: %d(%d,%d,%d,%d,%d)-%d->%d(%d,%d,%d,%d,%d)\n", g_proc_id, mySpirank, tcoords.a, tcoords.b, tcoords.c, tcoords.e, tcoords.d, cd, nbrank, nb.a, nb.b, nb.c, nb.d, nb.e);
}
}

Expand Down Expand Up @@ -1835,7 +1834,7 @@ void bgq_comm_spi_init(void) {
assert((roffsets[cd] + secsize) <= (bgq_weyl_section_offset(sec_recv_end) - bgq_weyl_section_offset(sec_recv_begin)));
totalMessageSize += secsize;

master_print("SPI %llu: d=%llu msize=%zu soffset=%zu d_dst=%llu roffset=%zu\n", cd, d_src, messageSizes[commdir], soffsets[commdir], d_dst, roffsets[cd]);
//master_print("SPI %llu: d=%llu msize=%zu soffset=%zu d_dst=%llu roffset=%zu\n", cd, d_src, messageSizes[commdir], soffsets[commdir], d_dst, roffsets[cd]);
}
assert(totalMessageSize == bgq_weyl_section_offset(sec_recv_end) - bgq_weyl_section_offset(sec_recv_begin));

Expand Down Expand Up @@ -1910,7 +1909,7 @@ void bgq_comm_spi_init(void) {
//TODO: inline?
void bgq_comm_recv(bool nospi) {
assert(omp_get_thread_num()==0);
master_print("Comm Receiving...\n");
//master_print("Comm Receiving...\n");
#ifdef SPI
if (!nospi) {
// reset the recv counter
Expand All @@ -1925,7 +1924,7 @@ void bgq_comm_recv(bool nospi) {

void bgq_comm_send(bool nospi) {
assert(omp_get_thread_num()==0);
master_print("Comm Sending...\n");
//master_print("Comm Sending...\n");
#ifdef SPI
if (!nospi) {
// make sure everybody has reset recvCounter
Expand All @@ -1947,7 +1946,7 @@ void bgq_comm_send(bool nospi) {

void bgq_comm_wait(bool nospi) {
assert(omp_get_thread_num()==0);
master_print("Comm Waiting...\n");
//master_print("Comm Waiting...\n");

uint64_t ppc32 = mfspr(SPRN_PPR32);
ThreadPriority_Low(); // If there is some other work to be done on this node, give it priority
Expand All @@ -1958,17 +1957,17 @@ void bgq_comm_wait(bool nospi) {
uint64_t startTime = 0;

// Wait until all data is received
printf("node %d: %llu bytes to be received\n", g_proc_id, totalMessageSize);
//printf("node %d: %llu bytes to be received\n", g_proc_id, totalMessageSize);
while(recvCounter > 0) {
// Check range of pending bytes to receive
assert(recvCounter <= totalMessageSize);

if (GetTimeBase() - startTime >= 1600) {
printf("node %d: %llu bytes left\n", g_proc_id, recvCounter);
//printf("node %d: %llu bytes left\n", g_proc_id, recvCounter);
startTime = GetTimeBase();
}
}
printf("node %d: All data received\n", g_proc_id);
//printf("node %d: All data received\n", g_proc_id);

// Wait until all data has been sent
while (true) {
Expand Down
3 changes: 2 additions & 1 deletion bgq/bgq_operator.inc.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

#include <stdbool.h>


#if 0
#define OPERATOR_OUTPLACENAME bgq_operator_outplace
#define OPERATOR_INPLACENAME bgq_operator_inplace
#define OPERATOR_ARGFIELDS 1
Expand Down Expand Up @@ -182,3 +182,4 @@ void bgq_operator_inplace(bgq_weylfield_controlblock *targetfield) {

#undef OPERATOR_INCLUDED
#undef OPERATOR_NAME
#endif
Loading

0 comments on commit 4c2dc40

Please sign in to comment.