Skip to content

Commit

Permalink
Bugfixes for legacy compatibility
Browse files Browse the repository at this point in the history
  • Loading branch information
Michael Kruse committed Dec 4, 2012
1 parent ba4d091 commit e07760a
Show file tree
Hide file tree
Showing 17 changed files with 252 additions and 82 deletions.
10 changes: 5 additions & 5 deletions bgq/bgq_ComputeWeyl.inc.c
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ void bgq_HoppingMatrix_compute_storeWeyllayout_raw(bgq_weyl_ptr_t *targetptrs, b
bgq_setbgqvalue_src(t1, x, y, z, TUP, BGQREF_TDOWN_WEYL, bgq_cmplxval1(weyl_tup_v0_c0));
bgq_setbgqvalue_src(t2, x, y, z, TUP, BGQREF_TDOWN_WEYL, bgq_cmplxval2(weyl_tup_v0_c0));
if (kamul) {
bgq_su3_weyl_cmul(weyl_tup, qka0, weyl_tup);
bgq_su3_weyl_cjgmul(weyl_tup, qka0, weyl_tup);
}
bgq_setdesc(BGQREF_TDOWN_KAMUL,"BGQREF_TDOWN_KAMUL");
bgq_setbgqvalue_src(t1, x, y, z, TUP, BGQREF_TDOWN_KAMUL, bgq_cmplxval1(weyl_tup_v0_c0));
Expand Down Expand Up @@ -155,7 +155,7 @@ void bgq_HoppingMatrix_compute_storeWeyllayout_raw(bgq_weyl_ptr_t *targetptrs, b
bgq_setbgqvalue_src(t1, x, y, z, XDOWN, BGQREF_XUP_GAUGE, bgq_cmplxval1(gauge_xup_c00));
bgq_setbgqvalue_src(t2, x, y, z, XDOWN, BGQREF_XUP_GAUGE, bgq_cmplxval2(gauge_xup_c00));
bgq_su3_weyl_mvmul(weyl_xdown, gauge_xup, weyl_xdown);
bgq_setdesc(BGQREF_XUP_WEYL,"BGQREF_TUP_WEYL");
bgq_setdesc(BGQREF_XUP_WEYL,"BGQREF_XUP_WEYL");
bgq_setbgqvalue_src(t1, x, y, z, XDOWN, BGQREF_XUP_WEYL, bgq_cmplxval1(weyl_xdown_v0_c0));
bgq_setbgqvalue_src(t2, x, y, z, XDOWN, BGQREF_XUP_WEYL, bgq_cmplxval2(weyl_xdown_v0_c0));
if (kamul) {
Expand Down Expand Up @@ -204,7 +204,7 @@ void bgq_HoppingMatrix_compute_storeWeyllayout_raw(bgq_weyl_ptr_t *targetptrs, b
bgq_setbgqvalue_src(t1, x, y, z, XUP, BGQREF_XDOWN_WEYL, bgq_cmplxval1(weyl_xup_v0_c0));
bgq_setbgqvalue_src(t2, x, y, z, XUP, BGQREF_XDOWN_WEYL, bgq_cmplxval2(weyl_xup_v0_c0));
if (kamul) {
bgq_su3_weyl_cmul(weyl_xup, qka1, weyl_xup);
bgq_su3_weyl_cjgmul(weyl_xup, qka1, weyl_xup);
}
bgq_setdesc(BGQREF_XDOWN_KAMUL,"BGQREF_XDOWN_KAMUL");
bgq_setbgqvalue_src(t1, x, y, z, XUP, BGQREF_XDOWN_KAMUL, bgq_cmplxval1(weyl_xup_v0_c0));
Expand Down Expand Up @@ -273,7 +273,7 @@ void bgq_HoppingMatrix_compute_storeWeyllayout_raw(bgq_weyl_ptr_t *targetptrs, b

bgq_su3_weyl_mvinvmul(weyl_yup, gauge_ydown, weyl_yup);
if (kamul) {
bgq_su3_weyl_cmul(weyl_yup, qka2, weyl_yup);
bgq_su3_weyl_cjgmul(weyl_yup, qka2, weyl_yup);
}

//bgq_su3_weyl_zeroload(targetptrs->d[YDOWN]);
Expand Down Expand Up @@ -339,7 +339,7 @@ void bgq_HoppingMatrix_compute_storeWeyllayout_raw(bgq_weyl_ptr_t *targetptrs, b

bgq_su3_weyl_mvinvmul(weyl_zup, gauge_zdown, weyl_zup);
if (kamul) {
bgq_su3_weyl_cmul(weyl_zup, qka3, weyl_zup);
bgq_su3_weyl_cjgmul(weyl_zup, qka3, weyl_zup);
}

//bgq_su3_weyl_zeroload(targetptrs->d[ZDOWN]);
Expand Down
67 changes: 17 additions & 50 deletions bgq/bgq_HoppingMatrix.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
#include "bgq_comm.h"
#include "bgq_workers.h"

#include "../update_backward_gauge.h"

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
Expand Down Expand Up @@ -161,56 +163,6 @@ void bgq_HoppingMatrix_work(bgq_HoppingMatrix_workload *work, bool nokamul, bgq
if (layout & ly_weyl)
flopPerSite += /*weyl reduce*/ 8/*dirs*/ * (2 * 3)/*cmplx per weyl*/ * 2/*flops*/;
flopaccumulator += sites * PHYSICAL_LK * flopPerSite;
#if 0
if (readFulllayout) {
if (nokamul) {
if (inputfield->isFulllayoutSloppy)
bgq_master_call(&bgq_HoppingMatrix_nokamul_worker_readFulllayout_float, work);
else
bgq_master_call(&bgq_HoppingMatrix_nokamul_worker_readFulllayout_double, work);
flopaccumulator += sites * PHYSICAL_LK * (
/*weyl reduce*/ 8/*dirs*/ * 2/*weyl per dir*/ * (2 * 3)/*cmplx per weyl*/ * 2/*flops*/ +
/*su3 mul*/ 8/*dirs*/ * 2/*su3vec per weyl*/ * (6*9 + 2*3)/*flop per su3 mv-mul*/
);

} else {
if (inputfield->isFulllayoutSloppy)
bgq_master_call(&bgq_HoppingMatrix_kamul_worker_readFulllayout_float, work);
else
bgq_master_call(&bgq_HoppingMatrix_kamul_worker_readFulllayout_double, work);
flopaccumulator += sites * PHYSICAL_LK * (
/*weyl reduce*/ 8/*dirs*/ * 2/*weyl per dir*/ * (2 * 3)/*cmplx per weyl*/ * 2/*flops*/ +
/*su3 mul*/ 8/*dirs*/ * 2/*su3vec per weyl*/ * (6*9 + 2*3)/*flop per su3 mv-mul*/ +
/*kamul*/ 8/*dirs*/ * (2 * 3)/*cmplx per weyl*/ * 6/*flops cmplx mul*/
);
}
} else {
// readWeyl
if (nokamul) {
if (inputfield->isWeyllayoutSloppy)
bgq_master_call(&bgq_HoppingMatrix_nokamul_worker_readWeyllayout_float, work);
else
bgq_master_call(&bgq_HoppingMatrix_nokamul_worker_readWeyllayout_double, work);
flopaccumulator += sites * PHYSICAL_LK * (
/*accum spinor*/ 7/*dirs*/ * (4 * 3)/*cmplx per spinor*/ * 2/*flops accum*/ +
/*weyl reduce*/ 8/*dirs*/ * 2/*weyl per dir*/ * (2 * 3)/*cmplx per weyl*/ * 2/*flops*/ +
/*su3 mul*/ 8/*dirs*/ * 2/*su3vec per weyl*/ * (6*9 + 2*3)/*flop per su3 mv-mul*/
);
} else {
if (inputfield->isWeyllayoutSloppy)
bgq_master_call(&bgq_HoppingMatrix_kamul_worker_readWeyllayout_float, work);
else
bgq_master_call(&bgq_HoppingMatrix_kamul_worker_readWeyllayout_double, work);
flopaccumulator += sites * PHYSICAL_LK * (
/*accum spinor*/ 7/*dirs*/ * (4 * 3)/*cmplx per spinor*/ * 2/*flops accum*/ +
/*weyl reduce*/ 8/*dirs*/ * 2/*weyl per dir*/ * (2 * 3)/*cmplx per weyl*/ * 2/*flops*/ +
/*su3 mul*/ 8/*dirs*/ * 2/*su3vec per weyl*/ * (6*9 + 2*3)/*flop per su3 mv-mul*/ +
/*kamul*/ 8/*dirs*/ * (2 * 3)/*cmplx per weyl*/ * 6/*flops cmplx mul*/
);
}
}
#endif
//master_print("nokamul=%d readFulllayout=%d sites=%zu flopaccum=%llu diff=%llu\n", nokamul, readFulllayout, sites, flopaccumulator, flopaccumulator-old);
}


Expand Down Expand Up @@ -332,13 +284,28 @@ void Hopping_Matrix(const int ieo, spinor * const l, spinor * const k) {
bgq_weylfield_controlblock *targetfield = bgq_translate_spinorfield(l);
bgq_weylfield_controlblock *sourcefield = bgq_translate_spinorfield(k);

#ifdef _GAUGE_COPY
if(g_update_gauge_copy) {
update_backward_gauge(g_gauge_field);
}
#endif

//master_print("BEGIN HoppingMatrix replacement\n");
bgq_HoppingMatrix(ieo, targetfield, sourcefield, 0);
//master_print("BEGIN HoppingMatrix replacement\n");
//bgq_master_sync();
}

/*
 * Legacy entry point: hopping matrix variant that forwards the hm_nocom option.
 *
 * Both spinor pointers are mapped to their BGQ field control blocks through
 * bgq_translate_spinorfield() (target first, then source). When built with
 * _GAUGE_COPY and g_update_gauge_copy is set, update_backward_gauge() is run
 * on g_gauge_field before the operator is applied.
 *
 * ieo  passed through unchanged to bgq_HoppingMatrix()
 * l    spinor field handed over as the target
 * k    spinor field handed over as the source
 *
 * NOTE(review): hm_nocom presumably suppresses the halo communication -- confirm
 * against the definition of the option flags.
 */
void Hopping_Matrix_nocom(const int ieo, spinor * const l, spinor * const k) {
	bgq_weylfield_controlblock *dst = bgq_translate_spinorfield(l);
	bgq_weylfield_controlblock *src = bgq_translate_spinorfield(k);

#ifdef _GAUGE_COPY
	/* Refresh the backward gauge copy first if it has been flagged as stale. */
	if (g_update_gauge_copy) {
		update_backward_gauge(g_gauge_field);
	}
#endif

	bgq_HoppingMatrix(ieo, dst, src, hm_nocom);
}
#endif
8 changes: 7 additions & 1 deletion bgq/bgq_dispatch.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,11 @@ static inline void bgq_thread_barrier() {


int bgq_parallel(bgq_master_func master_func, void *master_arg) {
#ifndef NDEBUG
/*
 * Debug builds only: fill the first 64*25 characters of `space` with blanks
 * and NUL-terminate it. The loop has to index with i; writing to
 * space[64*25] on every iteration left the buffer contents untouched.
 * NOTE(review): `space` is presumably a padding/indentation string used by
 * debug output and must hold at least 64*25 + 1 chars -- confirm at its definition.
 */
for (int i = 0; i < 64*25; i += 1)
	space[i] = ' ';
space[64*25] = '\0';
#endif
assert(!omp_in_parallel() && "This starts the parallel section, do not call it within one");
g_bgq_dispatch_func = NULL;
g_bgq_dispatch_arg = NULL;
Expand All @@ -74,7 +76,9 @@ int bgq_parallel(bgq_master_func master_func, void *master_arg) {
}
#endif
g_bgq_dispatch_threads = omp_get_max_threads();
#ifdef OMP
omp_num_threads = 1/*omp_get_num_threads()*/; // For legacy linalg (it depends on whether nested parallelism is enabled)
#endif
g_bgq_dispatch_inparallel = true;

int master_result = 0;
Expand Down Expand Up @@ -114,7 +118,9 @@ int bgq_parallel(bgq_master_func master_func, void *master_arg) {
}
g_bgq_dispatch_inparallel = false;
g_bgq_dispatch_threads = 0;
#ifdef OMP
omp_num_threads = omp_get_max_threads();
#endif
return master_result;
}

Expand Down Expand Up @@ -216,7 +222,7 @@ void bgq_master_call(bgq_worker_func func, void *arg) {
#else
#pragma omp flush
#endif

// Join the worker force
bgq_worker();
} else {
// Not in a parallel section. Two possibilities:
Expand Down
2 changes: 1 addition & 1 deletion bgq/bgq_gaugefield.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ typedef struct {
} su3_array64;

static void bgq_gaugefield_worker_transferfrom(void *arg_untyped, size_t tid, size_t threads) {
su3 **sourcefield = (su3**) arg_untyped;
su3 **sourcefield = (su3**)arg_untyped;

const size_t workload = PHYSICAL_VOLUME * PHYSICAL_LP;
const size_t threadload = (workload + threads - 1) / threads;
Expand Down
35 changes: 35 additions & 0 deletions bgq/bgq_qpx.h
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,16 @@ typedef struct {
bgq_mov(dst,MAKENAME5(madd,dst,a,b,c)); \
}

/*
 * Multiply-subtract on the four named lanes q0..q3: dst = a * b - c.
 * All four results are computed into a block-local temporary and only then
 * copied to dst with bgq_mov, so dst may be the same vector as a, b or c.
 */
#define bgq_msub(dst,a,b,c) \
	{ \
		bgq_vector4double_decl(MAKENAME5(msub,dst,a,b,c)); \
		MAKENAME6(msub,dst,a,b,c,q0) = NAME2(a,q0) * NAME2(b,q0) - NAME2(c,q0); \
		MAKENAME6(msub,dst,a,b,c,q1) = NAME2(a,q1) * NAME2(b,q1) - NAME2(c,q1); \
		MAKENAME6(msub,dst,a,b,c,q2) = NAME2(a,q2) * NAME2(b,q2) - NAME2(c,q2); \
		MAKENAME6(msub,dst,a,b,c,q3) = NAME2(a,q3) * NAME2(b,q3) - NAME2(c,q3); \
		bgq_mov(dst,MAKENAME5(msub,dst,a,b,c)); \
	}

#define bgq_mov(dst,src) \
NAME2(dst,q0) = NAME2(src,q0); \
NAME2(dst,q1) = NAME2(src,q1); \
Expand Down Expand Up @@ -491,6 +501,9 @@ typedef struct {
/* Thin wrapper: stores the result of vec_madd(a, b, c) in dst. */
#define bgq_madd(dst,a,b,c) \
	((dst) = vec_madd((a), (b), (c)))

/* Thin wrapper: stores the result of vec_msub(a, b, c) in dst. */
#define bgq_msub(dst,a,b,c) \
	((dst) = vec_msub((a), (b), (c)))

/* Plain copy: assigns src to dst. */
#define bgq_mov(dst,src) \
	((dst) = (src))

Expand Down Expand Up @@ -669,6 +682,17 @@ typedef struct {
bgq_xxnpmadd (dst , rhs, lhs, MAKENAME4(cmul,dst,lhs,rhs)); \
}


// Complex multiplication with the left operand conjugated:
//   dst = conj(lhs) * rhs
// The bgq_xmul product is placed in a macro-local temporary; dst is written
// only by the final bgq_xxcpnmadd, which takes (rhs, lhs, temporary) in that order.
// The body is a do { } while (0) statement, so every invocation must be
// followed by ';'.
// NOTE(review): call chains in this code base pass the same vector as dst and
// rhs, so bgq_xxcpnmadd has to tolerate dst aliasing its inputs -- confirm.
#define bgq_cjgmul(dst,lhs,rhs) \
do { \
bgq_vector4double_decl(MAKENAME4(cjgmul,dst,lhs,rhs)); \
bgq_xmul (MAKENAME4(cjgmul,dst,lhs,rhs), lhs, rhs); \
bgq_xxcpnmadd (dst , rhs, lhs, MAKENAME4(cjgmul,dst,lhs,rhs)); \
} while (0)


/*
 * Expression macro: vec_xxnpmadd(b, a, vec_xmadd(a, b, c)).
 * a and b are each expanded twice, so pass arguments without side effects.
 * NOTE(review): presumably a complex multiply-add a*b + c -- confirm against
 * the intrinsic definitions.
 */
#define cvec_madd(a,b,c) vec_xxnpmadd((b), (a), vec_xmadd((a), (b), (c)))
// vec_xxnpmadd(b,a,vec_xmadd(a,b,c))
// vec_xxnpmadd(a,b,vec_xmadd(b,a,c))
Expand Down Expand Up @@ -1359,10 +1383,21 @@ do {\
bgq_cmul(NAME2(dst,c1), c, NAME2(v,c1)); \
bgq_cmul(NAME2(dst,c2), c, NAME2(v,c2))

/*
 * Applies bgq_cjgmul to each of the three components c0, c1, c2:
 *   dst.cN = conj(c) * v.cN
 * The three multiplications are wrapped in do { } while (0) so the macro
 * expands to exactly one statement; without the wrapper an unbraced
 * `if (x) bgq_su3_cjgvmul(...);` would guard only the first component.
 * As before, the invocation must be terminated with ';'.
 */
#define bgq_su3_cjgvmul(dst,c,v) \
	do { \
		bgq_cjgmul(NAME2(dst,c0), c, NAME2(v,c0)); \
		bgq_cjgmul(NAME2(dst,c1), c, NAME2(v,c1)); \
		bgq_cjgmul(NAME2(dst,c2), c, NAME2(v,c2)); \
	} while (0)


/*
 * Applies bgq_su3_cvmul with factor c to both halves v0 and v1 of weyl and
 * stores the results in dst.v0 / dst.v1.
 * The two calls are enclosed in a brace block so that the macro behaves as a
 * single statement under an unbraced `if`/loop (previously only the v0 half
 * would have been guarded). Plain braces are used instead of
 * do { } while (0) so that call sites written without a trailing ';' keep
 * compiling.
 * NOTE(review): that assumes bgq_su3_cvmul/bgq_cmul themselves end in a brace
 * block -- confirm before switching to do/while.
 */
#define bgq_su3_weyl_cmul(dst,c,weyl) \
	{ \
		bgq_su3_cvmul(NAME2(dst,v0), c, NAME2(weyl,v0)); \
		bgq_su3_cvmul(NAME2(dst,v1), c, NAME2(weyl,v1)); \
	}

/*
 * Applies bgq_su3_cjgvmul (multiplication by conj(c)) to both halves v0 and
 * v1 of weyl and stores the results in dst.v0 / dst.v1.
 * Wrapped in do { } while (0) so the macro is one statement; without the
 * wrapper an unbraced `if (x) bgq_su3_weyl_cjgmul(...);` would guard only
 * the v0 half. As before, the invocation must be terminated with ';'.
 */
#define bgq_su3_weyl_cjgmul(dst,c,weyl) \
	do { \
		bgq_su3_cjgvmul(NAME2(dst,v0), c, NAME2(weyl,v0)); \
		bgq_su3_cjgvmul(NAME2(dst,v1), c, NAME2(weyl,v1)); \
	} while (0)


#define bgq_su3_mvmul(dst,m,v) \
{ \
bgq_su3_vdecl(MAKENAME4(mvmul,dst,m,v)); \
Expand Down
2 changes: 1 addition & 1 deletion bgq/bgq_reduction.inc.c
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ static inline void NAME2(REDUCTION_NAME,worker)(void *arg_untyped, size_t tid, s

#if REDUCTION_ARGFIELDS>=2
bgq_su3_spinor_decl(spinor2);
bgq_spinorfield_readSpinor(&spinor2, argfield1, isOdd, ic, readWeyllayout2, sloppy2, mul2, false);
bgq_spinorfield_readSpinor(&spinor2, argfield2, isOdd, ic, readWeyllayout2, sloppy2, mul2, false);
#endif

REDUCTION_SITEREDUCEFUNC(REDUCTION_REDPTRARGS IF1ARG(, bgq_su3_spinor_vars(spinor1)) IF2ARG(, bgq_su3_spinor_vars(spinor2)) REDUCTION_EXTRAARGLIST, ic);
Expand Down
Loading

0 comments on commit e07760a

Please sign in to comment.