Skip to content

Commit

Permalink
Working phmc on x86
Browse files Browse the repository at this point in the history
  • Loading branch information
Michael Kruse committed Dec 16, 2012
1 parent 7c4b686 commit 3a6231b
Show file tree
Hide file tree
Showing 16 changed files with 484 additions and 93 deletions.
16 changes: 14 additions & 2 deletions Nondegenerate_Matrix.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
#include "Nondegenerate_Matrix.h"

#include "bgq/bgq_spinorfield.h"
#include "bgq/bgq_stdoperators.h"


void mul_one_minus_imubar(spinor * const l, spinor * const k);
Expand Down Expand Up @@ -369,8 +370,18 @@ void Q_tau1_min_cconst_ND(spinor * const l_strange, spinor * const l_charm,

/************ loop over all lattice sites ************/

spinorfield_linalg_wr(l_strange, k_strange);
spinorfield_linalg_wr(l_charm, k_charm);

#if 0
bgq_weylfield_controlblock *strange_target = bgq_translate_spinorfield(l_strange);
bgq_weylfield_controlblock *strange_source = bgq_translate_spinorfield(k_strange);
bgq_spinorfield_cmul_plain_add_double(strange_target, true, strange_source, strange_target, -z);

bgq_weylfield_controlblock *charm_target = bgq_translate_spinorfield(l_strange);
bgq_weylfield_controlblock *charm_source = bgq_translate_spinorfield(k_strange);
bgq_spinorfield_cmul_plain_add_double(charm_target, true, charm_source, charm_target, -z);
#else
spinorfield_linalg_ur(l_strange, k_strange);
spinorfield_linalg_ur(l_charm, k_charm);
#ifdef OMP
#pragma omp parallel for private(r) private(s) private(phi1) private(ix)
#endif
Expand Down Expand Up @@ -400,6 +411,7 @@ void Q_tau1_min_cconst_ND(spinor * const l_strange, spinor * const l_charm,
_complex_times_vector(phi1, z, s->s3);
_vector_sub_assign(r->s3, phi1);
}
#endif

/* Finally, we multiply by the constant phmc_Cpol */
/* which renders the polynomial in monomials */
Expand Down
6 changes: 3 additions & 3 deletions Ptilde_nd.c
Original file line number Diff line number Diff line change
Expand Up @@ -164,9 +164,9 @@ void Poly_tilde_ND(spinor *R_s, spinor *R_c, double *dd, int n,
fact2=-2*(phmc_cheb_evmax+phmc_cheb_evmin)/(phmc_cheb_evmax-phmc_cheb_evmin);

zero_spinor_field(&ds[0],VOLUME/2);
spinorfield_setOddness(&ds[0], 0);
spinorfield_setOddness(&ds[0], 1);
zero_spinor_field(&dds[0],VOLUME/2);
spinorfield_setOddness(&dds[0], 0);
spinorfield_setOddness(&dds[0], 1);
zero_spinor_field(&dc[0],VOLUME/2);
spinorfield_setOddness(&dc[0], 1);
zero_spinor_field(&ddc[0],VOLUME/2);
Expand Down Expand Up @@ -338,7 +338,7 @@ void degree_of_Ptilde() {
/* Ptilde P S P Ptilde X - X */
/* for random spinor X */
random_spinor_field(ss,VOLUME/2, 1);
spinorfield_setOddness(&ss[0], 0);
spinorfield_setOddness(&ss[0], 1);
random_spinor_field(sc,VOLUME/2, 1);
spinorfield_setOddness(&sc[0], 1);

Expand Down
97 changes: 97 additions & 0 deletions Square_root_BR_roots.dat
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
Nr. Re Im
0 -8.9159403320881392e-01 4.7804672826707813e-02
1 -7.2194563107474727e-02 6.4445273979129150e-02
2 8.6001577635862980e-01 5.3378393690399463e-02
3 1.0049540304112328e+00 3.3532545071557744e-03
4 -5.4958787722623881e-01 8.2211131452447878e-02
5 4.9474126903798127e-01 8.4436923558024513e-02
6 -9.8028843572458690e-01 2.3221863040844969e-02
7 -3.1956875798125922e-01 8.6999612636091295e-02
8 7.0002861398743654e-01 7.2378639534923531e-02
9 9.6395860045245074e-01 2.9642217760997631e-02
10 -7.4475391345595521e-01 6.8189294358218688e-02
11 2.5860912302191003e-01 8.5960632088481148e-02
12 -9.4367395603589366e-01 3.5902537104027209e-02
13 -1.9695049328074091e-01 8.3300997495042814e-02
14 7.8644156426994472e-01 6.3603854626646264e-02
15 9.9259556160803730e-01 1.6676581486763764e-02
16 -6.5245500323587680e-01 7.6137293221021168e-02
17 3.7945482169279338e-01 8.6944151761185778e-02
18 -1.0008288288402176e+00 1.0041814904872664e-02
19 -4.3794207919374706e-01 8.6043781993953100e-02
20 6.0223595001415386e-01 7.9428441678699405e-02
21 9.1951890261573377e-01 4.1968111922743308e-02
22 -8.2491599655299641e-01 5.8655842355888790e-02
23 1.3496818662392801e-01 7.7666920257734931e-02
24 -9.1951890261573377e-01 4.1968111922743308e-02
25 -1.3496818662392801e-01 7.7666920257734931e-02
26 8.2491599655299641e-01 5.8655842355888790e-02
27 1.0008288288402176e+00 1.0041814904872664e-02
28 -6.0223595001415386e-01 7.9428441678699405e-02
29 4.3794207919374706e-01 8.6043781993953100e-02
30 -9.9259556160803730e-01 1.6676581486763764e-02
31 -3.7945482169279338e-01 8.6944151761185778e-02
32 6.5245500323587680e-01 7.6137293221021168e-02
33 9.4367395603589366e-01 3.5902537104027209e-02
34 -7.8644156426994472e-01 6.3603854626646264e-02
35 1.9695049328074091e-01 8.3300997495042814e-02
36 -9.6395860045245074e-01 2.9642217760997631e-02
37 -2.5860912302191003e-01 8.5960632088481148e-02
38 7.4475391345595521e-01 6.8189294358218688e-02
39 9.8028843572458690e-01 2.3221863040844969e-02
40 -7.0002861398743654e-01 7.2378639534923531e-02
41 3.1956875798125922e-01 8.6999612636091295e-02
42 -1.0049540304112328e+00 3.3532545071557744e-03
43 -4.9474126903798127e-01 8.4436923558024513e-02
44 5.4958787722623881e-01 8.2211131452447878e-02
45 8.9159403320881392e-01 4.7804672826707813e-02
46 -8.6001577635862980e-01 5.3378393690399463e-02
47 7.2194563107474727e-02 6.4445273979129150e-02
48 7.2194563107474727e-02 -6.4445273979129150e-02
49 -8.6001577635862980e-01 -5.3378393690399463e-02
50 8.9159403320881392e-01 -4.7804672826707813e-02
51 5.4958787722623881e-01 -8.2211131452447878e-02
52 -4.9474126903798127e-01 -8.4436923558024513e-02
53 -1.0049540304112328e+00 -3.3532545071557744e-03
54 3.1956875798125922e-01 -8.6999612636091295e-02
55 -7.0002861398743654e-01 -7.2378639534923531e-02
56 9.8028843572458690e-01 -2.3221863040844969e-02
57 7.4475391345595521e-01 -6.8189294358218688e-02
58 -2.5860912302191003e-01 -8.5960632088481148e-02
59 -9.6395860045245074e-01 -2.9642217760997631e-02
60 1.9695049328074091e-01 -8.3300997495042814e-02
61 -7.8644156426994472e-01 -6.3603854626646264e-02
62 9.4367395603589366e-01 -3.5902537104027209e-02
63 6.5245500323587680e-01 -7.6137293221021168e-02
64 -3.7945482169279338e-01 -8.6944151761185778e-02
65 -9.9259556160803730e-01 -1.6676581486763764e-02
66 4.3794207919374706e-01 -8.6043781993953100e-02
67 -6.0223595001415386e-01 -7.9428441678699405e-02
68 1.0008288288402176e+00 -1.0041814904872664e-02
69 8.2491599655299641e-01 -5.8655842355888790e-02
70 -1.3496818662392801e-01 -7.7666920257734931e-02
71 -9.1951890261573377e-01 -4.1968111922743308e-02
72 1.3496818662392801e-01 -7.7666920257734931e-02
73 -8.2491599655299641e-01 -5.8655842355888790e-02
74 9.1951890261573377e-01 -4.1968111922743308e-02
75 6.0223595001415386e-01 -7.9428441678699405e-02
76 -4.3794207919374706e-01 -8.6043781993953100e-02
77 -1.0008288288402176e+00 -1.0041814904872664e-02
78 3.7945482169279338e-01 -8.6944151761185778e-02
79 -6.5245500323587680e-01 -7.6137293221021168e-02
80 9.9259556160803730e-01 -1.6676581486763764e-02
81 7.8644156426994472e-01 -6.3603854626646264e-02
82 -1.9695049328074091e-01 -8.3300997495042814e-02
83 -9.4367395603589366e-01 -3.5902537104027209e-02
84 2.5860912302191003e-01 -8.5960632088481148e-02
85 -7.4475391345595521e-01 -6.8189294358218688e-02
86 9.6395860045245074e-01 -2.9642217760997631e-02
87 7.0002861398743654e-01 -7.2378639534923531e-02
88 -3.1956875798125922e-01 -8.6999612636091295e-02
89 -9.8028843572458690e-01 -2.3221863040844969e-02
90 4.9474126903798127e-01 -8.4436923558024513e-02
91 -5.4958787722623881e-01 -8.2211131452447878e-02
92 1.0049540304112328e+00 -3.3532545071557744e-03
93 8.6001577635862980e-01 -5.3378393690399463e-02
94 -7.2194563107474727e-02 -6.4445273979129150e-02
95 -8.9159403320881392e-01 -4.7804672826707813e-02
2 changes: 2 additions & 0 deletions bgq/README.txt
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,8 @@ A specific layout can be requested using the function bgq_spinorfield_prepareWri

"even_odd_flag" must be defined. Non-even/odd spinor fields are not supported.

For HoppingMatrix, another arrangement of the gauge field is required, which is handled like _GAUGE_COPY. It is always kept in double precision, even if single precision is chosen.


Local Lattice Constraints
-------------------------
Expand Down
14 changes: 13 additions & 1 deletion bgq/bgq_legacy.c
Original file line number Diff line number Diff line change
Expand Up @@ -276,5 +276,17 @@ void add(spinor * const Q,const spinor * const R,const spinor * const S, const i
bgq_spinorfield_add_double(targetfield, tri_unknown, spinorfield1, spinorfield2);
}

#endif

/*
 * Legacy-interface wrapper that folds three further fields into R using the
 * BGQ spinorfield kernels.
 *
 * R is both an input and the destination of every kernel call; S, U and V
 * are only passed as inputs.  The work is done in three steps:
 *   1. rmul_rmul_add on (S, R) with the factors (c2, c1),
 *   2. rmul_plain_add on (U, R) with the factor c3,
 *   3. rmul_plain_add on (V, R) with the factor c4.
 * NOTE(review): judging by the name and the argument order this presumably
 * yields R = c1*R + c2*S + c3*U + c4*V -- confirm against the kernel
 * definitions.
 *
 * N is accepted for compatibility with the legacy signature but is not used
 * in this implementation.
 */
void assign_mul_add_mul_add_mul_add_mul_r(spinor * const R, spinor * const S, spinor * const U, spinor * const V,
		const double c1, const double c2, const double c3, const double c4, const int N) {
	/* Map the legacy spinor pointers onto their BGQ control blocks. */
	bgq_weylfield_controlblock *dst = bgq_translate_spinorfield(R);
	bgq_weylfield_controlblock *src_s = bgq_translate_spinorfield(S);
	bgq_weylfield_controlblock *src_u = bgq_translate_spinorfield(U);
	bgq_weylfield_controlblock *src_v = bgq_translate_spinorfield(V);

	/* Step 1: combine S (factor c2) with the current R (factor c1). */
	bgq_spinorfield_rmul_rmul_add_double(dst, tri_unknown, src_s, dst, c2, c1);

	/* Steps 2 and 3: accumulate U (factor c3) and V (factor c4) onto R. */
	bgq_spinorfield_rmul_plain_add_double(dst, tri_unknown, src_u, dst, c3);
	bgq_spinorfield_rmul_plain_add_double(dst, tri_unknown, src_v, dst, c4);
}

#endif
32 changes: 32 additions & 0 deletions bgq/bgq_qpx.h
Original file line number Diff line number Diff line change
Expand Up @@ -1382,11 +1382,26 @@ do {\
bgq_isub(NAME2(dst,c1), NAME2(v1,c1), NAME2(v2,c1)); \
bgq_isub(NAME2(dst,c2), NAME2(v1,c2), NAME2(v2,c2))

/*
 * Component-wise bgq_mul over a three-component vector: for each of the
 * c0, c1 and c2 members, bgq_mul is invoked with the matching member of
 * `res` as destination, `factor`, and the matching member of `vec`.
 * Wrapped in do { } while (0) so it behaves as a single statement.
 */
#define bgq_su3_vmul(res,factor,vec)                       \
	do {                                                   \
		bgq_mul(NAME2(res,c0), factor, NAME2(vec,c0));     \
		bgq_mul(NAME2(res,c1), factor, NAME2(vec,c1));     \
		bgq_mul(NAME2(res,c2), factor, NAME2(vec,c2));     \
	} while (0)

/*
 * Component-wise bgq_cmul over a three-component vector: invokes bgq_cmul
 * for the c0, c1 and c2 members of `res` / `vec` with the same `coeff`.
 *
 * Expands to three bare statements (no do/while wrapper and no trailing
 * semicolon), so it must not be used as the unbraced body of an if/else
 * or loop.
 */
#define bgq_su3_cvmul(res,coeff,vec)                    \
	bgq_cmul(NAME2(res,c0), coeff, NAME2(vec,c0));      \
	bgq_cmul(NAME2(res,c1), coeff, NAME2(vec,c1));      \
	bgq_cmul(NAME2(res,c2), coeff, NAME2(vec,c2))


/*
 * Component-wise bgq_cmadd over a three-component vector: for each of the
 * c0, c1 and c2 members, bgq_cmadd is invoked with the matching member of
 * `dst` as destination, the coefficient `c`, and the matching members of
 * `v` and `vadd`.
 *
 * Each component pairs with the SAME component of `vadd`; the previous
 * version passed vadd's c0 member for all three components, which corrupted
 * the c1 and c2 results.
 * Wrapped in do { } while (0) so it behaves as a single statement.
 */
#define bgq_su3_cvmadd(dst,c,v,vadd) \
	do { \
		bgq_cmadd(NAME2(dst,c0), c, NAME2(v,c0), NAME2(vadd,c0)); \
		bgq_cmadd(NAME2(dst,c1), c, NAME2(v,c1), NAME2(vadd,c1)); \
		bgq_cmadd(NAME2(dst,c2), c, NAME2(v,c2), NAME2(vadd,c2)); \
	} while (0)

#define bgq_su3_cjgvmul(dst,c,v) \
bgq_cjgmul(NAME2(dst,c0), c, NAME2(v,c0)); \
bgq_cjgmul(NAME2(dst,c1), c, NAME2(v,c1)); \
Expand All @@ -1402,6 +1417,23 @@ do {\
bgq_su3_cjgvmul(NAME2(dst,v1), c, NAME2(weyl,v1))


/*
 * Applies bgq_su3_vmul with the same `factor` to each of the four members
 * v0..v3 of `src`, writing into the matching members of `res`.
 * Wrapped in do { } while (0) so it behaves as a single statement.
 */
#define bgq_su3_spinor_mul(res,factor,src)                      \
	do {                                                        \
		bgq_su3_vmul(NAME2(res,v0), factor, NAME2(src,v0));     \
		bgq_su3_vmul(NAME2(res,v1), factor, NAME2(src,v1));     \
		bgq_su3_vmul(NAME2(res,v2), factor, NAME2(src,v2));     \
		bgq_su3_vmul(NAME2(res,v3), factor, NAME2(src,v3));     \
	} while (0)

/*
 * Applies bgq_su3_vadd to each of the four members v0..v3: the matching
 * members of `a` and `b` are combined into the matching member of `res`.
 * Wrapped in do { } while (0) so it behaves as a single statement.
 */
#define bgq_su3_spinor_add(res,a,b)                                     \
	do {                                                                \
		bgq_su3_vadd(NAME2(res,v0), NAME2(a,v0), NAME2(b,v0));          \
		bgq_su3_vadd(NAME2(res,v1), NAME2(a,v1), NAME2(b,v1));          \
		bgq_su3_vadd(NAME2(res,v2), NAME2(a,v2), NAME2(b,v2));          \
		bgq_su3_vadd(NAME2(res,v3), NAME2(a,v3), NAME2(b,v3));          \
	} while (0)


#define bgq_su3_mvmul(dst,m,v) \
{ \
bgq_su3_vdecl(MAKENAME4(mvmul,dst,m,v)); \
Expand Down
96 changes: 17 additions & 79 deletions bgq/bgq_spinorfield.c
Original file line number Diff line number Diff line change
Expand Up @@ -595,8 +595,8 @@ void bgq_spinorfield_enableLayout(bgq_weylfield_controlblock *field, tristate is
memset(field->legacy_field, 0xFF, VOLUMEPLUSRAND/2 * sizeof(*field->legacy_field));
#endif
#ifndef NVALGRIND
VALGRIND_MEMPOOL_FREE(g_spinor_field[0], field->legacy_field);
VALGRIND_MEMPOOL_ALLOC(g_spinor_field[0], field->legacy_field, VOLUMEPLUSRAND/2*sizeof(*field->legacy_field));
VALGRIND_MEMPOOL_FREE(field->collectionBase->legacy_base, field->legacy_field);
VALGRIND_MEMPOOL_ALLOC(field->collectionBase->legacy_base, field->legacy_field, VOLUMEPLUSRAND/2*sizeof(*field->legacy_field));
#endif
}
field->has_legacy = true;
Expand All @@ -608,52 +608,6 @@ void bgq_spinorfield_enableLayout(bgq_weylfield_controlblock *field, tristate is
}


/*
 * Empty stub: accepts a field plus oddness and read/write layout flags but
 * performs no work.  The only content of the body is a disabled call to
 * bgq_spinorfield_setup().
 */
static void bgq_spinorfield_setup_float(bgq_weylfield_controlblock *field, bool isOdd,
		bool readFullspinor, bool writeFullspinor,
		bool readWeyl, bool writeWeyl) {
	/* Disabled: bgq_spinorfield_setup(field, isOdd, readFullspinor, writeFullspinor, readWeyl, writeWeyl, true); */
}


/*
 * Empty stub: accepts a field plus oddness and read/write layout flags but
 * performs no work.  The only content of the body is a disabled call to
 * bgq_spinorfield_setup().
 */
static void bgq_spinorfield_setup_double(bgq_weylfield_controlblock *field, bool isOdd,
		bool readFullspinor, bool writeFullspinor,
		bool readWeyl, bool writeWeyl) {
	/* Disabled: bgq_spinorfield_setup(field, isOdd, readFullspinor, writeFullspinor, readWeyl, writeWeyl, true); */
}


/*
 * Argument bundle holding a single spinor field control block.
 * NOTE(review): presumably handed to a layout-conversion worker (guess based
 * on the type name; no user is visible here) -- confirm against callers.
 */
typedef struct {
bgq_weylfield_controlblock *field; /* the field to operate on */
} bgq_conversion_args;

#if 0
/*
 * Copies a legacy spinor array into the full-spinor double-precision section
 * of a BGQ field.
 *
 * isOdd       selects the starting offset into g_eo2lexic (0, or
 *             (VOLUME+RAND)/2 when true)
 * targetfield BGQ field; prepared for writing with ly_full_double, then
 *             filled through sec_fullspinor_double
 * sourcefield legacy array, read at indices 0 .. VOLUME/2-1
 *
 * This definition sits inside an "#if 0" region, so it is currently not
 * compiled.
 */
void bgq_spinorfield_transfer(bool isOdd, bgq_weylfield_controlblock *targetfield, spinor *sourcefield) {
bgq_spinorfield_prepareWrite(targetfield, isOdd, ly_full_double);
//bgq_spinorfield_setup(targetfield, isOdd, false, true, false, false, false);

bgq_master_sync(); // No other thread should touch this data from here on
// Offset into g_eo2lexic for the selected oddness
size_t ioff = isOdd ? (VOLUME+RAND)/2 : 0;
for (size_t i_eosub = 0; i_eosub < VOLUME/2; i_eosub+=1) {
// Map the even/odd index to a lexicographic index, then to process-local coordinates
size_t i_eo = i_eosub + ioff;
size_t i_lexic = g_eo2lexic[i_eo];
int t = g_coord[i_lexic][0] - g_proc_coords[0]*T;
int x = g_coord[i_lexic][1] - g_proc_coords[1]*LX;
int y = g_coord[i_lexic][2] - g_proc_coords[2]*LY;
int z = g_coord[i_lexic][3] - g_proc_coords[3]*LZ;
// View the legacy spinor as an indexable [v][c] array
spinor_array64 *sourcespinor = (spinor_array64*)&sourcefield[i_eosub];
assert(bgq_local2isOdd(t,x,y,z)==isOdd);

// ih is computed but not used below; ic selects the target site, k the slot within it
ucoord ih = bgq_local2halfvolume(t,x,y,z);
ucoord ic = bgq_local2collapsed(t,x,y,z);
ucoord k = bgq_local2k(t,x,y,z);
bgq_spinorsite_double *targetspinor = &targetfield->sec_fullspinor_double[ic];

// Copy all 4 x 3 components of this site into slot k of the target
for (int v = 0; v < 4; v+=1) {
for (int c = 0; c < 3; c+=1) {
targetspinor->s[v][c][k] = sourcespinor->v[v].c[c];
}
}
bgq_spinorveck_written_double(targetspinor, k, t, x, y, z);
}
}
#endif

bgq_spinor bgq_legacy_getspinor(spinor *spinor, ucoord t, ucoord x, ucoord y, ucoord z) {
assert(0 <= t && t < LOCAL_LT);
assert(0 <= x && x < LOCAL_LX);
Expand Down Expand Up @@ -689,31 +643,9 @@ bgq_spinor bgq_spinorfield_getspinor(bgq_weylfield_controlblock *field, ucoord t
bgq_su3_spinor_decl(spinor);
bgq_spinorfield_readSpinor(&spinor, field, isOdd, ic, layout&ly_weyl, layout&ly_sloppy, layout&ly_mul, layout==ly_legacy);
return bgq_spinor_fromqpx(spinor, k);

#if 0
if (field->hasFullspinorData) {
bgq_spinorfield_setup(field, field->isOdd, true, false, false, false, false);
bgq_spinorsite spinor = field->sec_fullspinor[ic];
return bgq_spinor_fromvec(spinor,k);
} else if (field->hasWeylfieldData) {
bgq_spinorfield_setup(field, field->isOdd, false, false, true, false, false);
bgq_su3_spinor_decl(spinor);
size_t offset = bgq_pointer2offset(field, &field->sec_collapsed[ic].d[XUP]);
ucoord index = bgq_offset2index(offset);
bgq_HoppingMatrix_loadWeyllayout(spinor,&field->sec_collapsed[ic], bgq_t2t(t,0), bgq_t2t(t,1), x, y, z);
return bgq_spinor_fromqpx(spinor,k);
} else {
printf("Field contains no data\n");
abort();
bgq_spinor dummy;
return dummy;
}
#endif
}




/*
 * File-scope tables: one string pointer per BGQREF index, plus two
 * complexdouble buffers that start out unallocated (NULL).
 * NOTE(review): presumably descriptions and BGQ-vs-reference values used for
 * debugging comparisons (guess based on the names) -- confirm against users.
 */
char *(g_idxdesc[BGQREF_count]);
complexdouble *g_bgqvalue = NULL;
complexdouble *g_refvalue = NULL;
Expand Down Expand Up @@ -1303,9 +1235,6 @@ void bgq_spinorfield_prepareWrite(bgq_weylfield_controlblock *field, tristate is
}





void bgq_spinorfield_prepareReadWrite(bgq_weylfield_controlblock *field, tristate isOdd, bgq_spinorfield_layout layout) {
isOdd = tristate_combine(isOdd, field->isOdd);
bgq_spinorfield_prepareRead(field, isOdd, layout&ly_weyl, (layout!=ly_legacy) && !(layout&ly_sloppy), layout&ly_sloppy, layout&ly_mul, layout==ly_legacy);
Expand All @@ -1317,10 +1246,16 @@ bgq_spinorfield_layout bgq_spinorfield_prepareRead(bgq_weylfield_controlblock *f
assert(field);
assert(field->has_fulllayout_double || field->has_fulllayout_float || field->has_weyllayout_double || field->has_weyllayout_float || field->has_legacy); // There must be some data to read
assert(acceptDouble || acceptFloat || acceptLegacy); // Accept at least something
assert((isOdd==tri_unknown) || (field->isOdd==tri_unknown) || (isOdd==field->isOdd));
//bgq_master_sync();
if (isOdd == tri_unknown) {
isOdd = field->isOdd; // May still be unknown

if (isOdd == field->isOdd) {
// Matching oddness, or unknown
} else if (isOdd==tri_unknown) {
isOdd = field->isOdd;
} else if (field->isOdd==tri_unknown) {
field->isOdd = isOdd;
} else {
// Mismatching oddness
master_error(1, "Oddness mismatch");
}

bool actionRewrite = false;
Expand Down Expand Up @@ -1421,7 +1356,7 @@ bgq_spinorfield_layout bgq_spinorfield_prepareRead(bgq_weylfield_controlblock *f
assert(diff==0);
#endif
} else {
assert(!"You ain't accept anything!");
assert(!"You ain't accept anything we can convert to!");
}
} else {
result = layout;
Expand Down Expand Up @@ -1567,7 +1502,10 @@ bgq_weylfield_collection *bgq_spinorfields_allocate(size_t count, spinor *legacy
field->legacy_field = (spinor*)((uint8_t*)legacyFields + i*fieldsize);
assert((uintptr_t)field->legacy_field % 32 == 0);
#ifndef NVALGRIND
VALGRIND_MEMPOOL_ALLOC(legacyFields, field->legacy_field, fieldsize);
VALGRIND_MEMPOOL_ALLOC(legacyFields, field->legacy_field, VOLUMEPLUSRAND/2*sizeof(*field->legacy_field));
#endif
#ifndef NDEBUG
memset(field->legacy_field, 0xFF, fieldsize);
#endif
} else {
field->legacy_field = NULL;
Expand Down
Loading

0 comments on commit 3a6231b

Please sign in to comment.