Skip to content

Commit

Permalink
only one hopping body for most of the ifdefs
Browse files Browse the repository at this point in the history
  • Loading branch information
urbach committed Aug 19, 2012
1 parent 38cc95a commit e28eaeb
Show file tree
Hide file tree
Showing 12 changed files with 428 additions and 593 deletions.
27 changes: 20 additions & 7 deletions Hopping_Matrix.c
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,6 @@
#endif
#include "global.h"
#include "su3.h"
#include "sse.h"
#ifdef MPI
# include "xchange_field.h"
# ifdef _USE_TSPLITPAR
Expand All @@ -101,6 +100,7 @@

#if defined _USE_HALFSPINOR
# if ((defined SSE2)||(defined SSE3))
# include "sse.h"
# include "operator/halfspinor_sse_dbl.c"

# elif (defined BGL && defined XLC)
Expand All @@ -118,19 +118,32 @@

#else /* that's _USE_HALFSPINOR */

# if ((defined SSE2)||(defined SSE3))
# if (((defined SSE2)||(defined SSE3)) && defined _USE_TSPLITPAR)
# include "sse.h"
# include "operator/hopping_sse_dbl.c"

# elif (defined BGQ && defined XLC)
# include "bgq.h"
# include "operator/hopping_bgq_dbl.c"

# elif (defined BGL && defined XLC)
# include "bgl.h"
# include "operator/hopping_bg_dbl.c"

# else
# include "operator/hopping_dbl.c"
# if ((defined SSE2)||(defined SSE3))
# include "sse.h"

# elif (defined BGQ && defined XLC)
# include "bgq.h"
# include "xlc_prefetch.h"

# elif defined XLC
# include"xlc_prefetch.h"

# endif
/*
 * Hopping_Matrix for the preprocessor branch that has no dedicated
 * implementation file of its own.
 *
 * The function body is not spelled out here: it is pulled in textually from
 * operator/hopping_body_dbl.c, so ieo, l and k are used by the included code.
 * NOTE(review): operator/hopping_dbl.c documents the same signature as
 * "l output, k input; for ieo=0, k resides on odd sites and l on even sites";
 * presumably the same contract applies here -- confirm against the body file.
 * NOTE(review): the included body expands _declare_regs(); it presumably also
 * relies on the _hop_*() and _store_res() macros that are in effect at this
 * point, so the headers included just above select the implementation.
 */
void Hopping_Matrix(const int ieo, spinor * const l, spinor * const k) {

/* Shared function body; must stay inside the braces of this function. */
# include "operator/hopping_body_dbl.c"

return;
}
# endif

#endif /* that's _USE_HALFSPINOR */
Expand Down
1 change: 1 addition & 0 deletions X_psi.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
# include<config.h>
#endif
#include <stdlib.h>
#include <math.h>
#ifdef HAVE_CONFIG_H
# include<config.h>
#endif
Expand Down
30 changes: 30 additions & 0 deletions bgq2.h
Original file line number Diff line number Diff line change
Expand Up @@ -917,6 +917,9 @@ inline void vec_su3_multiply_double2b(su3 * const u, vector4double * U, vector4d
return;
}

#ifdef _declare_regs
# undef _declare_regs
#endif
/* We have 32 registers available */
#define _declare_regs() \
vector4double ALIGN r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11; \
Expand All @@ -926,6 +929,9 @@ inline void vec_su3_multiply_double2b(su3 * const u, vector4double * U, vector4d
__alignx(16,l); \
__alignx(16,k);

#ifdef _hop_t_p
# undef _hop_t_p
#endif
#define _hop_t_p() \
_vec_load_spinor(r4, r5, r6, r7, r8, r9, sp->s0); \
_vec_add_ul_spinor(r0, r1, r2, r4, r5, r6, r7, r8, r9); \
Expand All @@ -936,6 +942,9 @@ inline void vec_su3_multiply_double2b(su3 * const u, vector4double * U, vector4d
rs6 = rs0; rs7 = rs1; rs8 = rs2; \
rs9 = rs3; rs10= rs4; rs11= rs5;

#ifdef _hop_t_m
# undef _hop_t_m
#endif
#define _hop_t_m() \
_vec_load_spinor(r4, r5, r6, r7, r8, r9, sm->s0); \
_vec_sub_ul_spinor(r0, r1, r2, r4, r5, r6, r7, r8, r9); \
Expand All @@ -945,6 +954,9 @@ inline void vec_su3_multiply_double2b(su3 * const u, vector4double * U, vector4d
_vec_add_double2(rs0, rs1, rs2, rs3, rs4, rs5, r0, r1, r2, r3, r4, r5); \
_vec_sub_double2(rs6, rs7, rs8, rs9, rs10, rs11, r0, r1, r2, r3, r4, r5);

#ifdef _hop_x_p
# undef _hop_x_p
#endif
#define _hop_x_p() \
_vec_load(r4, r5, sp->s0); \
_vec_load16(r6, r7, sp->s1, U0); \
Expand All @@ -960,6 +972,9 @@ inline void vec_su3_multiply_double2b(su3 * const u, vector4double * U, vector4d
_vec_i_mul_sub2(rs6, rs7, rs8, r3, r4, r5, U0); \
_vec_i_mul_sub2(rs9, rs10, rs11, r0, r1, r2, U1);

#ifdef _hop_x_m
# undef _hop_x_m
#endif
#define _hop_x_m() \
_vec_load(r4, r5, sm->s0); \
_vec_load16(r6, r7, sm->s1, U0); \
Expand All @@ -974,6 +989,9 @@ inline void vec_su3_multiply_double2b(su3 * const u, vector4double * U, vector4d
_vec_i_mul_add2(rs6, rs7, rs8, r3, r4, r5, U0); \
_vec_i_mul_add2(rs9, rs10, rs11, r0, r1, r2, U1);

#ifdef _hop_y_p
# undef _hop_y_p
#endif
#define _hop_y_p() \
_vec_load(r4, r5, sp->s0); \
_vec_load16(r6, r7, sp->s1, U0); \
Expand All @@ -989,6 +1007,9 @@ inline void vec_su3_multiply_double2b(su3 * const u, vector4double * U, vector4d
_vec_sub2(rs6, rs7, rs8, r3, r4, r5); \
_vec_add2(rs9, rs10, rs11, r0, r1, r2);

#ifdef _hop_y_m
# undef _hop_y_m
#endif
#define _hop_y_m() \
_vec_load(r4, r5, sm->s0); \
_vec_load16(r6, r7, sm->s1, U0); \
Expand All @@ -1003,6 +1024,9 @@ inline void vec_su3_multiply_double2b(su3 * const u, vector4double * U, vector4d
_vec_add2(rs6, rs7, rs8, r3, r4, r5); \
_vec_sub2(rs9, rs10, rs11, r0, r1, r2);

#ifdef _hop_z_p
# undef _hop_z_p
#endif
#define _hop_z_p() \
_vec_load(r4, r5, sp->s0); \
_vec_load16(r6, r7, sp->s1, U0); \
Expand All @@ -1018,6 +1042,9 @@ inline void vec_su3_multiply_double2b(su3 * const u, vector4double * U, vector4d
_vec_i_mul_sub2(rs6, rs7, rs8, r0, r1, r2, U0); \
_vec_i_mul_add2(rs9, rs10, rs11, r3, r4, r5, U1);

#ifdef _hop_z_m
# undef _hop_z_m
#endif
#define _hop_z_m() \
_vec_load(r4, r5, sm->s0); \
_vec_load16(r6, r7, sm->s1, U0); \
Expand All @@ -1032,6 +1059,9 @@ inline void vec_su3_multiply_double2b(su3 * const u, vector4double * U, vector4d
_vec_i_mul_add2(rs6, rs7, rs8, r0, r1, r2, U0); \
_vec_i_mul_sub2(rs9, rs10, rs11, r3, r4, r5, U1);

#ifdef _store_res
# undef _store_res
#endif
#define _store_res() \
_vec_store2(rn->s0, rs0, rs1, rs2); \
_vec_store2(rn->s1, rs3, rs4, rs5); \
Expand Down
2 changes: 1 addition & 1 deletion linalg/assign_add_mul_r_add_mul.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@
#ifdef OMP
# include <omp.h>
#endif
#include "sse.h"
#include "su3.h"
#include "su3adj.h"
#include "sse.h"
#include "assign_add_mul_r_add_mul.h"

#if ( defined SSE2 || defined SSE3 )
Expand Down
5 changes: 1 addition & 4 deletions linalg/assign_add_mul_r_bi.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,6 @@
*
* You should have received a copy of the GNU General Public License
* along with tmLQCD. If not, see <http://www.gnu.org/licenses/>.
***********************************************************************/

/************************************************************************
*
* Adapted routine evaluating P=P+c*Q where P,Q are bispinors
*
Expand All @@ -38,8 +35,8 @@
#ifdef OMP
# include <omp.h>
#endif
#include "sse.h"
#include "su3.h"
#include "sse.h"
#include "assign_add_mul_r_bi.h"


Expand Down
4 changes: 1 addition & 3 deletions linalg/assign_mul_bra_add_mul_ket_add_bi.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
*
* You should have received a copy of the GNU General Public License
* along with tmLQCD. If not, see <http://www.gnu.org/licenses/>.
***********************************************************************/
/*******************************************************************************
*
* File assign_mul_bra_add_mul_ket_add.c
*
Expand All @@ -38,8 +36,8 @@
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include "sse.h"
#include "su3.h"
#include "sse.h"
#include "assign_mul_bra_add_mul_ket_add_bi.h"

/* R input/output, S input, U input, c1 input, c2 input */
Expand Down
2 changes: 1 addition & 1 deletion measure_gauge_action.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include "sse.h"
#include "su3.h"
#include "su3adj.h"
#include "sse.h"
#include "geometry_eo.h"
#include "global.h"
#include <io/params.h>
Expand Down
14 changes: 6 additions & 8 deletions operator/hopping_bgq_dbl.c → operator/hopping_body_dbl.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,9 @@
*
**********************************************************************/

#include "bgq.h"
#include "xlc_prefetch.h"

void Hopping_Matrix(const int ieo, spinor * const l, spinor * const k) {
#pragma disjoint(*l, *k)
#ifdef XLC
# pragma disjoint(*l, *k)
#endif
#ifdef _GAUGE_COPY
if(g_update_gauge_copy) {
update_backward_gauge(g_gauge_field);
Expand All @@ -51,7 +49,9 @@ void Hopping_Matrix(const int ieo, spinor * const l, spinor * const k) {
spinor * restrict ALIGN sm;
spinor * restrict ALIGN rn;

#pragma disjoint(*sp, *sm, *rn, *up, *um, *l)
#ifdef XLC
# pragma disjoint(*sp, *sm, *rn, *up, *um, *l)
#endif
_declare_regs();

if(ieo == 0){
Expand Down Expand Up @@ -195,5 +195,3 @@ void Hopping_Matrix(const int ieo, spinor * const l, spinor * const k) {
#ifdef OMP
} /* OpenMP closing brace */
#endif
return;
}
108 changes: 0 additions & 108 deletions operator/hopping_dbl.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,114 +26,6 @@
*
**********************************************************************/

#include"xlc_prefetch.h"

/* Declares the scratch variables used by the _hop_*() and _store_res()
 * macros of this file: two su3_vector temporaries (psi, chi) and one spinor
 * (temp) in which the result is built up before it is stored.
 * Expands to plain declarations, so it has to be placed where declarations
 * are allowed in the enclosing function. */
#define _declare_regs() \
su3_vector ALIGN psi, chi; \
spinor ALIGN temp;

/* Hop term taken from spinor sp through gauge link *up with factor ka0
 * (presumably the +t neighbour, going by the macro name).
 * Combines the component pairs (s0,s2) and (s1,s3) with _vector_add, passes
 * each sum through _su3_multiply and _complex_times_vector, and writes the
 * result with _vector_assign to both members of the pair in temp.
 * This is the only _hop_*() macro here that uses _vector_assign on temp
 * rather than the *_add_assign / *_sub_assign helpers, so it is presumably
 * meant to be expanded before the others.
 * Relies on psi, chi, temp from _declare_regs() and on sp, up, ka0 from the
 * expansion site; the helper macros are defined outside this file. */
#define _hop_t_p() \
_vector_add(psi,sp->s0,sp->s2); \
_su3_multiply(chi,(*up),psi); \
_complex_times_vector(psi,ka0,chi); \
_vector_assign(temp.s0,psi); \
_vector_assign(temp.s2,psi); \
_vector_add(psi,sp->s1,sp->s3); \
_su3_multiply(chi,(*up),psi); \
_complex_times_vector(psi,ka0,chi); \
_vector_assign(temp.s1,psi); \
_vector_assign(temp.s3,psi);

/* Hop term taken from spinor sm through gauge link *um with factor ka0
 * (presumably the -t neighbour, going by the macro name).
 * Combines the component pairs (s0,s2) and (s1,s3) with _vector_sub, passes
 * each difference through _su3_inverse_multiply and _complexcjg_times_vector,
 * then applies psi to temp.s0 / temp.s1 with _vector_add_assign and to
 * temp.s2 / temp.s3 with _vector_sub_assign.
 * Relies on psi, chi, temp from _declare_regs() and on sm, um, ka0 from the
 * expansion site; the helper macros are defined outside this file. */
#define _hop_t_m() \
_vector_sub(psi,sm->s0,sm->s2); \
_su3_inverse_multiply(chi,(*um),psi); \
_complexcjg_times_vector(psi,ka0,chi); \
_vector_add_assign(temp.s0,psi); \
_vector_sub_assign(temp.s2,psi); \
_vector_sub(psi,sm->s1,sm->s3); \
_su3_inverse_multiply(chi,(*um),psi); \
_complexcjg_times_vector(psi,ka0,chi); \
_vector_add_assign(temp.s1,psi); \
_vector_sub_assign(temp.s3,psi);

/* Hop term taken from spinor sp through gauge link *up with factor ka1
 * (presumably the +x neighbour, going by the macro name).
 * Combines the component pairs (s0,s3) and (s1,s2) with _vector_i_add, passes
 * each through _su3_multiply and _complex_times_vector, then applies psi to
 * temp.s0 / temp.s1 with _vector_add_assign and to temp.s3 / temp.s2 with
 * _vector_i_sub_assign.
 * Relies on psi, chi, temp from _declare_regs() and on sp, up, ka1 from the
 * expansion site; the helper macros are defined outside this file. */
#define _hop_x_p() \
_vector_i_add(psi,sp->s0,sp->s3); \
_su3_multiply(chi,(*up),psi); \
_complex_times_vector(psi,ka1,chi); \
_vector_add_assign(temp.s0,psi); \
_vector_i_sub_assign(temp.s3,psi); \
_vector_i_add(psi,sp->s1,sp->s2); \
_su3_multiply(chi,(*up),psi); \
_complex_times_vector(psi,ka1,chi); \
_vector_add_assign(temp.s1,psi); \
_vector_i_sub_assign(temp.s2,psi);

/* Hop term taken from spinor sm through gauge link *um with factor ka1
 * (presumably the -x neighbour, going by the macro name).
 * Combines the component pairs (s0,s3) and (s1,s2) with _vector_i_sub, passes
 * each through _su3_inverse_multiply and _complexcjg_times_vector, then
 * applies psi to temp.s0 / temp.s1 with _vector_add_assign and to
 * temp.s3 / temp.s2 with _vector_i_add_assign.
 * Relies on psi, chi, temp from _declare_regs() and on sm, um, ka1 from the
 * expansion site; the helper macros are defined outside this file. */
#define _hop_x_m() \
_vector_i_sub(psi,sm->s0,sm->s3); \
_su3_inverse_multiply(chi,(*um),psi); \
_complexcjg_times_vector(psi,ka1,chi); \
_vector_add_assign(temp.s0,psi); \
_vector_i_add_assign(temp.s3,psi); \
_vector_i_sub(psi,sm->s1,sm->s2); \
_su3_inverse_multiply(chi,(*um),psi); \
_complexcjg_times_vector(psi,ka1,chi); \
_vector_add_assign(temp.s1,psi); \
_vector_i_add_assign(temp.s2,psi);

/* Hop term taken from spinor sp through gauge link *up with factor ka2
 * (presumably the +y neighbour, going by the macro name).
 * First half: _vector_add on (s0,s3), then _su3_multiply and
 * _complex_times_vector; psi goes to temp.s0 and temp.s3, both with
 * _vector_add_assign.
 * Second half: _vector_sub on (s1,s2), same link and factor; psi goes to
 * temp.s1 with _vector_add_assign and to temp.s2 with _vector_sub_assign.
 * Relies on psi, chi, temp from _declare_regs() and on sp, up, ka2 from the
 * expansion site; the helper macros are defined outside this file. */
#define _hop_y_p() \
_vector_add(psi,sp->s0,sp->s3); \
_su3_multiply(chi,(*up),psi); \
_complex_times_vector(psi,ka2,chi); \
_vector_add_assign(temp.s0,psi); \
_vector_add_assign(temp.s3,psi); \
_vector_sub(psi,sp->s1,sp->s2); \
_su3_multiply(chi,(*up),psi); \
_complex_times_vector(psi,ka2,chi); \
_vector_add_assign(temp.s1,psi); \
_vector_sub_assign(temp.s2,psi);

/* Hop term taken from spinor sm through gauge link *um with factor ka2
 * (presumably the -y neighbour, going by the macro name).
 * First half: _vector_sub on (s0,s3), then _su3_inverse_multiply and
 * _complexcjg_times_vector; psi goes to temp.s0 with _vector_add_assign and
 * to temp.s3 with _vector_sub_assign.
 * Second half: _vector_add on (s1,s2), same link and factor; psi goes to
 * temp.s1 and temp.s2, both with _vector_add_assign.
 * Relies on psi, chi, temp from _declare_regs() and on sm, um, ka2 from the
 * expansion site; the helper macros are defined outside this file. */
#define _hop_y_m() \
_vector_sub(psi,sm->s0,sm->s3); \
_su3_inverse_multiply(chi,(*um),psi); \
_complexcjg_times_vector(psi,ka2,chi); \
_vector_add_assign(temp.s0,psi); \
_vector_sub_assign(temp.s3,psi); \
_vector_add(psi,sm->s1,sm->s2); \
_su3_inverse_multiply(chi,(*um),psi); \
_complexcjg_times_vector(psi,ka2,chi); \
_vector_add_assign(temp.s1,psi); \
_vector_add_assign(temp.s2,psi);

/* Hop term taken from spinor sp through gauge link *up with factor ka3
 * (presumably the +z neighbour, going by the macro name).
 * First half: _vector_i_add on (s0,s2), then _su3_multiply and
 * _complex_times_vector; psi goes to temp.s0 with _vector_add_assign and to
 * temp.s2 with _vector_i_sub_assign.
 * Second half: _vector_i_sub on (s1,s3), same link and factor; psi goes to
 * temp.s1 with _vector_add_assign and to temp.s3 with _vector_i_add_assign.
 * Relies on psi, chi, temp from _declare_regs() and on sp, up, ka3 from the
 * expansion site; the helper macros are defined outside this file. */
#define _hop_z_p() \
_vector_i_add(psi,sp->s0,sp->s2); \
_su3_multiply(chi,(*up),psi); \
_complex_times_vector(psi,ka3,chi); \
_vector_add_assign(temp.s0,psi); \
_vector_i_sub_assign(temp.s2,psi); \
_vector_i_sub(psi,sp->s1,sp->s3); \
_su3_multiply(chi,(*up),psi); \
_complex_times_vector(psi,ka3,chi); \
_vector_add_assign(temp.s1,psi); \
_vector_i_add_assign(temp.s3,psi);

/* Hop term taken from spinor sm through gauge link *um with factor ka3
 * (presumably the -z neighbour, going by the macro name).
 * First half: _vector_i_sub on (s0,s2), then _su3_inverse_multiply and
 * _complexcjg_times_vector; psi goes to temp.s0 with _vector_add_assign and
 * to temp.s2 with _vector_i_add_assign.
 * Second half: _vector_i_add on (s1,s3), same link and factor; psi goes to
 * temp.s1 with _vector_add_assign and to temp.s3 with _vector_i_sub_assign.
 * Relies on psi, chi, temp from _declare_regs() and on sm, um, ka3 from the
 * expansion site; the helper macros are defined outside this file. */
#define _hop_z_m() \
_vector_i_sub(psi,sm->s0,sm->s2); \
_su3_inverse_multiply(chi,(*um),psi); \
_complexcjg_times_vector(psi,ka3,chi); \
_vector_add_assign(temp.s0, psi); \
_vector_i_add_assign(temp.s2, psi); \
_vector_i_add(psi,sm->s1,sm->s3); \
_su3_inverse_multiply(chi,(*um),psi); \
_complexcjg_times_vector(psi,ka3,chi); \
_vector_add_assign(temp.s1, psi); \
_vector_i_sub_assign(temp.s3, psi);

/* Writes the four spin components held in temp (s0..s3) to the spinor that
 * rn points to, one _vector_assign per component.
 * Relies on temp from _declare_regs() and on rn from the expansion site;
 * _vector_assign is defined outside this file.
 * The final line intentionally has no trailing backslash: a continuation
 * there would make the macro absorb whatever line follows the definition. */
#define _store_res() \
  _vector_assign(rn->s0, temp.s0); \
  _vector_assign(rn->s1, temp.s1); \
  _vector_assign(rn->s2, temp.s2); \
  _vector_assign(rn->s3, temp.s3);

/* l output , k input*/
/* for ieo=0, k resides on odd sites and l on even sites */
void Hopping_Matrix(int ieo, spinor * const l, spinor * const k){
Expand Down
Loading

0 comments on commit e28eaeb

Please sign in to comment.