Skip to content

Commit

Permalink
Fixed bug with precision in cublasXgemm
Browse files (browse the repository at this point in the history)
  • Loading branch information
SGo-Go committed Oct 11, 2013
1 parent 99dee84 commit 38be3f5
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 8 deletions.
2 changes: 1 addition & 1 deletion bench/bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
# os.environ['MKL_NUM_THREADS'] = str(nthreads)

for n in n_list:
ntests = 4*(1024/n) - 3
ntests = 44*(1024/n) - 3
print cmd_all.format(implement = implementation, ofolder = ofolder_local,
nthreads=nthreads, ntests=ntests, blknum=L, blksize=n)
os.system(cmd_all.format(implement = implementation, ofolder = ofolder_local,
Expand Down
4 changes: 2 additions & 2 deletions examples/ctest/ctest.c
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ void Init_DQMC_matrix(int n, int nb, scalar_t A[])
}
}

#define n 1728 //1024 //1024//1728 ////512
#define nb 10 //45 //45 //16//26 ////90
#define n 1728 /* 1024 1024 1728 512 */
#define nb 10 /* 45 45 16 26 90 */

int main(void)
{
Expand Down
9 changes: 6 additions & 3 deletions include/bsofi_macro.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
#include "third_party/cublas.h"

/*
 * GPU_ALIGN is the rounding step used by GET_LD_DEV; the trailing comment
 * records 32 as an alternative value.
 *
 * GET_LD_DEV(__len) rounds __len up to the nearest multiple of GPU_ALIGN
 * using integer division (a value that is already a multiple is returned
 * unchanged). Presumably it yields the padded leading dimension of arrays
 * stored on the device -- confirm against the callers.
 *
 * NOTE(review): the single-line and the line-continued definitions below are
 * token-for-token identical; only the line wrapping differs.
 */
#define GPU_ALIGN 64 /* 32 */
#define GET_LD_DEV(__len) ((((__len) + (GPU_ALIGN)-1)/(GPU_ALIGN))*(GPU_ALIGN))
#define GET_LD_DEV(__len) \
((((__len) + (GPU_ALIGN)-1)/(GPU_ALIGN))*(GPU_ALIGN))

/*
 * IDX_OFFSET(__ld, __i, __j): linear offset (__j)*(__ld) + (__i), i.e. index
 * __i within the __j-th run of __ld elements. Presumably __i is the row and
 * __j the column of a column-major array -- confirm against the callers.
 *
 * BLK_OFFSET(__ld, __i, __j): the same offset multiplied by `n`. `n` is NOT
 * a macro parameter: it is taken from whatever `n` is visible where the
 * macro is expanded (a variable or another macro), so callers must have one
 * in scope.
 *
 * Every argument is parenthesised so that expression arguments such as
 * `k << 1` or `c ? a : b` are not regrouped by the surrounding `+` and `*`.
 */
#define IDX_OFFSET(__ld,__i, __j) ((__j)*(__ld) + (__i))
#define BLK_OFFSET(__ld,__i, __j) ((n)*((__j)*(__ld) + (__i)))
Expand Down Expand Up @@ -54,7 +55,8 @@ typedef struct _bsofi_profile_t {
tim1 = getwalltime(); \
{__code_do;} \
__total_time += elapsed(getwalltime(), tim1); }
# define RESET_BSOFI_PROFILE(__counter) __counter = (const struct _bsofi_profile_t){0}
# define RESET_BSOFI_PROFILE(__counter) \
__counter = (const struct _bsofi_profile_t){0}

#else
# define BENCH_CUMMULATIVE(__total_time, __code_do) {__code_do;}
Expand All @@ -67,7 +69,8 @@ typedef struct _bsofi_profile_t {
# define CHECK_CUMALLOC(__code) \
if (cudaSuccess != (__code)) { \
DBGERROR("CUDA: GPU device memory allocation failed"); \
cudaFree(dwork); cublasShutdown();/* cublasDestroy(handle); */ \
/* cudaFree(dwork); */ \
cublasShutdown();/* cublasDestroy(handle); */ \
return -1; \
}

Expand Down
4 changes: 2 additions & 2 deletions include/third_party/cublas.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,9 @@
__A, __lda, __B, __ldb), "cublasGetMatrix")

# ifdef __SINGLE_PREC__
# define cublasXgemm cublasDgemm
# else
# define cublasXgemm cublasSgemm
# else
# define cublasXgemm cublasDgemm
# endif

#else
Expand Down

0 comments on commit 38be3f5

Please sign in to comment.