Skip to content

Commit e0df18b

Browse files
author
Kent Knox
committed
Removing the pedantic flag from gcc compiles
The library was not developed with the pedantic warning flag enabled, and the build outputs a large volume of verbose warning messages on every build, so it is not currently helpful to have this flag enabled. (Note that the change to src/CMakeLists.txt comments out -Wall and -Wextra along with -pedantic.) This flag should be enabled again as the warnings get fixed, to make the library more robust. Various remaining warnings and #pragmas were fixed. Changed the allocation of a temp buffer in corr-trmv.cpp.
1 parent f682e98 commit e0df18b

38 files changed

+240
-210
lines changed

.gitignore

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,5 @@
2424
# vim temp files
2525
.*.swp
2626

27-
src/build/
28-
2927
# python compiled files
3028
*.pyc
31-

src/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -266,7 +266,7 @@ if( BUILD_TEST )
266266
endif( )
267267

268268
# This will define OPENCL_FOUND
269-
find_package( OpenCL )
269+
find_package( OpenCL ${OPENCL_VERSION} )
270270

271271
# Find Boost on the system, and configure the type of boost build we want
272272
set( Boost_USE_MULTITHREADED ON )
@@ -288,7 +288,7 @@ endif()
288288

289289
# Turn on maximum compiler verbosity
290290
if(CMAKE_COMPILER_IS_GNUCXX)
291-
add_definitions(-pedantic -Wall -Wextra
291+
add_definitions(# -pedantic -Wall -Wextra
292292
-D_POSIX_C_SOURCE=199309L -D_XOPEN_SOURCE=500
293293
)
294294
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99 -Wstrict-prototypes" CACHE STRING

src/FindOpenCL.cmake

Lines changed: 102 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# ########################################################################
2-
# Copyright 2013 Advanced Micro Devices, Inc.
2+
# Copyright 2015 Advanced Micro Devices, Inc.
33
#
44
# Licensed under the Apache License, Version 2.0 (the "License");
55
# you may not use this file except in compliance with the License.
@@ -14,7 +14,6 @@
1414
# limitations under the License.
1515
# ########################################################################
1616

17-
1817
# Locate an OpenCL implementation.
1918
# Currently supports AMD APP SDK (http://developer.amd.com/sdks/AMDAPPSDK/Pages/default.aspx/)
2019
#
@@ -46,60 +45,122 @@
4645
# target_link_libraries(foo ${OPENCL_LIBRARIES})
4746
#
4847
#-----------------------
48+
include( CheckSymbolExists )
49+
include( CMakePushCheckState )
50+
51+
if( DEFINED OPENCL_ROOT OR DEFINED ENV{OPENCL_ROOT})
52+
message( STATUS "Defined OPENCL_ROOT: ${OPENCL_ROOT}, ENV{OPENCL_ROOT}: $ENV{OPENCL_ROOT}" )
53+
endif( )
4954

5055
find_path(OPENCL_INCLUDE_DIRS
51-
NAMES OpenCL/cl.h CL/cl.h
52-
HINTS
53-
${OPENCL_ROOT}/include
54-
$ENV{AMDAPPSDKROOT}/include
55-
$ENV{CUDA_PATH}/include
56-
PATHS
57-
/usr/include
58-
/usr/local/include
59-
/usr/local/cuda/include
60-
/opt/cuda/include
61-
DOC "OpenCL header file path"
56+
NAMES OpenCL/cl.h CL/cl.h
57+
HINTS
58+
${OPENCL_ROOT}/include
59+
$ENV{OPENCL_ROOT}/include
60+
$ENV{AMDAPPSDKROOT}/include
61+
$ENV{CUDA_PATH}/include
62+
PATHS
63+
/usr/include
64+
/usr/local/include
65+
/usr/local/cuda/include
66+
DOC "OpenCL header file path"
6267
)
6368
mark_as_advanced( OPENCL_INCLUDE_DIRS )
69+
message( STATUS "OPENCL_INCLUDE_DIRS: ${OPENCL_INCLUDE_DIRS}" )
70+
71+
set( OpenCL_VERSION "0.0" )
72+
73+
cmake_push_check_state( RESET )
74+
set( CMAKE_REQUIRED_INCLUDES "${OPENCL_INCLUDE_DIRS}" )
75+
76+
# Bug in check_symbol_exists prevents us from specifying a list of files, so we loop
77+
# Only 1 of these files will exist on a system, so the other file will not clobber the output variable
78+
if( APPLE )
79+
set( CL_HEADER_FILE "OpenCL/cl.h" )
80+
else( )
81+
set( CL_HEADER_FILE "CL/cl.h" )
82+
endif( )
83+
84+
check_symbol_exists( CL_VERSION_2_0 ${CL_HEADER_FILE} HAVE_CL_2_0 )
85+
check_symbol_exists( CL_VERSION_1_2 ${CL_HEADER_FILE} HAVE_CL_1_2 )
86+
check_symbol_exists( CL_VERSION_1_1 ${CL_HEADER_FILE} HAVE_CL_1_1 )
87+
# message( STATUS "HAVE_CL_2_0: ${HAVE_CL_2_0}" )
88+
# message( STATUS "HAVE_CL_1_2: ${HAVE_CL_1_2}" )
89+
# message( STATUS "HAVE_CL_1_1: ${HAVE_CL_1_1}" )
90+
91+
# set OpenCL_VERSION to the highest detected version
92+
if( HAVE_CL_2_0 )
93+
set( OpenCL_VERSION "2.0" )
94+
elseif( HAVE_CL_1_2 )
95+
set( OpenCL_VERSION "1.2" )
96+
elseif( HAVE_CL_1_1 )
97+
set( OpenCL_VERSION "1.1" )
98+
endif( )
99+
100+
cmake_pop_check_state( )
64101

65102
# Search for 64-bit libs if the global property FIND_LIBRARY_USE_LIB64_PATHS is set to true, otherwise search for 32-bit libs
66103
get_property( LIB64 GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS )
104+
if( LIB64 )
105+
message( STATUS "FindOpenCL searching for 64-bit libraries" )
106+
else( )
107+
message( STATUS "FindOpenCL searching for 32-bit libraries" )
108+
endif( )
67109

68110
if( LIB64 )
69-
find_library( OPENCL_LIBRARIES
70-
NAMES OpenCL
71-
HINTS
72-
${OPENCL_ROOT}/lib
73-
$ENV{AMDAPPSDKROOT}/lib
74-
$ENV{CUDA_PATH}/lib
75-
DOC "OpenCL dynamic library path"
76-
PATH_SUFFIXES x86_64 x64 x86_64/sdk
77-
PATHS
78-
/usr/lib
79-
/usr/local/cuda/lib
80-
/opt/cuda/lib
81-
)
111+
find_library( OPENCL_LIBRARIES
112+
NAMES OpenCL
113+
HINTS
114+
${OPENCL_ROOT}/lib
115+
$ENV{OPENCL_ROOT}/lib
116+
$ENV{AMDAPPSDKROOT}/lib
117+
$ENV{CUDA_PATH}/lib
118+
DOC "OpenCL dynamic library path"
119+
PATH_SUFFIXES x86_64 x64 x86_64/sdk
120+
PATHS
121+
/usr/lib
122+
/usr/local/cuda/lib
123+
)
82124
else( )
83-
find_library( OPENCL_LIBRARIES
84-
NAMES OpenCL
85-
HINTS
86-
${OPENCL_ROOT}/lib
87-
$ENV{AMDAPPSDKROOT}/lib
88-
$ENV{CUDA_PATH}/lib
89-
DOC "OpenCL dynamic library path"
90-
PATH_SUFFIXES x86 Win32
91-
92-
PATHS
93-
/usr/lib
94-
/usr/local/cuda/lib
95-
/opt/cuda/lib
96-
)
125+
find_library( OPENCL_LIBRARIES
126+
NAMES OpenCL
127+
HINTS
128+
${OPENCL_ROOT}/lib
129+
$ENV{OPENCL_ROOT}/lib
130+
$ENV{AMDAPPSDKROOT}/lib
131+
$ENV{CUDA_PATH}/lib
132+
DOC "OpenCL dynamic library path"
133+
PATH_SUFFIXES x86 Win32
134+
PATHS
135+
/usr/lib
136+
/usr/local/cuda/lib
137+
)
97138
endif( )
98139
mark_as_advanced( OPENCL_LIBRARIES )
99140

141+
# message( STATUS "OpenCL_FIND_VERSION: ${OpenCL_FIND_VERSION}" )
142+
if( OpenCL_VERSION VERSION_LESS OpenCL_FIND_VERSION )
143+
message( FATAL_ERROR "Requested OpenCL version: ${OpenCL_FIND_VERSION}, Found OpenCL version: ${OpenCL_VERSION}" )
144+
endif( )
145+
146+
# If we asked for an OpenCL version below 2.0 (e.g. 1.2), and we found a version installed greater than that, pass the 'use deprecated' flag
147+
if( (OpenCL_FIND_VERSION VERSION_LESS "2.0") AND (OpenCL_VERSION VERSION_GREATER OpenCL_FIND_VERSION) )
148+
add_definitions( -DCL_USE_DEPRECATED_OPENCL_2_0_APIS )
149+
150+
# If we asked for an OpenCL version below 1.2 (e.g. 1.1), and we found a version installed greater than that, pass the 'use deprecated' flag
151+
if( (OpenCL_FIND_VERSION VERSION_LESS "1.2") AND (OpenCL_VERSION VERSION_GREATER OpenCL_FIND_VERSION) )
152+
add_definitions( -DCL_USE_DEPRECATED_OPENCL_1_1_APIS )
153+
endif( )
154+
endif( )
155+
100156
include( FindPackageHandleStandardArgs )
101-
FIND_PACKAGE_HANDLE_STANDARD_ARGS( OPENCL DEFAULT_MSG OPENCL_LIBRARIES OPENCL_INCLUDE_DIRS )
157+
FIND_PACKAGE_HANDLE_STANDARD_ARGS( OPENCL
158+
REQUIRED_VARS OPENCL_LIBRARIES OPENCL_INCLUDE_DIRS
159+
VERSION_VAR OpenCL_VERSION
160+
)
102161

103162
if( NOT OPENCL_FOUND )
104163
message( STATUS "FindOpenCL looked for libraries named: OpenCL" )
164+
else( )
165+
message(STATUS "FindOpenCL ${OPENCL_LIBRARIES}, ${OPENCL_INCLUDE_DIRS}")
105166
endif()

src/library/blas/AutoGemm/Includes.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ def addKernel(self, kernel):
113113
self.cppStr += "unsigned char *%s_bin = 0;\n" % kernelName
114114
self.cppStr += " size_t %s_binSize = 0;\n" % kernelName
115115
self.cppStr += "#else\n"
116-
self.cppStr += "#pragma message(\"AutoGemmKernelBinaries.cpp: %s was pre-compiled.\")\n" % kernelName
116+
# self.cppStr += "#pragma message(\"AutoGemmKernelBinaries.cpp: %s was pre-compiled.\")\n" % kernelName
117117
self.cppStr += "#endif\n"
118118

119119
kernelName = kernel.getRowName()
@@ -123,7 +123,7 @@ def addKernel(self, kernel):
123123
self.cppStr += "unsigned char *%s_bin = 0;\n" % kernelName
124124
self.cppStr += " size_t %s_binSize = 0;\n" % kernelName
125125
self.cppStr += "#else\n"
126-
self.cppStr += "#pragma message(\"AutoGemmKernelBinaries.cpp: %s was pre-compiled.\")\n" % kernelName
126+
# self.cppStr += "#pragma message(\"AutoGemmKernelBinaries.cpp: %s was pre-compiled.\")\n" % kernelName
127127
self.cppStr += "#endif\n"
128128

129129
kernelName = kernel.getColName()
@@ -133,7 +133,7 @@ def addKernel(self, kernel):
133133
self.cppStr += "unsigned char *%s_bin = 0;\n" % kernelName
134134
self.cppStr += " size_t %s_binSize = 0;\n" % kernelName
135135
self.cppStr += "#else\n"
136-
self.cppStr += "#pragma message(\"AutoGemmKernelBinaries.cpp: %s was pre-compiled.\")\n" % kernelName
136+
# self.cppStr += "#pragma message(\"AutoGemmKernelBinaries.cpp: %s was pre-compiled.\")\n" % kernelName
137137
self.cppStr += "#endif\n"
138138

139139
kernelName = kernel.getCornerName()
@@ -143,7 +143,7 @@ def addKernel(self, kernel):
143143
self.cppStr += "unsigned char *%s_bin = 0;\n" % kernelName
144144
self.cppStr += " size_t %s_binSize = 0;\n" % kernelName
145145
self.cppStr += "#else\n"
146-
self.cppStr += "#pragma message(\"AutoGemmKernelBinaries.cpp: %s was pre-compiled.\")\n" % kernelName
146+
# self.cppStr += "#pragma message(\"AutoGemmKernelBinaries.cpp: %s was pre-compiled.\")\n" % kernelName
147147
self.cppStr += "#endif\n"
148148

149149
self.incFile.write( self.incStr )

src/library/blas/AutoGemm/KernelOpenCL.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -482,7 +482,7 @@ def writeOpenCLKernelToFile(kernel):
482482
kernelFile.write("\";\n")
483483
kernelFile.write("\n")
484484
kernelFile.write("#else\n")
485-
kernelFile.write("#pragma message(\"AutoGemmKernelSources.cpp: %s was overriden by user kernel.\")\n" % kernel.getName() )
485+
# kernelFile.write("#pragma message(\"AutoGemmKernelSources.cpp: %s was overriden by user kernel.\")\n" % kernel.getName() )
486486
kernelFile.write("#endif\n")
487487
kernelFile.close()
488488

src/library/blas/gens/trmm.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1245,7 +1245,7 @@ static int trmmGetDefaultDecomp( PGranularity *pgran,
12451245
unsigned int subdimsNum,
12461246
void *pArgs)
12471247
{
1248-
(void*)subdimsNum;
1248+
DUMMY_ARG_USAGE(subdimsNum);
12491249

12501250
if ( NULL == pArgs ) {
12511251
return -EINVAL;

src/library/blas/trtri/diag_dtrtri_lower_128_16.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44

55
#ifndef KERNEL_DIAG_DTRTRI_LOWER_128_16_SRC_CPP
66
#define KERNEL_DIAG_DTRTRI_LOWER_128_16_SRC_CPP
7-
#pragma message("#define KERNEL_DIAG_DTRTRI_UPPER_128_16_SRC_CPP.")
87

98
#ifndef STRINGIFY
109
#define STRINGIFY2(...) #__VA_ARGS__

src/library/blas/trtri/diag_dtrtri_upper_128_16.cpp

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44

55
#ifndef KERNEL_DIAG_DTRTRI_UPPER_128_16_SRC_CPP
66
#define KERNEL_DIAG_DTRTRI_UPPER_128_16_SRC_CPP
7-
#pragma message("#define KERNEL_DIAG_DTRTRI_UPPER_128_16_SRC_CPP.")
87

98
#ifndef STRINGIFY
109
#define STRINGIFY2(...) #__VA_ARGS__
@@ -64,17 +63,17 @@ uint na)\n
6463
{\n
6564
if(tx <= i && i+bx*BLOCK_SIZE < na )\n
6665
{\n
67-
Bs[i*BLOCK_SIZE+tx] = *(Aoff+i*lda+tx);\n
66+
Bs[i*BLOCK_SIZE+tx] = *(Aoff+i*lda+tx);\n
6867
}\n
6968
else\n
7069
{\n
7170
Bs[i*BLOCK_SIZE+tx] = ZERO;\n
7271
}\n
73-
}\n
72+
}\n
7473
// read in the whole square block of my A and zero out the non-data triangle
75-
74+
7675
// Synchronize to make sure the matrices are loaded
77-
//__syncthreads();
76+
//__syncthreads();
7877
barrier(CLK_LOCAL_MEM_FENCE);\n
7978

8079
// solve the diagonals
@@ -92,7 +91,7 @@ uint na)\n
9291
else \n
9392
{\n
9493
Bs[tx*BLOCK_SIZE+tx] = ONE / ( Bs[tx*BLOCK_SIZE+tx]) ;\n
95-
}\n
94+
}\n
9695
}\n
9796
barrier(CLK_LOCAL_MEM_FENCE);\n
9897

@@ -139,14 +138,14 @@ uint na)\n
139138
// __syncthreads();
140139
barrier(CLK_LOCAL_MEM_FENCE);\n
141140
}\n
142-
141+
143142
// write back A
144143
_Pragma("unroll")\n
145144
for( i=0; i < BLOCK_SIZE; i++ )\n
146145
{\n
147146
*(d_dinvA+i*NB+tx) = Bs[i*BLOCK_SIZE+tx];\n
148147
}\n
149-
148+
150149
}\n
151150
// end of kernel
152151
);

src/library/blas/trtri/diag_dtrtri_upper_192_12.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44

55
#ifndef KERNEL_DIAG_DTRTRI_UPPER_192_12_SRC_CPP
66
#define KERNEL_DIAG_DTRTRI_UPPER_192_12_SRC_CPP
7-
#pragma message("#define KERNEL_DIAG_DTRTRI_UPPER_192_12_SRC_CPP.")
87

98
#ifndef STRINGIFY
109
#define STRINGIFY2(...) #__VA_ARGS__
@@ -43,10 +42,10 @@ double neg_switcher; \n
4342
// Thread index
4443
int tx = get_local_id(0); \n
4544

46-
// Thread index
45+
// Thread index
4746
int gx = get_global_id(0); \n
4847

49-
// Block index
48+
// Block index
5049
int bx = get_group_id(0); \n
5150

5251
A = A + offA; \n
@@ -56,7 +55,7 @@ int NumBLperNB = NB / BLOCK_SIZE; \n
5655
d_dinvA += bx / NumBLperNB*NB*NB + (bx % NumBLperNB)*(NB*BLOCK_SIZE + BLOCK_SIZE); \n
5756

5857
__local double Bs[BLOCK_SIZE*BLOCK_SIZE]; \n
59-
__local double workspace[BLOCK_SIZE];\n // workspace used to store the current working column
58+
__local double workspace[BLOCK_SIZE];\n // workspace used to store the current working column
6059

6160
// load A \n
6261
_Pragma("unroll")\n
@@ -74,7 +73,7 @@ for (i = 0; i < BLOCK_SIZE; i++)\n
7473
// read in the whole square block of my A and zero out the non-data triangle
7574

7675
// Synchronize to make sure the matrices are loaded
77-
//__syncthreads();
76+
//__syncthreads();
7877
barrier(CLK_LOCAL_MEM_FENCE); \n
7978

8079
// solve the diagonals

src/library/blas/trtri/triple_dgemm_update_128_16_PART1_L.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55

66
#ifndef KERNEL_TRIPLE_DGEMM_UPDATE_128_16_PART1_L_SRC_CPP
77
#define KERNEL_TRIPLE_DGEMM_UPDATE_128_16_PART1_L_SRC_CPP
8-
#pragma message("#define KERNEL_TRIPLE_DGEMM_UPDATE_128_16_PART1_L_SRC_CPP.")
98

109
#ifndef STRINGIFY
1110
#define STRINGIFY2(...) #__VA_ARGS__
@@ -74,13 +73,13 @@ Ain = Ain + offAin; \n
7473
int ya = page*blk * 2; \n
7574
int incA = ya * lda + xa; \n
7675

77-
// maxA will be used to detect overflow on all subsequent accesses on A(xa, ya:ya+???)
76+
// maxA will be used to detect overflow on all subsequent accesses on A(xa, ya:ya+???)
7877

7978
int maxA; \n
8079
if (xa < na)\n
81-
maxA = lda*na; \n // macro READA will detect overflow on y dimension
80+
maxA = lda*na; \n // macro READA will detect overflow on y dimension
8281
else\n
83-
maxA = 0; \n // there is already an overflow on xa
82+
maxA = 0; \n // there is already an overflow on xa
8483

8584
#define READA ( (incA < maxA ) ? Ain[incA] : 0 ) \n
8685

@@ -139,7 +138,7 @@ Ain = Ain + offAin; \n
139138
daxpy(a[1], &bs[13][0], c); \n
140139
daxpy(a[2], &bs[14][0], c); \n
141140
daxpy(a[3], &bs[15][0], c); \n
142-
141+
143142
B += 16; \n
144143
//__syncthreads();
145144
barrier(CLK_LOCAL_MEM_FENCE); \n

0 commit comments

Comments
 (0)