Skip to content

Commit 4d17de5

Browse files
authored
Clean up warnings (NVIDIA#5811)
Signed-off-by: Joaquin Anton Guirao <[email protected]>
1 parent fd37009 commit 4d17de5

File tree

3 files changed

+12
-9
lines changed

3 files changed

+12
-9
lines changed

CMakeLists.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,15 @@ message(STATUS "Generated CMAKE_CUDA_ARCHITECTURES: ${CMAKE_CUDA_ARCHITECTURES}"
286286

287287
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --compiler-options \"-fvisibility=hidden -Wno-free-nonheap-object\" --Wno-deprecated-gpu-targets -Xfatbin -compress-all")
288288

289+
# Suppress a few warnings from nvcc:
290+
# calling a __host__ function(...) from a __host__ __device__ function(...) is not allowed
291+
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --diag-suppress=20011")
292+
# 'long double' is treated as 'double' in device code
293+
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --diag-suppress=20208")
294+
# unrecognized #pragma in device code
295+
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --diag-suppress=20199")
296+
# overloaded virtual function "dali::Operator<dali::CPUBackend>::RunImpl" is only partially overridden in class "..."
297+
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --diag-suppress=611")
289298

290299
# Dependencies
291300
if(${ARCH} MATCHES "aarch64-linux")

dali/kernels/slice/slice_hwc2chw_normalize_gpu.cu

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ __device__ __forceinline__ Tile *load_linear_tile(Tile *tile,
130130
aligned_tile[idx * 4 + 3] = in.w;
131131
}
132132

133-
uint32_t processed_in_main = left_after_prologue & -4; // equivalent to (x / 4) * 4
133+
uint32_t processed_in_main = left_after_prologue & ~0x3; // equivalent to (x / 4) * 4
134134
uint32_t left_after_main = left_after_prologue - processed_in_main;
135135

136136
// epilogue
@@ -238,7 +238,7 @@ __device__ __forceinline__ Tile *slice_load_linear_tile(
238238
aligned_tile[idx * 4 + 3] = in.w;
239239
}
240240

241-
uint32_t processed_in_main = left_after_prologue & -4; // equivalent to (x / 4) * 4
241+
uint32_t processed_in_main = left_after_prologue & ~0x3; // equivalent to (x / 4) * 4
242242
uint32_t left_after_main = left_after_prologue - processed_in_main;
243243

244244
// epilogue
@@ -339,7 +339,7 @@ __device__ __forceinline__ void load_planar_tile(Tile tile[][kBlockSize / kStati
339339
tile[c][xy] = in.w;
340340
}
341341

342-
uint32_t processed_in_main = left_after_prologue & -4; // equivalent to (x / 4) * 4
342+
uint32_t processed_in_main = left_after_prologue & ~0x3; // equivalent to (x / 4) * 4
343343
uint32_t left_after_main = left_after_prologue - processed_in_main;
344344

345345
// epilogue
@@ -521,8 +521,6 @@ __device__ __forceinline__ void store_planar_hwc_pad(
521521
int64_t start_x = static_cast<int64_t>(blockIdx.x - sample.first_block) * kBlockSize;
522522
int64_t end_x = ::min(start_x + kBlockSize, sample.sample_size);
523523

524-
const auto *__restrict__ fill_values = static_cast<const float16 *>(sample.fill_values);
525-
526524
// Preload the norm values so they are accessed via registers and not from gmem via pointer.
527525
Compute norm_mul[kOutChannels], norm_add[kOutChannels];
528526

@@ -545,7 +543,6 @@ __device__ __forceinline__ void store_planar_hwc_pad(
545543

546544
// TODO(klecki) in the version without mirror, we can keep one offset, as we can start the
547545
// output pointer at the output tile.
548-
auto *out_aligned = sample.out;
549546
auto *out_h2 = reinterpret_cast<__half2 *>(sample.out);
550547
uint32_t to_write = end_x_padded - start_x_padded;
551548

@@ -726,8 +723,6 @@ __global__ void Hwc2HwcNormalizePadFp16(const Hwc2HwcChwSampleDesc<Out, In> *sam
726723
uint32_t *first_blocks, uint32_t num_samples) {
727724
static_assert(std::is_same<In, uint8_t>::value, "Only uint8_t supported as input");
728725

729-
constexpr int kOutChannels = kStaticChannels + 1;
730-
731726
int sample_idx = FindSampleIdx(first_blocks, num_samples);
732727
const auto sample = samples[sample_idx];
733728

dali/operators/math/expressions/expression_impl_gpu_ternary.cuh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,6 @@ __global__ void ExecuteTiledTernaryOp1D(const SampleDescGPU<3> *samples, const T
6464
const auto &tile = tiles[blockIdx.y];
6565
const auto &sample = samples[tile.sample_idx];
6666
auto output = static_cast<Result *>(sample.output.data);
67-
auto &out_strides = sample.output.strides;
6867
auto &arg0 = sample.args[0];
6968
auto &arg1 = sample.args[1];
7069
auto &arg2 = sample.args[2];

0 commit comments

Comments
 (0)