Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 24a02dc

Browse files
committed May 20, 2024
Program against explicitly versioned NVTX V1 API
The explicit V1 API is always available. See discussion here: NVIDIA/NVTX#96. Fixes: #1750
1 parent 18c083b commit 24a02dc

File tree

1 file changed

+34
-18
lines changed

1 file changed

+34
-18
lines changed
 

‎cub/cub/detail/nvtx.cuh

+34-18
Original file line numberDiff line numberDiff line change
@@ -37,19 +37,23 @@
3737
# pragma system_header
3838
#endif // no system header
3939

40-
// Enable the functionality of this header if
40+
// Enable the functionality of this header if:
4141
// * The NVTX3 C API is available in CTK
4242
// * NVTX is not explicitly disabled
4343
// * C++14 is available for cuda::std::optional
44-
#if __has_include(<nvtx3/nvToolsExt.h>) && !defined(NVTX_DISABLE) && _CCCL_STD_VER >= 2014
44+
#if __has_include(<nvtx3/nvToolsExt.h> ) && !defined(NVTX_DISABLE) && _CCCL_STD_VER >= 2014
4545
// Include our NVTX3 C++ wrapper if not available from the CTK
4646
# if __has_include(<nvtx3/nvtx3.hpp>) // TODO(bgruber): replace by a check for the first CTK version shipping the header
4747
# include <nvtx3/nvtx3.hpp>
4848
# else // __has_include(<nvtx3/nvtx3.hpp>)
4949
# include "nvtx3.hpp"
5050
# endif // __has_include(<nvtx3/nvtx3.hpp>)
5151

52-
# include <cuda/std/optional>
52+
// We expect the NVTX3 V1 C++ API to be available when nvtx3.hpp is available. This should work, because newer versions
53+
// of NVTX3 will continue to declare previous API versions. See also:
54+
// https://github.com/NVIDIA/NVTX/blob/release-v3/c/include/nvtx3/nvtx3.hpp#L2835-L2841.
55+
# ifdef NVTX3_CPP_DEFINITIONS_V1_0
56+
# include <cuda/std/optional>
5357

5458
CUB_NAMESPACE_BEGIN
5559
namespace detail
@@ -62,26 +66,38 @@ struct NVTXCCCLDomain
6266
CUB_NAMESPACE_END
6367

6468
// Hook for the NestedNVTXRangeGuard from the unit tests
65-
# ifndef CUB_DETAIL_BEFORE_NVTX_RANGE_SCOPE
66-
# define CUB_DETAIL_BEFORE_NVTX_RANGE_SCOPE(name)
67-
# endif // !CUB_DETAIL_BEFORE_NVTX_RANGE_SCOPE
69+
# ifndef CUB_DETAIL_BEFORE_NVTX_RANGE_SCOPE
70+
# define CUB_DETAIL_BEFORE_NVTX_RANGE_SCOPE(name)
71+
# endif // !CUB_DETAIL_BEFORE_NVTX_RANGE_SCOPE
6872

6973
// Conditionally inserts an NVTX range starting here until the end of the current function scope in host code. Does
7074
// nothing in device code.
7175
// The optional is needed to defer the construction of an NVTX range (host-only code) and message string registration
7276
// into a dispatch region running only on the host, while preserving the semantic scope where the range is declared.
73-
# define CUB_DETAIL_NVTX_RANGE_SCOPE_IF(condition, name) \
74-
CUB_DETAIL_BEFORE_NVTX_RANGE_SCOPE(name) \
75-
::cuda::std::optional<::nvtx3::scoped_range_in<CUB_NS_QUALIFIER::detail::NVTXCCCLDomain>> __cub_nvtx3_range; \
76-
NV_IF_TARGET( \
77-
NV_IS_HOST, \
78-
static const ::nvtx3::registered_string_in<CUB_NS_QUALIFIER::detail::NVTXCCCLDomain> __cub_nvtx3_func_name{name}; \
79-
static const ::nvtx3::event_attributes __cub_nvtx3_func_attr{__cub_nvtx3_func_name}; \
80-
if (condition) __cub_nvtx3_range.emplace(__cub_nvtx3_func_attr); \
81-
(void) __cub_nvtx3_range;)
77+
# define CUB_DETAIL_NVTX_RANGE_SCOPE_IF(condition, name) \
78+
CUB_DETAIL_BEFORE_NVTX_RANGE_SCOPE(name) \
79+
::cuda::std::optional<::nvtx3::v1::scoped_range_in<CUB_NS_QUALIFIER::detail::NVTXCCCLDomain>> __cub_nvtx3_range; \
80+
NV_IF_TARGET( \
81+
NV_IS_HOST, \
82+
static const ::nvtx3::v1::registered_string_in<CUB_NS_QUALIFIER::detail::NVTXCCCLDomain> __cub_nvtx3_func_name{ \
83+
name}; \
84+
static const ::nvtx3::v1::event_attributes __cub_nvtx3_func_attr{__cub_nvtx3_func_name}; \
85+
if (condition) __cub_nvtx3_range.emplace(__cub_nvtx3_func_attr); \
86+
(void) __cub_nvtx3_range;)
8287

83-
# define CUB_DETAIL_NVTX_RANGE_SCOPE(name) CUB_DETAIL_NVTX_RANGE_SCOPE_IF(true, name)
84-
#else // __has_include(<nvtx3/nvToolsExt.h>) && !defined(NVTX_DISABLE) && _CCCL_STD_VER > 2011
88+
# define CUB_DETAIL_NVTX_RANGE_SCOPE(name) CUB_DETAIL_NVTX_RANGE_SCOPE_IF(true, name)
89+
# else // NVTX3_CPP_DEFINITIONS_V1_0
90+
// Tell the user we don't support their NVTX3 version.
91+
# if defined(_CCCL_COMPILER_MSVC)
92+
# pragma message( \
93+
"warning: nvtx3.hpp is available but does not define the V1 API. This is odd. Please open a GitHub issue at: https://github.com/NVIDIA/cccl/issues.")
94+
# else
95+
# warning nvtx3.hpp is available but does not define the V1 API. This is odd. Please open a GitHub issue at: https://github.com/NVIDIA/cccl/issues.
96+
# endif
97+
# define CUB_DETAIL_NVTX_RANGE_SCOPE_IF(condition, name)
98+
# define CUB_DETAIL_NVTX_RANGE_SCOPE(name)
99+
# endif // NVTX3_CPP_DEFINITIONS_V1_0
100+
#else // __has_include(<nvtx3/nvToolsExt.h> ) && !defined(NVTX_DISABLE) && _CCCL_STD_VER >= 2014
85101
# define CUB_DETAIL_NVTX_RANGE_SCOPE_IF(condition, name)
86102
# define CUB_DETAIL_NVTX_RANGE_SCOPE(name)
87-
#endif // __has_include(<nvtx3/nvToolsExt.h>) && !defined(NVTX_DISABLE) && _CCCL_STD_VER > 2011
103+
#endif // __has_include(<nvtx3/nvToolsExt.h> ) && !defined(NVTX_DISABLE) && _CCCL_STD_VER >= 2014

0 commit comments

Comments
 (0)
Please sign in to comment.