37
37
# pragma system_header
38
38
#endif // no system header
39
39
40
- // Enable the functionality of this header if
40
+ // Enable the functionality of this header if:
41
41
// * The NVTX3 C API is available in CTK
42
42
// * NVTX is not explicitly disabled
43
43
// * C++14 is available for cuda::std::optional
44
- #if __has_include(<nvtx3/nvToolsExt.h>) && !defined(NVTX_DISABLE) && _CCCL_STD_VER >= 2014
44
+ #if __has_include(<nvtx3/nvToolsExt.h> ) && !defined(NVTX_DISABLE) && _CCCL_STD_VER >= 2014
45
45
// Include our NVTX3 C++ wrapper if not available from the CTK
46
46
# if __has_include(<nvtx3/nvtx3.hpp>) // TODO(bgruber): replace by a check for the first CTK version shipping the header
47
47
# include < nvtx3/nvtx3.hpp>
48
48
# else // __has_include(<nvtx3/nvtx3.hpp>)
49
49
# include " nvtx3.hpp"
50
50
# endif // __has_include(<nvtx3/nvtx3.hpp>)
51
51
52
- # include < cuda/std/optional>
52
+ // We expect the NVTX3 V1 C++ API to be available when nvtx3.hpp is available. This should work, because newer versions
53
+ // of NVTX3 will continue to declare previous API versions. See also:
54
+ // https://github.com/NVIDIA/NVTX/blob/release-v3/c/include/nvtx3/nvtx3.hpp#L2835-L2841.
55
+ # ifdef NVTX3_CPP_DEFINITIONS_V1_0
56
+ # include < cuda/std/optional>
53
57
54
58
CUB_NAMESPACE_BEGIN
55
59
namespace detail
@@ -62,26 +66,38 @@ struct NVTXCCCLDomain
62
66
CUB_NAMESPACE_END
63
67
64
68
// Hook for the NestedNVTXRangeGuard from the unit tests
65
- # ifndef CUB_DETAIL_BEFORE_NVTX_RANGE_SCOPE
66
- # define CUB_DETAIL_BEFORE_NVTX_RANGE_SCOPE (name )
67
- # endif // !CUB_DETAIL_BEFORE_NVTX_RANGE_SCOPE
69
+ # ifndef CUB_DETAIL_BEFORE_NVTX_RANGE_SCOPE
70
+ # define CUB_DETAIL_BEFORE_NVTX_RANGE_SCOPE (name )
71
+ # endif // !CUB_DETAIL_BEFORE_NVTX_RANGE_SCOPE
68
72
69
73
// Conditionally inserts an NVTX range starting here until the end of the current function scope in host code. Does
70
74
// nothing in device code.
71
75
// The optional is needed to defer the construction of an NVTX range (host-only code) and message string registration
72
76
// into a dispatch region running only on the host, while preserving the semantic scope where the range is declared.
73
- # define CUB_DETAIL_NVTX_RANGE_SCOPE_IF (condition, name ) \
74
- CUB_DETAIL_BEFORE_NVTX_RANGE_SCOPE (name) \
75
- ::cuda::std::optional<::nvtx3::scoped_range_in<CUB_NS_QUALIFIER::detail::NVTXCCCLDomain>> __cub_nvtx3_range; \
76
- NV_IF_TARGET ( \
77
- NV_IS_HOST, \
78
- static const ::nvtx3::registered_string_in<CUB_NS_QUALIFIER::detail::NVTXCCCLDomain> __cub_nvtx3_func_name{name}; \
79
- static const ::nvtx3::event_attributes __cub_nvtx3_func_attr{__cub_nvtx3_func_name}; \
80
- if (condition) __cub_nvtx3_range.emplace(__cub_nvtx3_func_attr); \
81
- (void ) __cub_nvtx3_range;)
77
+ # define CUB_DETAIL_NVTX_RANGE_SCOPE_IF (condition, name ) \
78
+ CUB_DETAIL_BEFORE_NVTX_RANGE_SCOPE (name) \
79
+ ::cuda::std::optional<::nvtx3::v1::scoped_range_in<CUB_NS_QUALIFIER::detail::NVTXCCCLDomain>> __cub_nvtx3_range; \
80
+ NV_IF_TARGET ( \
81
+ NV_IS_HOST, \
82
+ static const ::nvtx3::v1::registered_string_in<CUB_NS_QUALIFIER::detail::NVTXCCCLDomain> __cub_nvtx3_func_name{ \
83
+ name}; \
84
+ static const ::nvtx3::v1::event_attributes __cub_nvtx3_func_attr{__cub_nvtx3_func_name}; \
85
+ if (condition) __cub_nvtx3_range.emplace(__cub_nvtx3_func_attr); \
86
+ (void ) __cub_nvtx3_range;)
82
87
83
- # define CUB_DETAIL_NVTX_RANGE_SCOPE (name ) CUB_DETAIL_NVTX_RANGE_SCOPE_IF(true , name)
84
- #else // __has_include(<nvtx3/nvToolsExt.h>) && !defined(NVTX_DISABLE) && _CCCL_STD_VER > 2011
88
+ # define CUB_DETAIL_NVTX_RANGE_SCOPE (name ) CUB_DETAIL_NVTX_RANGE_SCOPE_IF(true , name)
89
+ # else // NVTX3_CPP_DEFINITIONS_V1_0
90
+ // Tell the user we don't support their NVTX3 version.
91
+ # if defined(_CCCL_COMPILER_MSVC)
92
+ # pragma message( \
93
+ " warning: nvtx3.hpp is available but does not define the V1 API. This is odd. Please open a GitHub issue at: https://github.com/NVIDIA/cccl/issues." )
94
+ # else
95
+ # warning nvtx3.hpp is available but does not define the V1 API. This is odd. Please open a GitHub issue at: https://github.com/NVIDIA/cccl/issues.
96
+ # endif
97
+ # define CUB_DETAIL_NVTX_RANGE_SCOPE_IF (condition, name )
98
+ # define CUB_DETAIL_NVTX_RANGE_SCOPE (name )
99
+ # endif // NVTX3_CPP_DEFINITIONS_V1_0
100
+ #else // __has_include(<nvtx3/nvToolsExt.h> ) && !defined(NVTX_DISABLE) && _CCCL_STD_VER >= 2014
85
101
# define CUB_DETAIL_NVTX_RANGE_SCOPE_IF (condition, name )
86
102
# define CUB_DETAIL_NVTX_RANGE_SCOPE (name )
87
- #endif // __has_include(<nvtx3/nvToolsExt.h>) && !defined(NVTX_DISABLE) && _CCCL_STD_VER > 2011
103
+ #endif // __has_include(<nvtx3/nvToolsExt.h> ) && !defined(NVTX_DISABLE) && _CCCL_STD_VER >= 2014
0 commit comments