Use rmm prefetch API so we don't have to handle CUDA 12/13 API differences

robertmaynard · robertmaynard · commit 7767735ead0f · 2025-08-28T19:36:52.000-04:00
diff --git a/cpp/libcugraph_etl/include/hash/concurrent_unordered_map.cuh b/cpp/libcugraph_etl/include/hash/concurrent_unordered_map.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2024, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2017-2025, NVIDIA CORPORATION.  All rights reserved.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -26,6 +26,7 @@
 
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/mr/device/polymorphic_allocator.hpp>
+#include <rmm/prefetch.hpp>
 
 #include <cuda/atomic>
 #include <thrust/pair.h>
@@ -472,10 +473,10 @@ class concurrent_unordered_map {
     cudaError_t status = cudaPointerGetAttributes(&hashtbl_values_ptr_attributes, m_hashtbl_values);
 
     if (cudaSuccess == status && isPtrManaged(hashtbl_values_ptr_attributes)) {
-      RAFT_CUDA_TRY(cudaMemPrefetchAsync(
-        m_hashtbl_values, m_capacity * sizeof(value_type), dev_id, stream.value()));
+      rmm::prefetch(
+        m_hashtbl_values, m_capacity * sizeof(value_type), rmm::cuda_device_id{dev_id}, stream);
     }
-    RAFT_CUDA_TRY(cudaMemPrefetchAsync(this, sizeof(*this), dev_id, stream.value()));
+    rmm::prefetch(this, sizeof(*this), rmm::cuda_device_id{dev_id}, stream);
   }
 
   /**
@@ -545,8 +546,8 @@ class concurrent_unordered_map {
       if (cudaSuccess == status && isPtrManaged(hashtbl_values_ptr_attributes)) {
         int dev_id = 0;
         RAFT_CUDA_TRY(cudaGetDevice(&dev_id));
-        RAFT_CUDA_TRY(cudaMemPrefetchAsync(
-          m_hashtbl_values, m_capacity * sizeof(value_type), dev_id, stream.value()));
+        rmm::prefetch(
+          m_hashtbl_values, m_capacity * sizeof(value_type), rmm::cuda_device_id{dev_id}, stream);
       }
     }