Support mixed-precision for SpMV (#907)

aartbik · web-flow · commit a3fbd5dfd07d · 2025-03-11T11:26:23.000-07:00
* Support mixed-precision for SpMV
diff --git a/include/matx/transforms/matmul/matvec_cusparse.h b/include/matx/transforms/matmul/matvec_cusparse.h
@@ -77,6 +77,9 @@ class MatVecCUSPARSEHandle_t {
   using TB = typename TensorTypeB::value_type;
   using TC = typename TensorTypeC::value_type;
 
+  // Mixed-precision compute type: half-precision TC computes in float, any other TC computes in TC.
+  using TCOMP = std::conditional_t<is_matx_half_v<TC>, float, TC>;
+
   /**
    * Construct a SpMV handle
    */
@@ -87,12 +90,12 @@ class MatVecCUSPARSEHandle_t {
     params_ = GetSpMVParams(c, a, b, stream, alpha, beta);
 
     // Properly typed alpha, beta.
-    if constexpr (std::is_same_v<TC, cuda::std::complex<float>> ||
-                  std::is_same_v<TC, cuda::std::complex<double>>) {
+    if constexpr (std::is_same_v<TCOMP, cuda::std::complex<float>> ||
+                  std::is_same_v<TCOMP, cuda::std::complex<double>>) {
       salpha_ = {alpha, 0};
       sbeta_ = {beta, 0};
-    } else if constexpr (std::is_same_v<TC, float> ||
-                         std::is_same_v<TC, double>) {
+    } else if constexpr (std::is_same_v<TCOMP, float> ||
+                         std::is_same_v<TCOMP, double>) {
       salpha_ = alpha;
       sbeta_ = beta;
     } else {
@@ -139,7 +142,7 @@ class MatVecCUSPARSEHandle_t {
 
     // Allocate a workspace for SpMV.
     const cusparseSpMVAlg_t algo = CUSPARSE_SPMV_ALG_DEFAULT;
-    const cudaDataType comptp = dtc; // TODO: support separate comp type?!
+    const cudaDataType comptp = MatXTypeToCudaType<TCOMP>();
     ret =
         cusparseSpMV_bufferSize(handle_, params_.opA, &salpha_, matA_, vecB_,
                                 &sbeta_, vecC_, comptp, algo, &workspaceSize_);
@@ -188,7 +191,7 @@ class MatVecCUSPARSEHandle_t {
                             [[maybe_unused]] const TensorTypeB &b) {
     MATX_NVTX_START("", matx::MATX_NVTX_LOG_INTERNAL);
     const cusparseSpMVAlg_t algo = CUSPARSE_SPMV_ALG_DEFAULT;
-    const cudaDataType comptp = MatXTypeToCudaType<TC>(); // TODO: see above
+    const cudaDataType comptp = MatXTypeToCudaType<TCOMP>();
     [[maybe_unused]] cusparseStatus_t ret =
         cusparseSpMV(handle_, params_.opA, &salpha_, matA_, vecB_, &sbeta_,
                      vecC_, comptp, algo, workspace_);
@@ -203,8 +206,8 @@ class MatVecCUSPARSEHandle_t {
   size_t workspaceSize_ = 0;
   void *workspace_ = nullptr;
   detail::MatVecCUSPARSEParams_t params_;
-  TC salpha_;
-  TC sbeta_;
+  TCOMP salpha_;
+  TCOMP sbeta_;
 };
 
 /**
@@ -287,10 +290,12 @@ void sparse_matvec_impl(TensorTypeC &C, const TensorTypeA &a,
                 "tensors must have SpMV rank");
   static_assert(std::is_same_v<TC, TA> && std::is_same_v<TC, TB>,
                 "tensors must have the same data type");
-  // TODO: allow MIXED-PRECISION computation!
-  static_assert(std::is_same_v<TC, float> || std::is_same_v<TC, double> ||
-                    std::is_same_v<TC, cuda::std::complex<float>> ||
-                    std::is_same_v<TC, cuda::std::complex<double>>,
+  static_assert(std::is_same_v<TC, matx::matxFp16> ||
+                std::is_same_v<TC, matx::matxBf16> ||
+                std::is_same_v<TC, float> ||
+                std::is_same_v<TC, double> ||
+                std::is_same_v<TC, cuda::std::complex<float>> ||
+                std::is_same_v<TC, cuda::std::complex<double>>,
                 "unsupported data type");
   MATX_ASSERT(a.Size(RANKA - 1) == b.Size(RANKB - 1) &&
                   a.Size(RANKA - 2) == c.Size(RANKC - 1),