Skip to content

Commit 9a69511

Browse files
committed
Create nvfp4 bindings in shared library
1 parent 33ffdd9 commit 9a69511

File tree

4 files changed

+564
-2
lines changed

4 files changed

+564
-2
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -512,6 +512,7 @@ if(BUILD_CUTLASS)
512 512   set(NVFUSER_CUTLASS_SRCS)
513 513   list(APPEND NVFUSER_CUTLASS_SRCS
514 514   ${NVFUSER_CUTLASS}/fp8_blockwise_moe_kernel.cu
    515 + ${NVFUSER_CUTLASS}/nvfp4_blockwise_moe_kernel.cu
515 516   ${NVFUSER_CUTLASS}/cutlass_utils.cpp
516 517   )
517 518   add_library(nvf_cutlass SHARED ${NVFUSER_CUTLASS_SRCS})

cutlass/nvf_cutlass.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,4 +31,17 @@ void fp8_blockwise_scaled_grouped_mm(
31 31   const torch::Tensor& expert_offsets,
32 32   const torch::Tensor& workspace);
33 33
   34 + void nvfp4_blockwise_scaled_grouped_mm(
   35 + torch::Tensor& output,
   36 + const torch::Tensor& a,
   37 + const torch::Tensor& b,
   38 + const torch::Tensor& a_blockscale,
   39 + const torch::Tensor& b_blockscales,
   40 + const torch::Tensor& alphas,
   41 + const torch::Tensor& ab_strides,
   42 + const torch::Tensor& c_strides,
   43 + const torch::Tensor& problem_sizes,
   44 + const torch::Tensor& expert_offsets,
   45 + const torch::Tensor& sf_offsets);
   46 +
34 47   } // namespace nvfuser::cutlass_kernels

0 commit comments

Comments
 (0)