File tree Expand file tree Collapse file tree 4 files changed +564
-2
lines changed Expand file tree Collapse file tree 4 files changed +564
-2
lines changed Original file line number Diff line number Diff line change @@ -512,6 +512,7 @@ if(BUILD_CUTLASS)
512
512
set (NVFUSER_CUTLASS_SRCS )
513
513
list (APPEND NVFUSER_CUTLASS_SRCS
514
514
${NVFUSER_CUTLASS} /fp8_blockwise_moe_kernel.cu
515
+ ${NVFUSER_CUTLASS} /nvfp4_blockwise_moe_kernel.cu
515
516
${NVFUSER_CUTLASS} /cutlass_utils.cpp
516
517
)
517
518
add_library (nvf_cutlass SHARED ${NVFUSER_CUTLASS_SRCS} )
Original file line number Diff line number Diff line change @@ -31,4 +31,17 @@ void fp8_blockwise_scaled_grouped_mm(
31
31
const torch::Tensor& expert_offsets,
32
32
const torch::Tensor& workspace);
33
33
34
+ void nvfp4_blockwise_scaled_grouped_mm (
35
+ torch::Tensor& output,
36
+ const torch::Tensor& a,
37
+ const torch::Tensor& b,
38
+ const torch::Tensor& a_blockscale,
39
+ const torch::Tensor& b_blockscales,
40
+ const torch::Tensor& alphas,
41
+ const torch::Tensor& ab_strides,
42
+ const torch::Tensor& c_strides,
43
+ const torch::Tensor& problem_sizes,
44
+ const torch::Tensor& expert_offsets,
45
+ const torch::Tensor& sf_offsets);
46
+
34
47
} // namespace nvfuser::cutlass_kernels
You can’t perform that action at this time.
0 commit comments