-
Notifications
You must be signed in to change notification settings - Fork 52
Description
1、现象:
启用新特性“自动 CV 融合”(TL_ASCEND_AUTO_CV_COMBINE)后,kernel 中位于 V 核(vector 核)处理逻辑之前的 T.printf 语句无法正常打印。复现代码如下:
pass_configs = {
tilelang.PassConfigKey.TL_ASCEND_AUTO_SYNC: True,
tilelang.PassConfigKey.TL_ASCEND_AUTO_CV_COMBINE: True,  # 启用自动 CV 融合
}
@tilelang.jit(out_idx=[1], pass_configs=pass_configs)
def gelu_mul(M, N, block_M, block_N, dtype="float"):
m_num = T.ceildiv(M, block_M)
# The gelu_mul operator splits the input tensor into two tensors, x1 and x2, based on the last dimension.
# It performs a GELU operation on x1 and multiplies the result by x2. Therefore, the kernel splitting is only relative to the dimension of x1.
n_num = T.ceildiv(N // 2, block_N)
VEC_NUM = 2
@T.prim_func
def main(
A: T.Tensor((M, N), dtype),
B: T.Tensor((M, N // 2), dtype)
):
with T.Kernel(m_num * n_num, is_npu=True) as (cid, vid):
bx = cid // n_num
by = cid % n_num
a1_ub = T.alloc_ub((block_M // VEC_NUM, block_N), dtype)
a2_ub = T.alloc_ub((block_M // VEC_NUM, block_N), dtype)
b_ub = T.alloc_ub((block_M // VEC_NUM, block_N), dtype)
T.printf("-----outer cid:%d-------------vid:%d--------------------------------------\n", cid, vid)  # <-- 该语句没有正常打印
temp_ub = T.alloc_ub((block_M // VEC_NUM, block_N), dtype)
PS:编译生成(翻译后)的 AscendC 源码中也没有与上述 T.printf 对应的代码,生成的源码片段如下:
AscendC::TBuf<AscendC::TPosition::A2> ascend_l0a;
pipe.InitBuffer(ascend_l0a, 65536);
AscendC::TBuf<AscendC::TPosition::B2> ascend_l0b;
pipe.InitBuffer(ascend_l0b, 131072);
AscendC::TBuf<AscendC::TPosition::A1> ascend_l1; pipe.InitBuffer(ascend_l1, 524032);
AscendC::TBuf<AscendC::TPosition::CO1> ascend_l0c; pipe.InitBuffer(ascend_l0c, 131072);
AscendC::TBuf<AscendC::TPosition::VECCALC> ascend_ub; pipe.InitBuffer(ascend_ub, 196352);
pipe.Destroy();
auto cid = AscendC::GetBlockIdx();  // 翻译后的源码在此处缺失与 T.printf 对应的打印语句