Skip to content

Commit 9a443bc

Browse files
authored
riscv64: Support scalar-vector bitcasts (#8692)
* riscv64: Implement scalar to vector bitcast and vice-versa
* riscv64: Delete `vfslide1up.vf` instruction
* fuzzgen: Enable `bitcast.i128` for RISC-V
1 parent 7f7064c commit 9a443bc

File tree

8 files changed

+330
-7
lines changed

8 files changed

+330
-7
lines changed

cranelift/codegen/src/isa/riscv64/inst.isle

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2815,6 +2815,10 @@
28152815
;; Generates a bitcast instruction.
28162816
;; Args are: src, src_ty, dst_ty
28172817
(decl gen_bitcast (Reg Type Type) Reg)
2818+
(rule 5 (gen_bitcast r (ty_scalar_float src_ty) (ty_vec_fits_in_register _)) (rv_vfmv_sf r src_ty))
2819+
(rule 4 (gen_bitcast r (ty_int_ref_scalar_64 src_ty) (ty_vec_fits_in_register _)) (rv_vmv_sx r src_ty))
2820+
(rule 3 (gen_bitcast r (ty_vec_fits_in_register _) (ty_scalar_float dst_ty)) (rv_vfmv_fs r dst_ty))
2821+
(rule 2 (gen_bitcast r (ty_vec_fits_in_register _) (ty_int_ref_scalar_64 dst_ty)) (rv_vmv_xs r dst_ty))
28182822
(rule 1 (gen_bitcast r $F32 $I32) (rv_fmvxw r))
28192823
(rule 1 (gen_bitcast r $F64 $I64) (rv_fmvxd r))
28202824
(rule 1 (gen_bitcast r $I32 $F32) (rv_fmvwx r))

cranelift/codegen/src/isa/riscv64/inst/vector.rs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -268,13 +268,16 @@ impl VecAluOpRRRR {
268268
VecAluOpRRRR::VfnmaccVV | VecAluOpRRRR::VfnmaccVF => 0b101101,
269269
VecAluOpRRRR::VfmsacVV | VecAluOpRRRR::VfmsacVF => 0b101110,
270270
VecAluOpRRRR::VfnmsacVV | VecAluOpRRRR::VfnmsacVF => 0b101111,
271+
VecAluOpRRRR::Vslide1upVX => 0b001110,
271272
}
272273
}
273274

274275
pub fn category(&self) -> VecOpCategory {
275276
match self {
276277
VecAluOpRRRR::VmaccVV | VecAluOpRRRR::VnmsacVV => VecOpCategory::OPMVV,
277-
VecAluOpRRRR::VmaccVX | VecAluOpRRRR::VnmsacVX => VecOpCategory::OPMVX,
278+
VecAluOpRRRR::VmaccVX | VecAluOpRRRR::VnmsacVX | VecAluOpRRRR::Vslide1upVX => {
279+
VecOpCategory::OPMVX
280+
}
278281
VecAluOpRRRR::VfmaccVV
279282
| VecAluOpRRRR::VfnmaccVV
280283
| VecAluOpRRRR::VfmsacVV
@@ -299,7 +302,10 @@ impl VecAluOpRRRR {
299302

300303
impl VecInstOverlapInfo for VecAluOpRRRR {
301304
fn forbids_src_dst_overlaps(&self) -> bool {
302-
false
305+
match self {
306+
VecAluOpRRRR::Vslide1upVX => true,
307+
_ => false,
308+
}
303309
}
304310
}
305311

cranelift/codegen/src/isa/riscv64/inst_vector.isle

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,7 @@
229229
(VfnmaccVF)
230230
(VfmsacVF)
231231
(VfnmsacVF)
232+
(Vslide1upVX)
232233
))
233234

234235
;; Register-Imm ALU Ops
@@ -1095,6 +1096,13 @@
10951096
(rule (rv_vslideup_vvi vd vs2 imm mask vstate)
10961097
(vec_alu_rrr_uimm5 (VecAluOpRRRImm5.VslideupVI) vd vs2 imm mask vstate))
10971098

1099+
;; Helper for emitting the `vslide1up.vx` instruction.
1100+
;;
1101+
;; # vd[0] = x[rs1], vd[i+1] = vs2[i]
1102+
(decl rv_vslide1up_vx (VReg VReg XReg VecOpMasking VState) VReg)
1103+
(rule (rv_vslide1up_vx vd vs2 rs1 mask vstate)
1104+
(vec_alu_rrrr (VecAluOpRRRR.Vslide1upVX) vd vs2 rs1 mask vstate))
1105+
10981106
;; Helper for emitting the `vmv.x.s` instruction.
10991107
;; This instruction copies the first element of the source vector to the destination X register.
11001108
;; Masked versions of this instruction are not supported.

cranelift/codegen/src/isa/riscv64/lower.isle

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2436,8 +2436,24 @@
24362436
(elf_tls_get_addr name))
24372437

24382438
;;;;; Rules for `bitcast`;;;;;;;;;
2439-
(rule
2440-
(lower (has_type out_ty (bitcast _ v @ (value_type in_ty))))
2439+
2440+
;; These rules should arguably live in `gen_bitcast`, but it is convenient for that helper to return
2441+
;; a single register rather than a `ValueRegs`, so the two-register `i128` cases are handled here.
2442+
(rule 2 (lower (has_type $I128 (bitcast _ v @ (value_type (ty_vec_fits_in_register _)))))
2443+
(value_regs
2444+
(gen_extractlane $I64X2 v 0)
2445+
(gen_extractlane $I64X2 v 1)))
2446+
2447+
;; Move the high half into element 0 of a vector register, then use `vslide1up.vx` to shift it up
2448+
;; to element 1 and insert the low half at element 0, all in a single instruction.
2449+
(rule 1 (lower (has_type (ty_vec_fits_in_register _) (bitcast _ v @ (value_type $I128))))
2450+
(let ((lo XReg (value_regs_get v 0))
2451+
(hi XReg (value_regs_get v 1))
2452+
(vstate VState (vstate_from_type $I64X2))
2453+
(vec VReg (rv_vmv_sx hi vstate)))
2454+
(rv_vslide1up_vx vec vec lo (unmasked) vstate)))
2455+
2456+
(rule 0 (lower (has_type out_ty (bitcast _ v @ (value_type in_ty))))
24412457
(gen_bitcast v in_ty out_ty))
24422458

24432459
;;;;; Rules for `ceil`;;;;;;;;;
Lines changed: 236 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,236 @@
1+
test compile precise-output
2+
set unwind_info=false
3+
target riscv64 has_v
4+
5+
function %bitcast_vec_to_i128(i64x2) -> i128 {
6+
block0(v0: i64x2):
7+
v1 = bitcast.i128 little v0
8+
return v1
9+
}
10+
11+
; VCode:
12+
; addi sp,sp,-16
13+
; sd ra,8(sp)
14+
; sd fp,0(sp)
15+
; mv fp,sp
16+
; block0:
17+
; vle8.v v8,-16(incoming_arg) #avl=16, #vtype=(e8, m1, ta, ma)
18+
; vmv.x.s a0,v8 #avl=2, #vtype=(e64, m1, ta, ma)
19+
; vslidedown.vi v12,v8,1 #avl=2, #vtype=(e64, m1, ta, ma)
20+
; vmv.x.s a1,v12 #avl=2, #vtype=(e64, m1, ta, ma)
21+
; ld ra,8(sp)
22+
; ld fp,0(sp)
23+
; addi sp,sp,16
24+
; ret
25+
;
26+
; Disassembled:
27+
; block0: ; offset 0x0
28+
; addi sp, sp, -0x10
29+
; sd ra, 8(sp)
30+
; sd s0, 0(sp)
31+
; mv s0, sp
32+
; block1: ; offset 0x10
33+
; .byte 0x57, 0x70, 0x08, 0xcc
34+
; addi t6, sp, 0x10
35+
; .byte 0x07, 0x84, 0x0f, 0x02
36+
; .byte 0x57, 0x70, 0x81, 0xcd
37+
; .byte 0x57, 0x25, 0x80, 0x42
38+
; .byte 0x57, 0xb6, 0x80, 0x3e
39+
; .byte 0xd7, 0x25, 0xc0, 0x42
40+
; ld ra, 8(sp)
41+
; ld s0, 0(sp)
42+
; addi sp, sp, 0x10
43+
; ret
44+
45+
function %bitcast_i128_to_vec(i128) -> i64x2 {
46+
block0(v0: i128):
47+
v1 = bitcast.i64x2 little v0
48+
return v1
49+
}
50+
51+
; VCode:
52+
; block0:
53+
; vmv.s.x v12,a1 #avl=2, #vtype=(e64, m1, ta, ma)
54+
; vmv1r.v v14,v12
55+
; vslide1up.vx v14,v12,a0 #avl=2, #vtype=(e64, m1, ta, ma)
56+
; vse8.v v14,0(a2) #avl=16, #vtype=(e8, m1, ta, ma)
57+
; ret
58+
;
59+
; Disassembled:
60+
; block0: ; offset 0x0
61+
; .byte 0x57, 0x70, 0x81, 0xcd
62+
; .byte 0x57, 0xe6, 0x05, 0x42
63+
; .byte 0x57, 0x37, 0xc0, 0x9e
64+
; .byte 0x57, 0x67, 0xc5, 0x3a
65+
; .byte 0x57, 0x70, 0x08, 0xcc
66+
; .byte 0x27, 0x07, 0x06, 0x02
67+
; ret
68+
69+
function %bitcast_vec_to_i64(i32x2) -> i64 {
70+
block0(v0: i32x2):
71+
v1 = bitcast.i64 little v0
72+
return v1
73+
}
74+
75+
; VCode:
76+
; addi sp,sp,-16
77+
; sd ra,8(sp)
78+
; sd fp,0(sp)
79+
; mv fp,sp
80+
; block0:
81+
; vle8.v v8,-16(incoming_arg) #avl=8, #vtype=(e8, m1, ta, ma)
82+
; vmv.x.s a0,v8 #avl=1, #vtype=(e64, m1, ta, ma)
83+
; ld ra,8(sp)
84+
; ld fp,0(sp)
85+
; addi sp,sp,16
86+
; ret
87+
;
88+
; Disassembled:
89+
; block0: ; offset 0x0
90+
; addi sp, sp, -0x10
91+
; sd ra, 8(sp)
92+
; sd s0, 0(sp)
93+
; mv s0, sp
94+
; block1: ; offset 0x10
95+
; .byte 0x57, 0x70, 0x04, 0xcc
96+
; addi t6, sp, 0x10
97+
; .byte 0x07, 0x84, 0x0f, 0x02
98+
; .byte 0x57, 0xf0, 0x80, 0xcd
99+
; .byte 0x57, 0x25, 0x80, 0x42
100+
; ld ra, 8(sp)
101+
; ld s0, 0(sp)
102+
; addi sp, sp, 0x10
103+
; ret
104+
105+
function %bitcast_i64_to_vec(i64) -> i32x2 {
106+
block0(v0: i64):
107+
v1 = bitcast.i32x2 little v0
108+
return v1
109+
}
110+
111+
; VCode:
112+
; block0:
113+
; vmv.s.x v11,a0 #avl=1, #vtype=(e64, m1, ta, ma)
114+
; vse8.v v11,0(a1) #avl=8, #vtype=(e8, m1, ta, ma)
115+
; ret
116+
;
117+
; Disassembled:
118+
; block0: ; offset 0x0
119+
; .byte 0x57, 0xf0, 0x80, 0xcd
120+
; .byte 0xd7, 0x65, 0x05, 0x42
121+
; .byte 0x57, 0x70, 0x04, 0xcc
122+
; .byte 0xa7, 0x85, 0x05, 0x02
123+
; ret
124+
125+
function %bitcast_vec_to_f64(i32x2) -> f64 {
126+
block0(v0: i32x2):
127+
v1 = bitcast.f64 little v0
128+
return v1
129+
}
130+
131+
; VCode:
132+
; addi sp,sp,-16
133+
; sd ra,8(sp)
134+
; sd fp,0(sp)
135+
; mv fp,sp
136+
; block0:
137+
; vle8.v v8,-16(incoming_arg) #avl=8, #vtype=(e8, m1, ta, ma)
138+
; vfmv.f.s fa0,v8 #avl=1, #vtype=(e64, m1, ta, ma)
139+
; ld ra,8(sp)
140+
; ld fp,0(sp)
141+
; addi sp,sp,16
142+
; ret
143+
;
144+
; Disassembled:
145+
; block0: ; offset 0x0
146+
; addi sp, sp, -0x10
147+
; sd ra, 8(sp)
148+
; sd s0, 0(sp)
149+
; mv s0, sp
150+
; block1: ; offset 0x10
151+
; .byte 0x57, 0x70, 0x04, 0xcc
152+
; addi t6, sp, 0x10
153+
; .byte 0x07, 0x84, 0x0f, 0x02
154+
; .byte 0x57, 0xf0, 0x80, 0xcd
155+
; .byte 0x57, 0x15, 0x80, 0x42
156+
; ld ra, 8(sp)
157+
; ld s0, 0(sp)
158+
; addi sp, sp, 0x10
159+
; ret
160+
161+
function %bitcast_f64_to_vec(f64) -> i32x2 {
162+
block0(v0: f64):
163+
v1 = bitcast.i32x2 little v0
164+
return v1
165+
}
166+
167+
; VCode:
168+
; block0:
169+
; vfmv.s.f v11,fa0 #avl=1, #vtype=(e64, m1, ta, ma)
170+
; vse8.v v11,0(a0) #avl=8, #vtype=(e8, m1, ta, ma)
171+
; ret
172+
;
173+
; Disassembled:
174+
; block0: ; offset 0x0
175+
; .byte 0x57, 0xf0, 0x80, 0xcd
176+
; .byte 0xd7, 0x55, 0x05, 0x42
177+
; .byte 0x57, 0x70, 0x04, 0xcc
178+
; .byte 0xa7, 0x05, 0x05, 0x02
179+
; ret
180+
181+
function %bitcast_i16x2_to_f32(i16x2) -> f32 {
182+
block0(v0: i16x2):
183+
v1 = bitcast.f32 little v0
184+
return v1
185+
}
186+
187+
; VCode:
188+
; addi sp,sp,-16
189+
; sd ra,8(sp)
190+
; sd fp,0(sp)
191+
; mv fp,sp
192+
; block0:
193+
; vle8.v v8,-16(incoming_arg) #avl=4, #vtype=(e8, m1, ta, ma)
194+
; vfmv.f.s fa0,v8 #avl=1, #vtype=(e32, m1, ta, ma)
195+
; ld ra,8(sp)
196+
; ld fp,0(sp)
197+
; addi sp,sp,16
198+
; ret
199+
;
200+
; Disassembled:
201+
; block0: ; offset 0x0
202+
; addi sp, sp, -0x10
203+
; sd ra, 8(sp)
204+
; sd s0, 0(sp)
205+
; mv s0, sp
206+
; block1: ; offset 0x10
207+
; .byte 0x57, 0x70, 0x02, 0xcc
208+
; addi t6, sp, 0x10
209+
; .byte 0x07, 0x84, 0x0f, 0x02
210+
; .byte 0x57, 0xf0, 0x00, 0xcd
211+
; .byte 0x57, 0x15, 0x80, 0x42
212+
; ld ra, 8(sp)
213+
; ld s0, 0(sp)
214+
; addi sp, sp, 0x10
215+
; ret
216+
217+
function %bitcast_f32_to_i16x2(f32) -> i16x2 {
218+
block0(v0: f32):
219+
v1 = bitcast.i16x2 little v0
220+
return v1
221+
}
222+
223+
; VCode:
224+
; block0:
225+
; vfmv.s.f v11,fa0 #avl=1, #vtype=(e32, m1, ta, ma)
226+
; vse8.v v11,0(a0) #avl=4, #vtype=(e8, m1, ta, ma)
227+
; ret
228+
;
229+
; Disassembled:
230+
; block0: ; offset 0x0
231+
; .byte 0x57, 0xf0, 0x00, 0xcd
232+
; .byte 0xd7, 0x55, 0x05, 0x42
233+
; .byte 0x57, 0x70, 0x02, 0xcc
234+
; .byte 0xa7, 0x05, 0x05, 0x02
235+
; ret
236+
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
test run
2+
target riscv64 has_v
3+
target riscv64 has_v has_c has_zcb
4+
5+
function %bitcast_i32x2_to_i64(i32x2) -> i64 {
6+
block0(v0: i32x2):
7+
v1 = bitcast.i64 little v0
8+
return v1
9+
}
10+
; run: %bitcast_i32x2_to_i64([0xBEEF 0xC0FFEE]) == 0x00c0ffee_0000beef
11+
; run: %bitcast_i32x2_to_i64([-1 127]) == 0x0000007f_ffffffff
12+
13+
14+
function %bitcast_i64_to_i32x2(i64) -> i32x2 {
15+
block0(v0: i64):
16+
v1 = bitcast.i32x2 little v0
17+
return v1
18+
}
19+
; run: %bitcast_i64_to_i32x2(0x00c0ffee_0000beef) == [0xBEEF 0xC0FFEE]
20+
; run: %bitcast_i64_to_i32x2(0x0000007f_ffffffff) == [-1 127]
21+
22+
function %bitcast_i32x2_to_f64(i32x2) -> f64 {
23+
block0(v0: i32x2):
24+
v1 = bitcast.f64 little v0
25+
return v1
26+
}
27+
; run: %bitcast_i32x2_to_f64([0xBEEF 0xC0FFEE]) == 0x1.0ffee0000beefp-1011
28+
; run: %bitcast_i32x2_to_f64([-1 127]) == 0x0.0007fffffffffp-1022
29+
30+
function %bitcast_f64_to_i32x2(f64) -> i32x2 {
31+
block0(v0: f64):
32+
v1 = bitcast.i32x2 little v0
33+
return v1
34+
}
35+
; run: %bitcast_f64_to_i32x2(0x1.0ffee0000beefp-1011) == [0xBEEF 0xC0FFEE]
36+
; run: %bitcast_f64_to_i32x2(0x0.0007fffffffffp-1022) == [-1 127]
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
test run
2+
target riscv64 has_v
3+
target riscv64 has_v has_c has_zcb
4+
5+
function %bitcast_i64x2_to_i128(i64x2) -> i128 {
6+
block0(v0: i64x2):
7+
v1 = bitcast.i128 little v0
8+
return v1
9+
}
10+
; run: %bitcast_i64x2_to_i128([0xBEEF 0xC0FFEE]) == 0x0000000000c0ffee_000000000000beef
11+
; run: %bitcast_i64x2_to_i128([-1 127]) == 0x000000000000007f_ffffffffffffffff
12+
13+
14+
function %bitcast_i128_to_i64x2(i128) -> i64x2 {
15+
block0(v0: i128):
16+
v1 = bitcast.i64x2 little v0
17+
return v1
18+
}
19+
; run: %bitcast_i128_to_i64x2(0x0000000000c0ffee_000000000000beef) == [0xBEEF 0xC0FFEE]
20+
; run: %bitcast_i128_to_i64x2(0x000000000000007f_ffffffffffffffff) == [-1 127]

cranelift/fuzzgen/src/function_generator.rs

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -764,9 +764,6 @@ fn valid_for_target(triple: &Triple, op: Opcode, args: &[Type], rets: &[Type]) -
764764
&[I128],
765765
&[F32 | F64]
766766
),
767-
// https://github.com/bytecodealliance/wasmtime/issues/6104
768-
(Opcode::Bitcast, &[I128], &[_]),
769-
(Opcode::Bitcast, &[_], &[I128]),
770767
// TODO
771768
(
772769
Opcode::SelectSpectreGuard,

0 commit comments

Comments
 (0)