Skip to content

Commit a9fddf3

Browse files
committed
[NVPTX] Allow directly storing immediates to improve readability
1 parent 0f173a0 commit a9fddf3

16 files changed

+165
-186
lines changed

llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp

Lines changed: 22 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1364,20 +1364,18 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
13641364
SDValue Offset, Base;
13651365
SelectADDR(ST->getBasePtr(), Base, Offset);
13661366

1367-
SDValue Ops[] = {Value,
1367+
SDValue Ops[] = {selectPossiblyImm(Value),
13681368
getI32Imm(Ordering, DL),
13691369
getI32Imm(Scope, DL),
13701370
getI32Imm(CodeAddrSpace, DL),
1371-
getI32Imm(NVPTX::PTXLdStInstCode::Untyped, DL),
13721371
getI32Imm(ToTypeWidth, DL),
13731372
Base,
13741373
Offset,
13751374
Chain};
13761375

1377-
const MVT::SimpleValueType SourceVT =
1378-
Value.getNode()->getSimpleValueType(0).SimpleTy;
1379-
const std::optional<unsigned> Opcode = pickOpcodeForVT(
1380-
SourceVT, NVPTX::ST_i8, NVPTX::ST_i16, NVPTX::ST_i32, NVPTX::ST_i64);
1376+
const std::optional<unsigned> Opcode =
1377+
pickOpcodeForVT(Value.getSimpleValueType().SimpleTy, NVPTX::ST_i8,
1378+
NVPTX::ST_i16, NVPTX::ST_i32, NVPTX::ST_i64);
13811379
if (!Opcode)
13821380
return false;
13831381

@@ -1414,7 +1412,9 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
14141412

14151413
const unsigned NumElts = getLoadStoreVectorNumElts(ST);
14161414

1417-
SmallVector<SDValue, 16> Ops(ST->ops().slice(1, NumElts));
1415+
SmallVector<SDValue, 16> Ops;
1416+
for (auto &V : ST->ops().slice(1, NumElts))
1417+
Ops.push_back(selectPossiblyImm(V));
14181418
SDValue Addr = N->getOperand(NumElts + 1);
14191419
const unsigned ToTypeWidth = TotalWidth / NumElts;
14201420

@@ -1425,9 +1425,8 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
14251425
SelectADDR(Addr, Base, Offset);
14261426

14271427
Ops.append({getI32Imm(Ordering, DL), getI32Imm(Scope, DL),
1428-
getI32Imm(CodeAddrSpace, DL),
1429-
getI32Imm(NVPTX::PTXLdStInstCode::Untyped, DL),
1430-
getI32Imm(ToTypeWidth, DL), Base, Offset, Chain});
1428+
getI32Imm(CodeAddrSpace, DL), getI32Imm(ToTypeWidth, DL), Base,
1429+
Offset, Chain});
14311430

14321431
const MVT::SimpleValueType EltVT =
14331432
ST->getOperand(1).getSimpleValueType().SimpleTy;
@@ -2158,6 +2157,19 @@ bool NVPTXDAGToDAGISel::SelectADDR(SDValue Addr, SDValue &Base,
21582157
return true;
21592158
}
21602159

2160+
// Turn a store's value operand into an immediate when it is a constant.
//
// A single ISD::BITCAST wrapper, if present, is looked through first. If the
// resulting value is a ConstantSDNode or a ConstantFPSDNode, the same constant
// is re-created as a target constant (getTargetConstant /
// getTargetConstantFP) with the same SDLoc and the value type of the
// looked-through operand, i.e. the pre-bitcast type. tryStore and
// tryStoreVector pass the result as the stored-value operand(s), which lets
// the store carry the immediate directly (e.g. `st.global.b32 [out], 0;`)
// instead of a register that was filled by a separate `mov`.
//
// For any non-constant value the input is returned; note that in this case
// the returned value has already had the bitcast stripped.
SDValue NVPTXDAGToDAGISel::selectPossiblyImm(SDValue V) {
2161+
if (V.getOpcode() == ISD::BITCAST)
2162+
V = V.getOperand(0);
2163+
2164+
if (auto *CN = dyn_cast<ConstantSDNode>(V))
2165+
return CurDAG->getTargetConstant(CN->getAPIntValue(), SDLoc(V),
2166+
V.getValueType());
2167+
if (auto *CN = dyn_cast<ConstantFPSDNode>(V))
2168+
return CurDAG->getTargetConstantFP(CN->getValueAPF(), SDLoc(V),
2169+
V.getValueType());
2170+
return V;
2171+
}
2172+
21612173
bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
21622174
unsigned int spN) const {
21632175
const Value *Src = nullptr;

llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -106,6 +106,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
106106
}
107107

108108
bool SelectADDR(SDValue Addr, SDValue &Base, SDValue &Offset);
109+
SDValue selectPossiblyImm(SDValue V);
109110

110111
bool ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const;
111112

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Lines changed: 38 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -184,6 +184,18 @@ class OneUse2<SDPatternOperator operator>
184184
class fpimm_pos_inf<ValueType vt>
185185
: FPImmLeaf<vt, [{ return Imm.isPosInfinity(); }]>;
186186

187+
188+
189+
// Operands which can hold a Register or an Immediate.
190+
//
191+
// Unfortunately, since most register classes can hold multiple types, we must
192+
// use the 'Any' type for these.
193+
194+
// RI1 is typed i1, while RI16, RI32 and RI64 are typed Any. RI16/RI32/RI64
// are used as the $src operand type of the ST and ST_VEC store instructions
// (ST_i8 and ST_i16 both use RI16). RI1 has no user in the visible part of
// this change.
// NOTE(review): RI1 presumably keeps a concrete type because its register
// class only ever holds i1 -- confirm.
def RI1 : Operand<i1>;
195+
def RI16 : Operand<Any>;
196+
def RI32 : Operand<Any>;
197+
def RI64 : Operand<Any>;
198+
187199
// Utility class to wrap up information about a register and DAG type for more
188200
// convenient iteration and parameterization
189201
class RegTyInfo<ValueType ty, NVPTXRegClass rc, Operand imm, SDNode imm_node,
@@ -2338,19 +2350,20 @@ let mayLoad=1, hasSideEffects=0 in {
23382350
def LD_i64 : LD<B64>;
23392351
}
23402352

2341-
class ST<NVPTXRegClass regclass>
2353+
class ST<DAGOperand O>
23422354
: NVPTXInst<
23432355
(outs),
2344-
(ins regclass:$src, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp,
2345-
LdStCode:$Sign, i32imm:$toWidth, ADDR:$addr),
2346-
"st${sem:sem}${scope:scope}${addsp:addsp}.${Sign:sign}$toWidth"
2356+
(ins O:$src,
2357+
LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, i32imm:$toWidth,
2358+
ADDR:$addr),
2359+
"st${sem:sem}${scope:scope}${addsp:addsp}.b$toWidth"
23472360
" \t[$addr], $src;", []>;
23482361

23492362
let mayStore=1, hasSideEffects=0 in {
2350-
def ST_i8 : ST<B16>;
2351-
def ST_i16 : ST<B16>;
2352-
def ST_i32 : ST<B32>;
2353-
def ST_i64 : ST<B64>;
2363+
def ST_i8 : ST<RI16>;
2364+
def ST_i16 : ST<RI16>;
2365+
def ST_i32 : ST<RI32>;
2366+
def ST_i64 : ST<RI64>;
23542367
}
23552368

23562369
// The following is used only in and after vector elementizations. Vector
@@ -2386,38 +2399,38 @@ let mayLoad=1, hasSideEffects=0 in {
23862399
defm LDV_i64 : LD_VEC<B64>;
23872400
}
23882401

2389-
multiclass ST_VEC<NVPTXRegClass regclass, bit support_v8 = false> {
2402+
multiclass ST_VEC<DAGOperand O, bit support_v8 = false> {
23902403
def _v2 : NVPTXInst<
23912404
(outs),
2392-
(ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$scope,
2393-
LdStCode:$addsp, LdStCode:$Sign, i32imm:$fromWidth,
2405+
(ins O:$src1, O:$src2,
2406+
LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, i32imm:$fromWidth,
23942407
ADDR:$addr),
2395-
"st${sem:sem}${scope:scope}${addsp:addsp}.v2.${Sign:sign}$fromWidth "
2408+
"st${sem:sem}${scope:scope}${addsp:addsp}.v2.b$fromWidth "
23962409
"\t[$addr], {{$src1, $src2}};", []>;
23972410
def _v4 : NVPTXInst<
23982411
(outs),
2399-
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
2400-
LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp,
2401-
LdStCode:$Sign, i32imm:$fromWidth, ADDR:$addr),
2402-
"st${sem:sem}${scope:scope}${addsp:addsp}.v4.${Sign:sign}$fromWidth "
2412+
(ins O:$src1, O:$src2, O:$src3, O:$src4,
2413+
LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, i32imm:$fromWidth,
2414+
ADDR:$addr),
2415+
"st${sem:sem}${scope:scope}${addsp:addsp}.v4.b$fromWidth "
24032416
"\t[$addr], {{$src1, $src2, $src3, $src4}};", []>;
24042417
if support_v8 then
24052418
def _v8 : NVPTXInst<
24062419
(outs),
2407-
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
2408-
regclass:$src5, regclass:$src6, regclass:$src7, regclass:$src8,
2409-
LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Sign,
2410-
i32imm:$fromWidth, ADDR:$addr),
2411-
"st${sem:sem}${scope:scope}${addsp:addsp}.v8.${Sign:sign}$fromWidth "
2420+
(ins O:$src1, O:$src2, O:$src3, O:$src4,
2421+
O:$src5, O:$src6, O:$src7, O:$src8,
2422+
LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, i32imm:$fromWidth,
2423+
ADDR:$addr),
2424+
"st${sem:sem}${scope:scope}${addsp:addsp}.v8.b$fromWidth "
24122425
"\t[$addr], "
24132426
"{{$src1, $src2, $src3, $src4, $src5, $src6, $src7, $src8}};", []>;
24142427
}
24152428

24162429
let mayStore=1, hasSideEffects=0 in {
2417-
defm STV_i8 : ST_VEC<B16>;
2418-
defm STV_i16 : ST_VEC<B16>;
2419-
defm STV_i32 : ST_VEC<B32, support_v8 = true>;
2420-
defm STV_i64 : ST_VEC<B64>;
2430+
defm STV_i8 : ST_VEC<RI16>;
2431+
defm STV_i16 : ST_VEC<RI16>;
2432+
defm STV_i32 : ST_VEC<RI32, support_v8 = true>;
2433+
defm STV_i64 : ST_VEC<RI64>;
24212434
}
24222435

24232436
//---- Conversion ----

llvm/test/CodeGen/NVPTX/access-non-generic.ll

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -107,8 +107,7 @@ define void @nested_const_expr() {
107107
; PTX-LABEL: nested_const_expr(
108108
; store 1 to bitcast(gep(addrspacecast(array), 0, 1))
109109
store i32 1, ptr getelementptr ([10 x float], ptr addrspacecast (ptr addrspace(3) @array to ptr), i64 0, i64 1), align 4
110-
; PTX: mov.b32 %r1, 1;
111-
; PTX-NEXT: st.shared.b32 [array+4], %r1;
110+
; PTX: st.shared.b32 [array+4], 1;
112111
ret void
113112
}
114113

llvm/test/CodeGen/NVPTX/chain-different-as.ll

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -4,14 +4,13 @@
44
define i64 @test() nounwind readnone {
55
; CHECK-LABEL: test(
66
; CHECK: {
7-
; CHECK-NEXT: .reg .b64 %rd<4>;
7+
; CHECK-NEXT: .reg .b64 %rd<3>;
88
; CHECK-EMPTY:
99
; CHECK-NEXT: // %bb.0:
1010
; CHECK-NEXT: mov.b64 %rd1, 1;
11-
; CHECK-NEXT: mov.b64 %rd2, 42;
12-
; CHECK-NEXT: st.b64 [%rd1], %rd2;
13-
; CHECK-NEXT: ld.global.b64 %rd3, [%rd1];
14-
; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
11+
; CHECK-NEXT: st.b64 [%rd1], 42;
12+
; CHECK-NEXT: ld.global.b64 %rd2, [%rd1];
13+
; CHECK-NEXT: st.param.b64 [func_retval0], %rd2;
1514
; CHECK-NEXT: ret;
1615
%addr0 = inttoptr i64 1 to ptr
1716
%addr1 = inttoptr i64 1 to ptr addrspace(1)

llvm/test/CodeGen/NVPTX/demote-vars.ll

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -67,8 +67,7 @@ define void @define_private_global(i64 %val) {
6767
; Also check that the if-then is still here, otherwise we may not be testing
6868
; the "more-than-one-use" part.
6969
; CHECK: st.shared.b64 [private_global_used_more_than_once_in_same_fct],
70-
; CHECK: mov.b64 %[[VAR:.*]], 25
71-
; CHECK: st.shared.b64 [private_global_used_more_than_once_in_same_fct], %[[VAR]]
70+
; CHECK: st.shared.b64 [private_global_used_more_than_once_in_same_fct], 25
7271
define void @define_private_global_more_than_one_use(i64 %val, i1 %cond) {
7372
store i64 %val, ptr addrspace(3) @private_global_used_more_than_once_in_same_fct
7473
br i1 %cond, label %then, label %end

llvm/test/CodeGen/NVPTX/i1-load-lower.ll

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -10,14 +10,13 @@ target triple = "nvptx-nvidia-cuda"
1010
define void @foo() {
1111
; CHECK-LABEL: foo(
1212
; CHECK: .reg .pred %p<2>;
13-
; CHECK: .reg .b16 %rs<4>;
13+
; CHECK: .reg .b16 %rs<3>;
1414
; CHECK-EMPTY:
1515
; CHECK: ld.global.b8 %rs1, [i1g];
1616
; CHECK: and.b16 %rs2, %rs1, 1;
1717
; CHECK: setp.ne.b16 %p1, %rs2, 0;
1818
; CHECK: @%p1 bra $L__BB0_2;
19-
; CHECK: mov.b16 %rs3, 1;
20-
; CHECK: st.global.b8 [i1g], %rs3;
19+
; CHECK: st.global.b8 [i1g], 1;
2120
; CHECK: ret;
2221
%tmp = load i1, ptr addrspace(1) @i1g, align 2
2322
br i1 %tmp, label %if.end, label %if.then

llvm/test/CodeGen/NVPTX/i128-ld-st.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,8 +13,8 @@ define i128 @foo(ptr %p, ptr %o) {
1313
; CHECK-NEXT: ld.param.b64 %rd2, [foo_param_1];
1414
; CHECK-NEXT: ld.param.b64 %rd1, [foo_param_0];
1515
; CHECK-NEXT: ld.b8 %rd3, [%rd1];
16+
; CHECK-NEXT: st.v2.b64 [%rd2], {%rd3, 0};
1617
; CHECK-NEXT: mov.b64 %rd4, 0;
17-
; CHECK-NEXT: st.v2.b64 [%rd2], {%rd3, %rd4};
1818
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd3, %rd4};
1919
; CHECK-NEXT: ret;
2020
%c = load i8, ptr %p, align 1

llvm/test/CodeGen/NVPTX/jump-table.ll

Lines changed: 5 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ define void @foo(i32 %i) {
1010
; CHECK-LABEL: foo(
1111
; CHECK: {
1212
; CHECK-NEXT: .reg .pred %p<2>;
13-
; CHECK-NEXT: .reg .b32 %r<7>;
13+
; CHECK-NEXT: .reg .b32 %r<3>;
1414
; CHECK-EMPTY:
1515
; CHECK-NEXT: // %bb.0: // %entry
1616
; CHECK-NEXT: ld.param.b32 %r2, [foo_param_0];
@@ -24,20 +24,16 @@ define void @foo(i32 %i) {
2424
; CHECK-NEXT: $L__BB0_5;
2525
; CHECK-NEXT: brx.idx %r2, $L_brx_0;
2626
; CHECK-NEXT: $L__BB0_2: // %case0
27-
; CHECK-NEXT: mov.b32 %r6, 0;
28-
; CHECK-NEXT: st.global.b32 [out], %r6;
27+
; CHECK-NEXT: st.global.b32 [out], 0;
2928
; CHECK-NEXT: bra.uni $L__BB0_6;
3029
; CHECK-NEXT: $L__BB0_4: // %case2
31-
; CHECK-NEXT: mov.b32 %r4, 2;
32-
; CHECK-NEXT: st.global.b32 [out], %r4;
30+
; CHECK-NEXT: st.global.b32 [out], 2;
3331
; CHECK-NEXT: bra.uni $L__BB0_6;
3432
; CHECK-NEXT: $L__BB0_5: // %case3
35-
; CHECK-NEXT: mov.b32 %r3, 3;
36-
; CHECK-NEXT: st.global.b32 [out], %r3;
33+
; CHECK-NEXT: st.global.b32 [out], 3;
3734
; CHECK-NEXT: bra.uni $L__BB0_6;
3835
; CHECK-NEXT: $L__BB0_3: // %case1
39-
; CHECK-NEXT: mov.b32 %r5, 1;
40-
; CHECK-NEXT: st.global.b32 [out], %r5;
36+
; CHECK-NEXT: st.global.b32 [out], 1;
4137
; CHECK-NEXT: $L__BB0_6: // %end
4238
; CHECK-NEXT: ret;
4339
entry:

llvm/test/CodeGen/NVPTX/local-stack-frame.ll

Lines changed: 5 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -144,7 +144,7 @@ define void @foo4() {
144144
; PTX32-NEXT: .local .align 4 .b8 __local_depot3[8];
145145
; PTX32-NEXT: .reg .b32 %SP;
146146
; PTX32-NEXT: .reg .b32 %SPL;
147-
; PTX32-NEXT: .reg .b32 %r<6>;
147+
; PTX32-NEXT: .reg .b32 %r<5>;
148148
; PTX32-EMPTY:
149149
; PTX32-NEXT: // %bb.0:
150150
; PTX32-NEXT: mov.b32 %SPL, __local_depot3;
@@ -153,9 +153,8 @@ define void @foo4() {
153153
; PTX32-NEXT: add.u32 %r2, %SPL, 0;
154154
; PTX32-NEXT: add.u32 %r3, %SP, 4;
155155
; PTX32-NEXT: add.u32 %r4, %SPL, 4;
156-
; PTX32-NEXT: mov.b32 %r5, 0;
157-
; PTX32-NEXT: st.local.b32 [%r2], %r5;
158-
; PTX32-NEXT: st.local.b32 [%r4], %r5;
156+
; PTX32-NEXT: st.local.b32 [%r2], 0;
157+
; PTX32-NEXT: st.local.b32 [%r4], 0;
159158
; PTX32-NEXT: { // callseq 1, 0
160159
; PTX32-NEXT: .param .b32 param0;
161160
; PTX32-NEXT: st.param.b32 [param0], %r1;
@@ -181,7 +180,6 @@ define void @foo4() {
181180
; PTX64-NEXT: .local .align 4 .b8 __local_depot3[8];
182181
; PTX64-NEXT: .reg .b64 %SP;
183182
; PTX64-NEXT: .reg .b64 %SPL;
184-
; PTX64-NEXT: .reg .b32 %r<2>;
185183
; PTX64-NEXT: .reg .b64 %rd<5>;
186184
; PTX64-EMPTY:
187185
; PTX64-NEXT: // %bb.0:
@@ -191,9 +189,8 @@ define void @foo4() {
191189
; PTX64-NEXT: add.u64 %rd2, %SPL, 0;
192190
; PTX64-NEXT: add.u64 %rd3, %SP, 4;
193191
; PTX64-NEXT: add.u64 %rd4, %SPL, 4;
194-
; PTX64-NEXT: mov.b32 %r1, 0;
195-
; PTX64-NEXT: st.local.b32 [%rd2], %r1;
196-
; PTX64-NEXT: st.local.b32 [%rd4], %r1;
192+
; PTX64-NEXT: st.local.b32 [%rd2], 0;
193+
; PTX64-NEXT: st.local.b32 [%rd4], 0;
197194
; PTX64-NEXT: { // callseq 1, 0
198195
; PTX64-NEXT: .param .b64 param0;
199196
; PTX64-NEXT: st.param.b64 [param0], %rd1;

llvm/test/CodeGen/NVPTX/lower-alloca.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ define ptx_kernel void @kernel() {
1515
; LOWERALLOCAONLY: [[V1:%.*]] = addrspacecast ptr %A to ptr addrspace(5)
1616
; LOWERALLOCAONLY: [[V2:%.*]] = addrspacecast ptr addrspace(5) [[V1]] to ptr
1717
; LOWERALLOCAONLY: store i32 0, ptr [[V2]], align 4
18-
; PTX: st.local.b32 [{{%rd[0-9]+}}], {{%r[0-9]+}}
18+
; PTX: st.local.b32 [{{%rd[0-9]+}}], 0
1919
store i32 0, ptr %A
2020
call void @callee(ptr %A)
2121
ret void
@@ -26,7 +26,7 @@ define void @alloca_in_explicit_local_as() {
2626
; PTX-LABEL: .visible .func alloca_in_explicit_local_as(
2727
%A = alloca i32, addrspace(5)
2828
; CHECK: store i32 0, ptr addrspace(5) {{%.+}}
29-
; PTX: st.local.b32 [%SP], {{%r[0-9]+}}
29+
; PTX: st.local.b32 [%SP], 0
3030
; LOWERALLOCAONLY: [[V1:%.*]] = addrspacecast ptr addrspace(5) %A to ptr
3131
; LOWERALLOCAONLY: store i32 0, ptr [[V1]], align 4
3232
store i32 0, ptr addrspace(5) %A

llvm/test/CodeGen/NVPTX/lower-byval-args.ll

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -658,7 +658,7 @@ define ptx_kernel void @test_select_write(ptr byval(i32) align 4 %input1, ptr by
658658
; PTX-NEXT: .reg .b64 %SPL;
659659
; PTX-NEXT: .reg .pred %p<2>;
660660
; PTX-NEXT: .reg .b16 %rs<3>;
661-
; PTX-NEXT: .reg .b32 %r<4>;
661+
; PTX-NEXT: .reg .b32 %r<3>;
662662
; PTX-NEXT: .reg .b64 %rd<6>;
663663
; PTX-EMPTY:
664664
; PTX-NEXT: // %bb.0: // %bb
@@ -674,8 +674,7 @@ define ptx_kernel void @test_select_write(ptr byval(i32) align 4 %input1, ptr by
674674
; PTX-NEXT: add.u64 %rd2, %SPL, 4;
675675
; PTX-NEXT: add.u64 %rd4, %SPL, 0;
676676
; PTX-NEXT: selp.b64 %rd5, %rd2, %rd4, %p1;
677-
; PTX-NEXT: mov.b32 %r3, 1;
678-
; PTX-NEXT: st.local.b32 [%rd5], %r3;
677+
; PTX-NEXT: st.local.b32 [%rd5], 1;
679678
; PTX-NEXT: ret;
680679
bb:
681680
%ptrnew = select i1 %cond, ptr %input1, ptr %input2
@@ -838,7 +837,7 @@ define ptx_kernel void @test_phi_write(ptr byval(%struct.S) align 4 %input1, ptr
838837
; PTX-NEXT: .reg .b64 %SPL;
839838
; PTX-NEXT: .reg .pred %p<2>;
840839
; PTX-NEXT: .reg .b16 %rs<3>;
841-
; PTX-NEXT: .reg .b32 %r<4>;
840+
; PTX-NEXT: .reg .b32 %r<3>;
842841
; PTX-NEXT: .reg .b64 %rd<7>;
843842
; PTX-EMPTY:
844843
; PTX-NEXT: // %bb.0: // %bb
@@ -857,8 +856,7 @@ define ptx_kernel void @test_phi_write(ptr byval(%struct.S) align 4 %input1, ptr
857856
; PTX-NEXT: // %bb.1: // %second
858857
; PTX-NEXT: mov.b64 %rd6, %rd1;
859858
; PTX-NEXT: $L__BB14_2: // %merge
860-
; PTX-NEXT: mov.b32 %r3, 1;
861-
; PTX-NEXT: st.local.b32 [%rd6], %r3;
859+
; PTX-NEXT: st.local.b32 [%rd6], 1;
862860
; PTX-NEXT: ret;
863861
bb:
864862
br i1 %cond, label %first, label %second

0 commit comments

Comments
 (0)