From 9340008328a2341c024638c187ea0548627c26f1 Mon Sep 17 00:00:00 2001 From: Patrick O'Neill Date: Tue, 14 May 2024 16:41:24 -0700 Subject: [PATCH 1/3] [DAGCombiner] Mark vectors as not AllAddOne/AllSubOne on undef or type mismatch Fixes #92193. --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 12 +++++- .../RISCV/dag-combine-vselect-datatype.ll | 42 +++++++++++++++++++ 2 files changed, 52 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/RISCV/dag-combine-vselect-datatype.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index a044b6dc4838a..53519274f9fa6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -12140,10 +12140,16 @@ SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) { for (unsigned i = 0; i != Elts; ++i) { SDValue N1Elt = N1.getOperand(i); SDValue N2Elt = N2.getOperand(i); - if (N1Elt.isUndef() || N2Elt.isUndef()) + if (N1Elt.isUndef() || N2Elt.isUndef()) { + AllAddOne = false; + AllSubOne = false; continue; - if (N1Elt.getValueType() != N2Elt.getValueType()) + } + if (N1Elt.getValueType() != N2Elt.getValueType()) { + AllAddOne = false; + AllSubOne = false; continue; + } const APInt &C1 = N1Elt->getAsAPIntVal(); const APInt &C2 = N2Elt->getAsAPIntVal(); @@ -12152,6 +12158,8 @@ SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) { if (C1 != C2 - 1) AllSubOne = false; } + assert(!(AllAddOne && AllSubOne) && + "Y=X+1 and Y=X-1 cannot be true for any given X and Y."); // Further simplifications for the extra-special cases where the constants are // all 0 or all -1 should be implemented as folds of these patterns. 
diff --git a/llvm/test/CodeGen/RISCV/dag-combine-vselect-datatype.ll b/llvm/test/CodeGen/RISCV/dag-combine-vselect-datatype.ll new file mode 100644 index 0000000000000..3a1dbd543546a --- /dev/null +++ b/llvm/test/CodeGen/RISCV/dag-combine-vselect-datatype.ll @@ -0,0 +1,42 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -O1 < %s | FileCheck %s + +; Dag-combine used to improperly combine a vector vselect of 0 and 5 into +; 5 + condition(0/1) because one of the two args was transformed from an i32->i64. + +target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128" +target triple = "riscv64-unknown-linux-gnu" + +@g.var.0 = global i8 5 +@g.arr.0 = global i32 0 + +define i8 @foo() { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lui a0, %hi(g.arr.0) +; CHECK-NEXT: li a1, 4 +; CHECK-NEXT: sw a1, %lo(g.arr.0)(a0) +; CHECK-NEXT: li a0, 0 +; CHECK-NEXT: ret +entry: + store i32 4, ptr @g.arr.0, align 32 + + %g.var.0.val = load i8, ptr @g.var.0, align 1 + %loaded.arr = insertelement <4 x i8> , i8 %g.var.0.val, i64 0 + + %g.arr.elem.0 = load i32, ptr @g.arr.0, align 32 + %insert.0 = insertelement <4 x i32> zeroinitializer, i32 %g.arr.elem.0, i64 0 + %cmp.0 = icmp ult <4 x i32> %insert.0, + + %all.g.arr.elem.0 = shufflevector <4 x i32> %insert.0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer + %or.0 = or <4 x i32> %all.g.arr.elem.0, + + %sel.0 = select <4 x i1> %cmp.0, <4 x i32> zeroinitializer, <4 x i32> %or.0 + + %trunc.0 = trunc <4 x i32> %sel.0 to <4 x i8> + + %mul.0 = mul <4 x i8> %loaded.arr, %trunc.0 + %reduced.mul.0 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> %mul.0) + + ret i8 %reduced.mul.0 +} From 064a0840f0150efd89a9902e679d745fb859aaa7 Mon Sep 17 00:00:00 2001 From: Patrick O'Neill Date: Tue, 14 May 2024 19:26:03 -0700 Subject: [PATCH 2/3] fixup! 
[DAGCombiner] Mark vectors as not AllAddOne/AllSubOne on undef or type mismatch --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 53519274f9fa6..2b181cd3ab1db 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -12140,15 +12140,12 @@ SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) { for (unsigned i = 0; i != Elts; ++i) { SDValue N1Elt = N1.getOperand(i); SDValue N2Elt = N2.getOperand(i); - if (N1Elt.isUndef() || N2Elt.isUndef()) { - AllAddOne = false; - AllSubOne = false; + if (N1Elt.isUndef() || N2Elt.isUndef()) continue; - } if (N1Elt.getValueType() != N2Elt.getValueType()) { AllAddOne = false; AllSubOne = false; - continue; + break; } const APInt &C1 = N1Elt->getAsAPIntVal(); @@ -12158,8 +12155,6 @@ SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) { if (C1 != C2 - 1) AllSubOne = false; } - assert(!(AllAddOne && AllSubOne) && - "Y=X+1 and Y=X-1 cannot be true for any given X and Y."); // Further simplifications for the extra-special cases where the constants are // all 0 or all -1 should be implemented as folds of these patterns. From 46104b694414a048c8462a83ac764e8f78a7bd68 Mon Sep 17 00:00:00 2001 From: Patrick O'Neill Date: Wed, 15 May 2024 10:52:27 -0700 Subject: [PATCH 3/3] fixup! 
[DAGCombiner] Mark vectors as not AllAddOne/AllSubOne on undef or type mismatch --- .../RISCV/dag-combine-vselect-datatype.ll | 42 ------------------- llvm/test/CodeGen/RISCV/pr92193.ll | 21 ++++++++++ 2 files changed, 21 insertions(+), 42 deletions(-) delete mode 100644 llvm/test/CodeGen/RISCV/dag-combine-vselect-datatype.ll create mode 100644 llvm/test/CodeGen/RISCV/pr92193.ll diff --git a/llvm/test/CodeGen/RISCV/dag-combine-vselect-datatype.ll b/llvm/test/CodeGen/RISCV/dag-combine-vselect-datatype.ll deleted file mode 100644 index 3a1dbd543546a..0000000000000 --- a/llvm/test/CodeGen/RISCV/dag-combine-vselect-datatype.ll +++ /dev/null @@ -1,42 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -; RUN: llc -O1 < %s | FileCheck %s - -; Dag-combine used to improperly combine a vector vselect of 0 and 5 into -; 5 + condition(0/1) because one of the two args was transformed from an i32->i64. - -target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128" -target triple = "riscv64-unknown-linux-gnu" - -@g.var.0 = global i8 5 -@g.arr.0 = global i32 0 - -define i8 @foo() { -; CHECK-LABEL: foo: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lui a0, %hi(g.arr.0) -; CHECK-NEXT: li a1, 4 -; CHECK-NEXT: sw a1, %lo(g.arr.0)(a0) -; CHECK-NEXT: li a0, 0 -; CHECK-NEXT: ret -entry: - store i32 4, ptr @g.arr.0, align 32 - - %g.var.0.val = load i8, ptr @g.var.0, align 1 - %loaded.arr = insertelement <4 x i8> , i8 %g.var.0.val, i64 0 - - %g.arr.elem.0 = load i32, ptr @g.arr.0, align 32 - %insert.0 = insertelement <4 x i32> zeroinitializer, i32 %g.arr.elem.0, i64 0 - %cmp.0 = icmp ult <4 x i32> %insert.0, - - %all.g.arr.elem.0 = shufflevector <4 x i32> %insert.0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer - %or.0 = or <4 x i32> %all.g.arr.elem.0, - - %sel.0 = select <4 x i1> %cmp.0, <4 x i32> zeroinitializer, <4 x i32> %or.0 - - %trunc.0 = trunc <4 x i32> %sel.0 to <4 x i8> - - %mul.0 = mul <4 x i8> %loaded.arr, 
%trunc.0 - %reduced.mul.0 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> %mul.0) - - ret i8 %reduced.mul.0 -} diff --git a/llvm/test/CodeGen/RISCV/pr92193.ll b/llvm/test/CodeGen/RISCV/pr92193.ll new file mode 100644 index 0000000000000..8c8398c4b45fa --- /dev/null +++ b/llvm/test/CodeGen/RISCV/pr92193.ll @@ -0,0 +1,21 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple=riscv64-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -mtriple=riscv32-unknown-linux-gnu < %s | FileCheck %s + +; Dag-combine used to improperly combine a vector vselect of 0 and 2 into +; 2 + condition(0/1) because one of the two args was transformed from an i32->i64. + +define i16 @foo() { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a0, 0 +; CHECK-NEXT: ret +entry: + %insert.0 = insertelement <4 x i16> zeroinitializer, i16 2, i64 0 + %all.two = shufflevector <4 x i16> %insert.0, <4 x i16> zeroinitializer, <4 x i32> zeroinitializer + %sel.0 = select <4 x i1> , <4 x i16> zeroinitializer, <4 x i16> %all.two + %mul.0 = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> %sel.0) + ret i16 %mul.0 +} + +declare i16 @llvm.vector.reduce.mul.v4i16(<4 x i16>)