-
Notifications
You must be signed in to change notification settings - Fork 10.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[LoongArch] Select {DIV,MOD}.{W,WU} instruction to eliminate explicit sign extension #92205
Conversation
@llvm/pr-subscribers-backend-loongarch
Author: hev (heiher)
Changes
Full diff: https://github.com/llvm/llvm-project/pull/92205.diff
2 Files Affected:
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index f56f8f7e1179c..d3b577c48cb5f 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -1110,9 +1110,13 @@ def : PatGprImm_32<add, ADDI_W, simm12>;
def : PatGprGpr<sub, SUB_D>;
def : PatGprGpr_32<sub, SUB_W>;
def : PatGprGpr<sdiv, DIV_D>;
+def : PatGprGpr_32<sdiv, DIV_W>;
def : PatGprGpr<udiv, DIV_DU>;
+def : PatGprGpr_32<udiv, DIV_WU>;
def : PatGprGpr<srem, MOD_D>;
+def : PatGprGpr_32<srem, MOD_W>;
def : PatGprGpr<urem, MOD_DU>;
+def : PatGprGpr_32<urem, MOD_WU>;
def : PatGprGpr<rotr, ROTR_D>;
def : PatGprGpr<loongarch_rotr_w, ROTR_W>;
def : PatGprGpr_32<rotr, ROTR_W>;
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
index 2064c398948fe..aa4492b19e3ff 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
@@ -191,8 +191,7 @@ define signext i32 @sdiv_si32_ui32_ui32(i32 %a, i32 %b) {
; LA64: # %bb.0: # %entry
; LA64-NEXT: addi.w $a1, $a1, 0
; LA64-NEXT: addi.w $a0, $a0, 0
-; LA64-NEXT: div.d $a0, $a0, $a1
-; LA64-NEXT: addi.w $a0, $a0, 0
+; LA64-NEXT: div.w $a0, $a0, $a1
; LA64-NEXT: ret
;
; LA32-TRAP-LABEL: sdiv_si32_ui32_ui32:
@@ -208,12 +207,11 @@ define signext i32 @sdiv_si32_ui32_ui32(i32 %a, i32 %b) {
; LA64-TRAP: # %bb.0: # %entry
; LA64-TRAP-NEXT: addi.w $a1, $a1, 0
; LA64-TRAP-NEXT: addi.w $a0, $a0, 0
-; LA64-TRAP-NEXT: div.d $a0, $a0, $a1
+; LA64-TRAP-NEXT: div.w $a0, $a0, $a1
; LA64-TRAP-NEXT: bnez $a1, .LBB5_2
; LA64-TRAP-NEXT: # %bb.1: # %entry
; LA64-TRAP-NEXT: break 7
; LA64-TRAP-NEXT: .LBB5_2: # %entry
-; LA64-TRAP-NEXT: addi.w $a0, $a0, 0
; LA64-TRAP-NEXT: ret
entry:
%r = sdiv i32 %a, %b
@@ -228,8 +226,7 @@ define signext i32 @sdiv_si32_si32_si32(i32 signext %a, i32 signext %b) {
;
; LA64-LABEL: sdiv_si32_si32_si32:
; LA64: # %bb.0: # %entry
-; LA64-NEXT: div.d $a0, $a0, $a1
-; LA64-NEXT: addi.w $a0, $a0, 0
+; LA64-NEXT: div.w $a0, $a0, $a1
; LA64-NEXT: ret
;
; LA32-TRAP-LABEL: sdiv_si32_si32_si32:
@@ -243,12 +240,11 @@ define signext i32 @sdiv_si32_si32_si32(i32 signext %a, i32 signext %b) {
;
; LA64-TRAP-LABEL: sdiv_si32_si32_si32:
; LA64-TRAP: # %bb.0: # %entry
-; LA64-TRAP-NEXT: div.d $a0, $a0, $a1
+; LA64-TRAP-NEXT: div.w $a0, $a0, $a1
; LA64-TRAP-NEXT: bnez $a1, .LBB6_2
; LA64-TRAP-NEXT: # %bb.1: # %entry
; LA64-TRAP-NEXT: break 7
; LA64-TRAP-NEXT: .LBB6_2: # %entry
-; LA64-TRAP-NEXT: addi.w $a0, $a0, 0
; LA64-TRAP-NEXT: ret
entry:
%r = sdiv i32 %a, %b
@@ -483,8 +479,7 @@ define signext i32 @udiv_si32_ui32_ui32(i32 %a, i32 %b) {
; LA64: # %bb.0: # %entry
; LA64-NEXT: bstrpick.d $a1, $a1, 31, 0
; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
-; LA64-NEXT: div.du $a0, $a0, $a1
-; LA64-NEXT: addi.w $a0, $a0, 0
+; LA64-NEXT: div.wu $a0, $a0, $a1
; LA64-NEXT: ret
;
; LA32-TRAP-LABEL: udiv_si32_ui32_ui32:
@@ -500,12 +495,11 @@ define signext i32 @udiv_si32_ui32_ui32(i32 %a, i32 %b) {
; LA64-TRAP: # %bb.0: # %entry
; LA64-TRAP-NEXT: bstrpick.d $a1, $a1, 31, 0
; LA64-TRAP-NEXT: bstrpick.d $a0, $a0, 31, 0
-; LA64-TRAP-NEXT: div.du $a0, $a0, $a1
+; LA64-TRAP-NEXT: div.wu $a0, $a0, $a1
; LA64-TRAP-NEXT: bnez $a1, .LBB13_2
; LA64-TRAP-NEXT: # %bb.1: # %entry
; LA64-TRAP-NEXT: break 7
; LA64-TRAP-NEXT: .LBB13_2: # %entry
-; LA64-TRAP-NEXT: addi.w $a0, $a0, 0
; LA64-TRAP-NEXT: ret
entry:
%r = udiv i32 %a, %b
@@ -522,8 +516,7 @@ define signext i32 @udiv_si32_si32_si32(i32 signext %a, i32 signext %b) {
; LA64: # %bb.0: # %entry
; LA64-NEXT: bstrpick.d $a1, $a1, 31, 0
; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
-; LA64-NEXT: div.du $a0, $a0, $a1
-; LA64-NEXT: addi.w $a0, $a0, 0
+; LA64-NEXT: div.wu $a0, $a0, $a1
; LA64-NEXT: ret
;
; LA32-TRAP-LABEL: udiv_si32_si32_si32:
@@ -539,12 +532,11 @@ define signext i32 @udiv_si32_si32_si32(i32 signext %a, i32 signext %b) {
; LA64-TRAP: # %bb.0: # %entry
; LA64-TRAP-NEXT: bstrpick.d $a1, $a1, 31, 0
; LA64-TRAP-NEXT: bstrpick.d $a0, $a0, 31, 0
-; LA64-TRAP-NEXT: div.du $a0, $a0, $a1
+; LA64-TRAP-NEXT: div.wu $a0, $a0, $a1
; LA64-TRAP-NEXT: bnez $a1, .LBB14_2
; LA64-TRAP-NEXT: # %bb.1: # %entry
; LA64-TRAP-NEXT: break 7
; LA64-TRAP-NEXT: .LBB14_2: # %entry
-; LA64-TRAP-NEXT: addi.w $a0, $a0, 0
; LA64-TRAP-NEXT: ret
entry:
%r = udiv i32 %a, %b
@@ -1071,8 +1063,7 @@ define signext i32 @urem_si32_ui32_ui32(i32 %a, i32 %b) {
; LA64: # %bb.0: # %entry
; LA64-NEXT: bstrpick.d $a1, $a1, 31, 0
; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
-; LA64-NEXT: mod.du $a0, $a0, $a1
-; LA64-NEXT: addi.w $a0, $a0, 0
+; LA64-NEXT: mod.wu $a0, $a0, $a1
; LA64-NEXT: ret
;
; LA32-TRAP-LABEL: urem_si32_ui32_ui32:
@@ -1088,12 +1079,11 @@ define signext i32 @urem_si32_ui32_ui32(i32 %a, i32 %b) {
; LA64-TRAP: # %bb.0: # %entry
; LA64-TRAP-NEXT: bstrpick.d $a1, $a1, 31, 0
; LA64-TRAP-NEXT: bstrpick.d $a0, $a0, 31, 0
-; LA64-TRAP-NEXT: mod.du $a0, $a0, $a1
+; LA64-TRAP-NEXT: mod.wu $a0, $a0, $a1
; LA64-TRAP-NEXT: bnez $a1, .LBB29_2
; LA64-TRAP-NEXT: # %bb.1: # %entry
; LA64-TRAP-NEXT: break 7
; LA64-TRAP-NEXT: .LBB29_2: # %entry
-; LA64-TRAP-NEXT: addi.w $a0, $a0, 0
; LA64-TRAP-NEXT: ret
entry:
%r = urem i32 %a, %b
@@ -1110,8 +1100,7 @@ define signext i32 @urem_si32_si32_si32(i32 signext %a, i32 signext %b) {
; LA64: # %bb.0: # %entry
; LA64-NEXT: bstrpick.d $a1, $a1, 31, 0
; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
-; LA64-NEXT: mod.du $a0, $a0, $a1
-; LA64-NEXT: addi.w $a0, $a0, 0
+; LA64-NEXT: mod.wu $a0, $a0, $a1
; LA64-NEXT: ret
;
; LA32-TRAP-LABEL: urem_si32_si32_si32:
@@ -1127,12 +1116,11 @@ define signext i32 @urem_si32_si32_si32(i32 signext %a, i32 signext %b) {
; LA64-TRAP: # %bb.0: # %entry
; LA64-TRAP-NEXT: bstrpick.d $a1, $a1, 31, 0
; LA64-TRAP-NEXT: bstrpick.d $a0, $a0, 31, 0
-; LA64-TRAP-NEXT: mod.du $a0, $a0, $a1
+; LA64-TRAP-NEXT: mod.wu $a0, $a0, $a1
; LA64-TRAP-NEXT: bnez $a1, .LBB30_2
; LA64-TRAP-NEXT: # %bb.1: # %entry
; LA64-TRAP-NEXT: break 7
; LA64-TRAP-NEXT: .LBB30_2: # %entry
-; LA64-TRAP-NEXT: addi.w $a0, $a0, 0
; LA64-TRAP-NEXT: ret
entry:
%r = urem i32 %a, %b
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Seems this case also benefits from this change:
define signext i32 @test(i32 signext %a, i32 signext %b) {
entry:
%r = sdiv i32 %a, %b
ret i32 %r
}
Before:
div.d $a0, $a0, $a1
addi.w $a0, $a0, 0
ret
After:
div.w $a0, $a0, $a1
ret
Could you add this test?
Sure. It has been added to a test called [test name lost in extraction].
Thanks. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We believe there is a description error in the ISA Manual (Volume I), V1.10:
在 LoongArch64 位兼容的机器上,执行 DIV.W[U]和 MOD.W[U]指令时,如果通用寄存器 rj 或 rk 中的
数值超过了 32 位有符号数/无符号数的数值范围,则指令执行结果可以为无意义的任意值。
In V1.03, it is:
在 LoongArch64 位兼容的机器上,执行 DIV.W[U]和 MOD.W[U]指令时,如果通用寄存器 rj 或 rk 中的
数值超过了 32 位有符号数的数值范围,则指令执行结果可以为无意义的任意值。
The difference is the added `/无符号数` ("/unsigned number"). According to the hardware behavior and GCC's output, the V1.10 wording is incorrect, which means this PR is right. I will accept it.
No description provided.