Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[LoongArch] Select {DIV,MOD}.{W,WU} instruction to eliminate explicit sign extension #92205

Merged
merged 1 commit into from
May 20, 2024

Conversation

heiher
Copy link
Member

@heiher heiher commented May 15, 2024

No description provided.

@llvmbot
Copy link
Collaborator

llvmbot commented May 15, 2024

@llvm/pr-subscribers-backend-loongarch

Author: hev (heiher)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/92205.diff

2 Files Affected:

  • (modified) llvm/lib/Target/LoongArch/LoongArchInstrInfo.td (+4)
  • (modified) llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll (+12-24)
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index f56f8f7e1179c..d3b577c48cb5f 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -1110,9 +1110,13 @@ def : PatGprImm_32<add, ADDI_W, simm12>;
 def : PatGprGpr<sub, SUB_D>;
 def : PatGprGpr_32<sub, SUB_W>;
 def : PatGprGpr<sdiv, DIV_D>;
+def : PatGprGpr_32<sdiv, DIV_W>;
 def : PatGprGpr<udiv, DIV_DU>;
+def : PatGprGpr_32<udiv, DIV_WU>;
 def : PatGprGpr<srem, MOD_D>;
+def : PatGprGpr_32<srem, MOD_W>;
 def : PatGprGpr<urem, MOD_DU>;
+def : PatGprGpr_32<urem, MOD_WU>;
 def : PatGprGpr<rotr, ROTR_D>;
 def : PatGprGpr<loongarch_rotr_w, ROTR_W>;
 def : PatGprGpr_32<rotr, ROTR_W>;
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
index 2064c398948fe..aa4492b19e3ff 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
@@ -191,8 +191,7 @@ define signext i32 @sdiv_si32_ui32_ui32(i32 %a, i32 %b) {
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    addi.w $a1, $a1, 0
 ; LA64-NEXT:    addi.w $a0, $a0, 0
-; LA64-NEXT:    div.d $a0, $a0, $a1
-; LA64-NEXT:    addi.w $a0, $a0, 0
+; LA64-NEXT:    div.w $a0, $a0, $a1
 ; LA64-NEXT:    ret
 ;
 ; LA32-TRAP-LABEL: sdiv_si32_ui32_ui32:
@@ -208,12 +207,11 @@ define signext i32 @sdiv_si32_ui32_ui32(i32 %a, i32 %b) {
 ; LA64-TRAP:       # %bb.0: # %entry
 ; LA64-TRAP-NEXT:    addi.w $a1, $a1, 0
 ; LA64-TRAP-NEXT:    addi.w $a0, $a0, 0
-; LA64-TRAP-NEXT:    div.d $a0, $a0, $a1
+; LA64-TRAP-NEXT:    div.w $a0, $a0, $a1
 ; LA64-TRAP-NEXT:    bnez $a1, .LBB5_2
 ; LA64-TRAP-NEXT:  # %bb.1: # %entry
 ; LA64-TRAP-NEXT:    break 7
 ; LA64-TRAP-NEXT:  .LBB5_2: # %entry
-; LA64-TRAP-NEXT:    addi.w $a0, $a0, 0
 ; LA64-TRAP-NEXT:    ret
 entry:
   %r = sdiv i32 %a, %b
@@ -228,8 +226,7 @@ define signext i32 @sdiv_si32_si32_si32(i32 signext %a, i32 signext %b) {
 ;
 ; LA64-LABEL: sdiv_si32_si32_si32:
 ; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    div.d $a0, $a0, $a1
-; LA64-NEXT:    addi.w $a0, $a0, 0
+; LA64-NEXT:    div.w $a0, $a0, $a1
 ; LA64-NEXT:    ret
 ;
 ; LA32-TRAP-LABEL: sdiv_si32_si32_si32:
@@ -243,12 +240,11 @@ define signext i32 @sdiv_si32_si32_si32(i32 signext %a, i32 signext %b) {
 ;
 ; LA64-TRAP-LABEL: sdiv_si32_si32_si32:
 ; LA64-TRAP:       # %bb.0: # %entry
-; LA64-TRAP-NEXT:    div.d $a0, $a0, $a1
+; LA64-TRAP-NEXT:    div.w $a0, $a0, $a1
 ; LA64-TRAP-NEXT:    bnez $a1, .LBB6_2
 ; LA64-TRAP-NEXT:  # %bb.1: # %entry
 ; LA64-TRAP-NEXT:    break 7
 ; LA64-TRAP-NEXT:  .LBB6_2: # %entry
-; LA64-TRAP-NEXT:    addi.w $a0, $a0, 0
 ; LA64-TRAP-NEXT:    ret
 entry:
   %r = sdiv i32 %a, %b
@@ -483,8 +479,7 @@ define signext i32 @udiv_si32_ui32_ui32(i32 %a, i32 %b) {
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 31, 0
 ; LA64-NEXT:    bstrpick.d $a0, $a0, 31, 0
-; LA64-NEXT:    div.du $a0, $a0, $a1
-; LA64-NEXT:    addi.w $a0, $a0, 0
+; LA64-NEXT:    div.wu $a0, $a0, $a1
 ; LA64-NEXT:    ret
 ;
 ; LA32-TRAP-LABEL: udiv_si32_ui32_ui32:
@@ -500,12 +495,11 @@ define signext i32 @udiv_si32_ui32_ui32(i32 %a, i32 %b) {
 ; LA64-TRAP:       # %bb.0: # %entry
 ; LA64-TRAP-NEXT:    bstrpick.d $a1, $a1, 31, 0
 ; LA64-TRAP-NEXT:    bstrpick.d $a0, $a0, 31, 0
-; LA64-TRAP-NEXT:    div.du $a0, $a0, $a1
+; LA64-TRAP-NEXT:    div.wu $a0, $a0, $a1
 ; LA64-TRAP-NEXT:    bnez $a1, .LBB13_2
 ; LA64-TRAP-NEXT:  # %bb.1: # %entry
 ; LA64-TRAP-NEXT:    break 7
 ; LA64-TRAP-NEXT:  .LBB13_2: # %entry
-; LA64-TRAP-NEXT:    addi.w $a0, $a0, 0
 ; LA64-TRAP-NEXT:    ret
 entry:
   %r = udiv i32 %a, %b
@@ -522,8 +516,7 @@ define signext i32 @udiv_si32_si32_si32(i32 signext %a, i32 signext %b) {
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 31, 0
 ; LA64-NEXT:    bstrpick.d $a0, $a0, 31, 0
-; LA64-NEXT:    div.du $a0, $a0, $a1
-; LA64-NEXT:    addi.w $a0, $a0, 0
+; LA64-NEXT:    div.wu $a0, $a0, $a1
 ; LA64-NEXT:    ret
 ;
 ; LA32-TRAP-LABEL: udiv_si32_si32_si32:
@@ -539,12 +532,11 @@ define signext i32 @udiv_si32_si32_si32(i32 signext %a, i32 signext %b) {
 ; LA64-TRAP:       # %bb.0: # %entry
 ; LA64-TRAP-NEXT:    bstrpick.d $a1, $a1, 31, 0
 ; LA64-TRAP-NEXT:    bstrpick.d $a0, $a0, 31, 0
-; LA64-TRAP-NEXT:    div.du $a0, $a0, $a1
+; LA64-TRAP-NEXT:    div.wu $a0, $a0, $a1
 ; LA64-TRAP-NEXT:    bnez $a1, .LBB14_2
 ; LA64-TRAP-NEXT:  # %bb.1: # %entry
 ; LA64-TRAP-NEXT:    break 7
 ; LA64-TRAP-NEXT:  .LBB14_2: # %entry
-; LA64-TRAP-NEXT:    addi.w $a0, $a0, 0
 ; LA64-TRAP-NEXT:    ret
 entry:
   %r = udiv i32 %a, %b
@@ -1071,8 +1063,7 @@ define signext i32 @urem_si32_ui32_ui32(i32 %a, i32 %b) {
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 31, 0
 ; LA64-NEXT:    bstrpick.d $a0, $a0, 31, 0
-; LA64-NEXT:    mod.du $a0, $a0, $a1
-; LA64-NEXT:    addi.w $a0, $a0, 0
+; LA64-NEXT:    mod.wu $a0, $a0, $a1
 ; LA64-NEXT:    ret
 ;
 ; LA32-TRAP-LABEL: urem_si32_ui32_ui32:
@@ -1088,12 +1079,11 @@ define signext i32 @urem_si32_ui32_ui32(i32 %a, i32 %b) {
 ; LA64-TRAP:       # %bb.0: # %entry
 ; LA64-TRAP-NEXT:    bstrpick.d $a1, $a1, 31, 0
 ; LA64-TRAP-NEXT:    bstrpick.d $a0, $a0, 31, 0
-; LA64-TRAP-NEXT:    mod.du $a0, $a0, $a1
+; LA64-TRAP-NEXT:    mod.wu $a0, $a0, $a1
 ; LA64-TRAP-NEXT:    bnez $a1, .LBB29_2
 ; LA64-TRAP-NEXT:  # %bb.1: # %entry
 ; LA64-TRAP-NEXT:    break 7
 ; LA64-TRAP-NEXT:  .LBB29_2: # %entry
-; LA64-TRAP-NEXT:    addi.w $a0, $a0, 0
 ; LA64-TRAP-NEXT:    ret
 entry:
   %r = urem i32 %a, %b
@@ -1110,8 +1100,7 @@ define signext i32 @urem_si32_si32_si32(i32 signext %a, i32 signext %b) {
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 31, 0
 ; LA64-NEXT:    bstrpick.d $a0, $a0, 31, 0
-; LA64-NEXT:    mod.du $a0, $a0, $a1
-; LA64-NEXT:    addi.w $a0, $a0, 0
+; LA64-NEXT:    mod.wu $a0, $a0, $a1
 ; LA64-NEXT:    ret
 ;
 ; LA32-TRAP-LABEL: urem_si32_si32_si32:
@@ -1127,12 +1116,11 @@ define signext i32 @urem_si32_si32_si32(i32 signext %a, i32 signext %b) {
 ; LA64-TRAP:       # %bb.0: # %entry
 ; LA64-TRAP-NEXT:    bstrpick.d $a1, $a1, 31, 0
 ; LA64-TRAP-NEXT:    bstrpick.d $a0, $a0, 31, 0
-; LA64-TRAP-NEXT:    mod.du $a0, $a0, $a1
+; LA64-TRAP-NEXT:    mod.wu $a0, $a0, $a1
 ; LA64-TRAP-NEXT:    bnez $a1, .LBB30_2
 ; LA64-TRAP-NEXT:  # %bb.1: # %entry
 ; LA64-TRAP-NEXT:    break 7
 ; LA64-TRAP-NEXT:  .LBB30_2: # %entry
-; LA64-TRAP-NEXT:    addi.w $a0, $a0, 0
 ; LA64-TRAP-NEXT:    ret
 entry:
   %r = urem i32 %a, %b

@heiher heiher marked this pull request as draft May 15, 2024 03:22
@heiher heiher marked this pull request as ready for review May 15, 2024 08:58
Copy link
Contributor

@SixWeining SixWeining left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems this case also benefits from this change:

define signext i32 @test(i32 signext %a, i32 signext %b) {
entry:
  %r = sdiv i32 %a, %b
  ret i32 %r
}

Before:

        div.d   $a0, $a0, $a1
        addi.w  $a0, $a0, 0
        ret

After:

        div.w   $a0, $a0, $a1
        ret

Could you add this test?

@heiher
Copy link
Member Author

heiher commented May 16, 2024

Seems this case also benefits from this change:

define signext i32 @test(i32 signext %a, i32 signext %b) {
entry:
  %r = sdiv i32 %a, %b
  ret i32 %r
}

Before:

        div.d   $a0, $a0, $a1
        addi.w  $a0, $a0, 0
        ret

After:

        div.w   $a0, $a0, $a1
        ret

Could you add this test?

Sure. It has been added to a test called sdiv_si32_si32_si32:

https://github.com/llvm/llvm-project/pull/92205/files#diff-5c4e077e14446f5a434927f881ccabd5122c2508f6591e1643772ed168aba410L223-L256

Thanks.

Copy link
Contributor

@SixWeining SixWeining left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We suppose there is a description error in ISA Manual (Volume I) - V1.10:

在 LoongArch64 位兼容的机器上,执行 DIV.W[U]和 MOD.W[U]指令时,如果通用寄存器 rj 或 rk 中的
数值超过了 32 位有符号数/无符号数的数值范围,则指令执行结果可以为无意义的任意值。

In V1.03, it is:

在 LoongArch64 位兼容的机器上,执行 DIV.W[U]和 MOD.W[U]指令时,如果通用寄存器 rj 或 rk 中的
数值超过了 32 位有符号数的数值范围,则指令执行结果可以为无意义的任意值。

The difference is /无符号数. According the hardware behavior and gcc's output, it is incorrect which means this PR is right. I will accept.

@heiher heiher merged commit a027bea into llvm:main May 20, 2024
6 checks passed
@heiher heiher deleted the div-w branch May 20, 2024 07:24
VyacheslavLevytskyy pushed a commit to VyacheslavLevytskyy/llvm-project that referenced this pull request May 22, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

4 participants