[RISCV] Fold (X & -4096) == 0 -> (X >> 12) == 0 #154233
Conversation
This is a more general form of the recently added isel pattern

(seteq (i64 (and GPR:$rs1, 0x8000000000000000)), 0) -> (XORI (i64 (SRLI GPR:$rs1, 63)), 1)

We can use a shift right for any AND mask that is a negated power of 2, but for every other such constant we need seqz instead of xori. I don't think there is a benefit to xori over seqz, as neither is compressible.

We already do this transform in target-independent code when the setcc constant is a non-zero subset of the AND mask and is not a legal icmp immediate.

I don't believe any of these patterns comparing MSBs to 0 are canonical according to InstCombine. The canonical form is (X u< 4096). I'm curious whether these appear during SelectionDAG and, if so, how. My goal here was just to remove the special-case isel patterns.
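As a concrete illustration (a minimal IR sketch in the style of this patch's tests; the function name is made up): the mask -4096 is 0xFFFFFFFFFFFFF000, its negation 4096 = 2^12 is a power of 2 with 12 trailing zero bits, and -4096 does not fit ANDI's signed 12-bit immediate, so the combine rewrites the compare as a 12-bit logical shift right followed by seqz instead of materializing the mask:

define i1 @and_negpow2_eq0(i64 %x) {
  %a = and i64 %x, -4096   ; -(2^12), not encodable as an ANDI immediate
  %b = icmp eq i64 %a, 0   ; becomes (x >> 12) == 0, i.e. srli + seqz
  ret i1 %b
}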
@llvm/pr-subscribers-backend-risc-v

Author: Craig Topper (topperc)

Full diff: https://github.com/llvm/llvm-project/pull/154233.diff

4 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index ce03818b49502..e8db56a716476 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -16588,8 +16588,10 @@ combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC,
// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
// can become a sext.w instead of a shift pair.
-static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
+static SDValue performSETCCCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
const RISCVSubtarget &Subtarget) {
+ SelectionDAG &DAG = DCI.DAG;
SDLoc dl(N);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -16605,6 +16607,20 @@ static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
combineVectorSizedSetCCEquality(VT, N0, N1, Cond, dl, DAG, Subtarget))
return V;
+ // (X & -4096) == 0 -> (X >> 12) == 0 if the AND constant can't use ANDI.
+ if (DCI.isAfterLegalizeDAG() && isNullConstant(N1) &&
+ N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
+ isa<ConstantSDNode>(N0.getOperand(1))) {
+ const APInt &AndRHSC =
+ cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ if (!isInt<12>(AndRHSC.getSExtValue()) && AndRHSC.isNegatedPowerOf2()) {
+ unsigned ShiftBits = AndRHSC.countr_zero();
+ SDValue Shift = DAG.getNode(ISD::SRL, dl, VT, N0.getOperand(0),
+ DAG.getConstant(ShiftBits, dl, VT));
+ return DAG.getSetCC(dl, VT, Shift, N1, Cond);
+ }
+ }
+
if (OpVT != MVT::i64 || !Subtarget.is64Bit())
return SDValue();
@@ -20086,7 +20102,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
return SDValue();
}
case ISD::SETCC:
- return performSETCCCombine(N, DAG, Subtarget);
+ return performSETCCCombine(N, DCI, Subtarget);
case ISD::SIGN_EXTEND_INREG:
return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
case ISD::ZERO_EXTEND:
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 8cbdf0ec7fa33..836a2b166feb9 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1695,13 +1695,9 @@ multiclass SelectCC_GPR_riirr<DAGOperand valty, DAGOperand imm> {
}
let Predicates = [IsRV32] in {
-def : Pat<(i32 (seteq (i32 (and GPR:$rs1, 0xffffffff80000000)), 0)),
- (XORI (i32 (SRLI GPR:$rs1, 31)), 1)>;
def : Pat<(i32 (setlt (i32 GPR:$rs1), 0)), (SRLI GPR:$rs1, 31)>; // compressible
}
let Predicates = [IsRV64] in {
-def : Pat<(i64 (seteq (i64 (and GPR:$rs1, 0x8000000000000000)), 0)),
- (XORI (i64 (SRLI GPR:$rs1, 63)), 1)>;
def : Pat<(i64 (seteq (i64 (and GPR:$rs1, 0x0000000080000000)), 0)),
(XORI (i64 (SRLIW GPR:$rs1, 31)), 1)>;
def : Pat<(i64 (setlt (i64 GPR:$rs1), 0)), (SRLI GPR:$rs1, 63)>; // compressible
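For illustration, IR that the deleted RV64 sign-bit pattern used to match (a hypothetical example; the function name is made up — the bittest.ll update further below shows the same case): with the pattern gone, the general combine lowers this to srli a0, a0, 63 followed by seqz rather than srli plus xori:

define i1 @msb_eq0(i64 %x) {
  %and = and i64 %x, -9223372036854775808   ; 0x8000000000000000, the sign bit
  %cmp = icmp eq i64 %and, 0
  ret i1 %cmp
}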
diff --git a/llvm/test/CodeGen/RISCV/and-negpow2-cmp.ll b/llvm/test/CodeGen/RISCV/and-negpow2-cmp.ll
index be3de37927564..b16672d3c4a16 100644
--- a/llvm/test/CodeGen/RISCV/and-negpow2-cmp.ll
+++ b/llvm/test/CodeGen/RISCV/and-negpow2-cmp.ll
@@ -1,14 +1,66 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -mtriple=riscv64 | FileCheck %s
+; RUN: llc < %s -mtriple=riscv32 | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc < %s -mtriple=riscv64 | FileCheck %s --check-prefixes=CHECK,RV64
-define i1 @src(i64 %x) {
-; CHECK-LABEL: src:
+define i1 @test1(i64 %x) {
+; RV32-LABEL: test1:
+; RV32: # %bb.0:
+; RV32-NEXT: slli a2, a1, 2
+; RV32-NEXT: srli a0, a0, 30
+; RV32-NEXT: srai a1, a1, 30
+; RV32-NEXT: or a0, a0, a2
+; RV32-NEXT: xori a0, a0, -2
+; RV32-NEXT: not a1, a1
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: seqz a0, a0
+; RV32-NEXT: ret
+;
+; RV64-LABEL: test1:
+; RV64: # %bb.0:
+; RV64-NEXT: srai a0, a0, 30
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: seqz a0, a0
+; RV64-NEXT: ret
+ %a = and i64 %x, -1073741824
+ %b = icmp eq i64 %a, -2147483648
+ ret i1 %b
+}
+
+define i1 @test2(i32 signext %x) {
+; CHECK-LABEL: test2:
; CHECK: # %bb.0:
-; CHECK-NEXT: srai a0, a0, 30
-; CHECK-NEXT: addi a0, a0, 2
+; CHECK-NEXT: srli a0, a0, 30
; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: ret
- %a = and i64 %x, -1073741824
- %b = icmp eq i64 %a, -2147483648
+ %a = and i32 %x, -1073741824
+ %b = icmp eq i32 %a, 0
+ ret i1 %b
+}
+
+define i1 @test3(i32 signext %x) {
+; CHECK-LABEL: test3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: srli a0, a0, 29
+; CHECK-NEXT: snez a0, a0
+; CHECK-NEXT: ret
+ %a = and i32 %x, -536870912
+ %b = icmp ne i32 %a, 0
+ ret i1 %b
+}
+
+define i1 @test4(i64 %x) {
+; RV32-LABEL: test4:
+; RV32: # %bb.0:
+; RV32-NEXT: srli a1, a1, 14
+; RV32-NEXT: seqz a0, a1
+; RV32-NEXT: ret
+;
+; RV64-LABEL: test4:
+; RV64: # %bb.0:
+; RV64-NEXT: srli a0, a0, 46
+; RV64-NEXT: seqz a0, a0
+; RV64-NEXT: ret
+ %a = and i64 %x, -70368744177664
+ %b = icmp eq i64 %a, 0
ret i1 %b
}
diff --git a/llvm/test/CodeGen/RISCV/bittest.ll b/llvm/test/CodeGen/RISCV/bittest.ll
index 95c577f833a37..40e3168d08b27 100644
--- a/llvm/test/CodeGen/RISCV/bittest.ll
+++ b/llvm/test/CodeGen/RISCV/bittest.ll
@@ -3512,7 +3512,7 @@ define i32 @bittest_31_andeq0_i64(i64 %x) {
; RV32-LABEL: bittest_31_andeq0_i64:
; RV32: # %bb.0:
; RV32-NEXT: srli a0, a0, 31
-; RV32-NEXT: xori a0, a0, 1
+; RV32-NEXT: seqz a0, a0
; RV32-NEXT: ret
;
; RV64-LABEL: bittest_31_andeq0_i64:
@@ -3530,13 +3530,13 @@ define i32 @bittest_63_andeq0_i64(i64 %x) {
; RV32-LABEL: bittest_63_andeq0_i64:
; RV32: # %bb.0:
; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: xori a0, a1, 1
+; RV32-NEXT: seqz a0, a1
; RV32-NEXT: ret
;
; RV64-LABEL: bittest_63_andeq0_i64:
; RV64: # %bb.0:
; RV64-NEXT: srli a0, a0, 63
-; RV64-NEXT: xori a0, a0, 1
+; RV64-NEXT: seqz a0, a0
; RV64-NEXT: ret
%and = and i64 %x, 9223372036854775808
%cmp = icmp eq i64 %and, 0
lenary left a comment:

LGTM
I think BSETI+SLTU would be even better, if we can hoist the BSETI (or LI 1 + SLLI) before the loop, or if this single-bit constant is already available for another operation.
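For reference, this suggestion corresponds to the unsigned-compare form that InstCombine canonicalizes to (a hypothetical sketch; assumes Zbs for bseti, and the function name is made up): (x & -4096) == 0 is equivalent to x u< 4096, so if the constant 4096 is materialized once (bseti a1, zero, 12, or li plus slli) and hoisted out of a loop, each comparison is a single sltu instead of the srli/seqz pair:

define i1 @ult_form(i64 %x) {
  %b = icmp ult i64 %x, 4096   ; same predicate as (x & -4096) == 0
  ret i1 %b
}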