[RISCV] Don't emit vxrm writes for vnclip(u).wi with shift of 0. #76578
Conversation
If there's no shift being performed, the rounding mode doesn't matter. We could do the same for vssra and vssrl, but with a shift of 0 they are no-ops, so they would be better off being removed earlier.
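For context on why this is safe: the RVV spec's roundoff function adds a rounding increment derived only from the bits shifted out, so with a shift amount of 0 there are no such bits and every vxrm mode produces the same result. A minimal scalar sketch of that rounding behavior (illustrative only, not code from this patch) makes this concrete:

#include <cassert>
#include <cstdint>

// Scalar model of the RVV fixed-point right shift with rounding
// (the spec's roundoff function). Illustrative sketch only.
static uint64_t roundoff(uint64_t V, unsigned Shift, unsigned VXRM) {
  if (Shift == 0)
    return V; // No bits shifted out, so no rounding increment in any mode.
  bool Half = (V >> (Shift - 1)) & 1;                              // v[d-1]
  bool LowerNonzero = (V & ((uint64_t(1) << (Shift - 1)) - 1)) != 0; // v[d-2:0]
  bool LSB = (V >> Shift) & 1;                                     // v[d]
  bool R = false;
  switch (VXRM) {
  case 0: R = Half; break;                           // rnu: round-to-nearest-up
  case 1: R = Half && (LowerNonzero || LSB); break;  // rne: round-to-nearest-even
  case 2: R = false; break;                          // rdn: round-down (truncate)
  case 3: R = !LSB && (Half || LowerNonzero); break; // rod: round-to-odd
  }
  return (V >> Shift) + R;
}

int main() {
  // With Shift == 0, the result is the input for every rounding mode,
  // so vnclip(u).wi with a zero shift never reads vxrm.
  for (unsigned VXRM = 0; VXRM < 4; ++VXRM)
    assert(roundoff(0x1234, 0, VXRM) == 0x1234);
  // With a nonzero shift the mode matters, e.g. 3 >> 1:
  assert(roundoff(3, 1, 0) == 2); // rnu rounds 1.5 up
  assert(roundoff(3, 1, 2) == 1); // rdn truncates
  return 0;
}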
CC @sun-jacobi
@llvm/pr-subscribers-backend-risc-v

Author: Craig Topper (topperc)

Changes

If there's no shift being performed, the rounding mode doesn't matter. We could do the same for vssra and vssrl, but with a shift of 0 they are no-ops, so they would be better off being removed earlier.

Full diff: https://github.com/llvm/llvm-project/pull/76578.diff

3 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp b/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp
index de2227f8219238..e487cc8b2e20c9 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp
@@ -198,13 +198,23 @@ char RISCVInsertWriteVXRM::ID = 0;
INITIALIZE_PASS(RISCVInsertWriteVXRM, DEBUG_TYPE, RISCV_INSERT_WRITE_VXRM_NAME,
false, false)
+static bool ignoresVXRM(const MachineInstr &MI) {
+ switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
+ default:
+ return false;
+ case RISCV::VNCLIP_WI:
+ case RISCV::VNCLIPU_WI:
+ return MI.getOperand(3).getImm() == 0;
+ }
+}
+
bool RISCVInsertWriteVXRM::computeVXRMChanges(const MachineBasicBlock &MBB) {
BlockData &BBInfo = BlockInfo[MBB.getNumber()];
bool NeedVXRMWrite = false;
for (const MachineInstr &MI : MBB) {
int VXRMIdx = RISCVII::getVXRMOpNum(MI.getDesc());
- if (VXRMIdx >= 0) {
+ if (VXRMIdx >= 0 && !ignoresVXRM(MI)) {
unsigned NewVXRMImm = MI.getOperand(VXRMIdx).getImm();
if (!BBInfo.VXRMUse.isValid())
@@ -356,7 +366,7 @@ void RISCVInsertWriteVXRM::emitWriteVXRM(MachineBasicBlock &MBB) {
for (MachineInstr &MI : MBB) {
int VXRMIdx = RISCVII::getVXRMOpNum(MI.getDesc());
- if (VXRMIdx >= 0) {
+ if (VXRMIdx >= 0 && !ignoresVXRM(MI)) {
unsigned NewVXRMImm = MI.getOperand(VXRMIdx).getImm();
if (PendingInsert || !Info.isStatic() ||
diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
index e1ebf2afda657e..f1a82b9e427e25 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
@@ -40,7 +40,6 @@ define <2 x i32> @stest_f64i32(<2 x double> %x) {
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-V-NEXT: vfcvt.rtz.x.f.v v8, v8
; CHECK-V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-V-NEXT: csrwi vxrm, 0
; CHECK-V-NEXT: vnclip.wi v8, v8, 0
; CHECK-V-NEXT: ret
entry:
@@ -77,7 +76,6 @@ define <2 x i32> @utest_f64i32(<2 x double> %x) {
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-V-NEXT: vfcvt.rtz.xu.f.v v8, v8
; CHECK-V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-V-NEXT: csrwi vxrm, 0
; CHECK-V-NEXT: vnclipu.wi v8, v8, 0
; CHECK-V-NEXT: ret
entry:
@@ -193,7 +191,6 @@ define <4 x i32> @stest_f32i32(<4 x float> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-V-NEXT: vfwcvt.rtz.x.f.v v10, v8
-; CHECK-V-NEXT: csrwi vxrm, 0
; CHECK-V-NEXT: vnclip.wi v8, v10, 0
; CHECK-V-NEXT: ret
entry:
@@ -247,7 +244,6 @@ define <4 x i32> @utest_f32i32(<4 x float> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-V-NEXT: vfwcvt.rtz.xu.f.v v10, v8
-; CHECK-V-NEXT: csrwi vxrm, 0
; CHECK-V-NEXT: vnclipu.wi v8, v10, 0
; CHECK-V-NEXT: ret
entry:
@@ -497,7 +493,6 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 3
; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-V-NEXT: csrwi vxrm, 0
; CHECK-V-NEXT: vnclip.wi v8, v10, 0
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 2
@@ -666,7 +661,6 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 3
; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-V-NEXT: csrwi vxrm, 0
; CHECK-V-NEXT: vnclipu.wi v8, v10, 0
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 2
@@ -907,7 +901,6 @@ define <2 x i16> @stest_f64i16(<2 x double> %x) {
; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-V-NEXT: vfncvt.rtz.x.f.w v9, v8
; CHECK-V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-V-NEXT: csrwi vxrm, 0
; CHECK-V-NEXT: vnclip.wi v8, v9, 0
; CHECK-V-NEXT: ret
entry:
@@ -944,7 +937,6 @@ define <2 x i16> @utest_f64i16(<2 x double> %x) {
; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-V-NEXT: vfncvt.rtz.xu.f.w v9, v8
; CHECK-V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-V-NEXT: csrwi vxrm, 0
; CHECK-V-NEXT: vnclipu.wi v8, v9, 0
; CHECK-V-NEXT: ret
entry:
@@ -1063,7 +1055,6 @@ define <4 x i16> @stest_f32i16(<4 x float> %x) {
; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-V-NEXT: vfcvt.rtz.x.f.v v8, v8
; CHECK-V-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-V-NEXT: csrwi vxrm, 0
; CHECK-V-NEXT: vnclip.wi v8, v8, 0
; CHECK-V-NEXT: ret
entry:
@@ -1118,7 +1109,6 @@ define <4 x i16> @utest_f32i16(<4 x float> %x) {
; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-V-NEXT: vfcvt.rtz.xu.f.v v8, v8
; CHECK-V-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-V-NEXT: csrwi vxrm, 0
; CHECK-V-NEXT: vnclipu.wi v8, v8, 0
; CHECK-V-NEXT: ret
entry:
@@ -1495,7 +1485,6 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 7
; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-V-NEXT: csrwi vxrm, 0
; CHECK-V-NEXT: vnclip.wi v8, v10, 0
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 1
@@ -1774,7 +1763,6 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 7
; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-V-NEXT: csrwi vxrm, 0
; CHECK-V-NEXT: vnclipu.wi v8, v10, 0
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 1
@@ -3349,7 +3337,6 @@ define <2 x i32> @stest_f64i32_mm(<2 x double> %x) {
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-V-NEXT: vfcvt.rtz.x.f.v v8, v8
; CHECK-V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-V-NEXT: csrwi vxrm, 0
; CHECK-V-NEXT: vnclip.wi v8, v8, 0
; CHECK-V-NEXT: ret
entry:
@@ -3384,7 +3371,6 @@ define <2 x i32> @utest_f64i32_mm(<2 x double> %x) {
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-V-NEXT: vfcvt.rtz.xu.f.v v8, v8
; CHECK-V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-V-NEXT: csrwi vxrm, 0
; CHECK-V-NEXT: vnclipu.wi v8, v8, 0
; CHECK-V-NEXT: ret
entry:
@@ -3497,7 +3483,6 @@ define <4 x i32> @stest_f32i32_mm(<4 x float> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-V-NEXT: vfwcvt.rtz.x.f.v v10, v8
-; CHECK-V-NEXT: csrwi vxrm, 0
; CHECK-V-NEXT: vnclip.wi v8, v10, 0
; CHECK-V-NEXT: ret
entry:
@@ -3549,7 +3534,6 @@ define <4 x i32> @utest_f32i32_mm(<4 x float> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-V-NEXT: vfwcvt.rtz.xu.f.v v10, v8
-; CHECK-V-NEXT: csrwi vxrm, 0
; CHECK-V-NEXT: vnclipu.wi v8, v10, 0
; CHECK-V-NEXT: ret
entry:
@@ -3796,7 +3780,6 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 3
; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-V-NEXT: csrwi vxrm, 0
; CHECK-V-NEXT: vnclip.wi v8, v10, 0
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 2
@@ -3963,7 +3946,6 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 3
; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-V-NEXT: csrwi vxrm, 0
; CHECK-V-NEXT: vnclipu.wi v8, v10, 0
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 2
@@ -4201,7 +4183,6 @@ define <2 x i16> @stest_f64i16_mm(<2 x double> %x) {
; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-V-NEXT: vfncvt.rtz.x.f.w v9, v8
; CHECK-V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-V-NEXT: csrwi vxrm, 0
; CHECK-V-NEXT: vnclip.wi v8, v9, 0
; CHECK-V-NEXT: ret
entry:
@@ -4236,7 +4217,6 @@ define <2 x i16> @utest_f64i16_mm(<2 x double> %x) {
; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-V-NEXT: vfncvt.rtz.xu.f.w v9, v8
; CHECK-V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-V-NEXT: csrwi vxrm, 0
; CHECK-V-NEXT: vnclipu.wi v8, v9, 0
; CHECK-V-NEXT: ret
entry:
@@ -4352,7 +4332,6 @@ define <4 x i16> @stest_f32i16_mm(<4 x float> %x) {
; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-V-NEXT: vfcvt.rtz.x.f.v v8, v8
; CHECK-V-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-V-NEXT: csrwi vxrm, 0
; CHECK-V-NEXT: vnclip.wi v8, v8, 0
; CHECK-V-NEXT: ret
entry:
@@ -4405,7 +4384,6 @@ define <4 x i16> @utest_f32i16_mm(<4 x float> %x) {
; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-V-NEXT: vfcvt.rtz.xu.f.v v8, v8
; CHECK-V-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-V-NEXT: csrwi vxrm, 0
; CHECK-V-NEXT: vnclipu.wi v8, v8, 0
; CHECK-V-NEXT: ret
entry:
@@ -4779,7 +4757,6 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 7
; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-V-NEXT: csrwi vxrm, 0
; CHECK-V-NEXT: vnclip.wi v8, v10, 0
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 1
@@ -5054,7 +5031,6 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 7
; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-V-NEXT: csrwi vxrm, 0
; CHECK-V-NEXT: vnclipu.wi v8, v10, 0
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 1
diff --git a/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip.ll b/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip.ll
index e12c9e515a9fd4..42577408f71b00 100644
--- a/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip.ll
@@ -20,7 +20,6 @@ define void @trunc_sat_i8i16_maxmin(ptr %x, ptr %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vnclip.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
@@ -37,7 +36,6 @@ define void @trunc_sat_i8i16_minmax(ptr %x, ptr %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vnclip.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
@@ -75,7 +73,6 @@ define void @trunc_sat_u8u16_min(ptr %x, ptr %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
@@ -109,7 +106,6 @@ define void @trunc_sat_u8u16_maxmin(ptr %x, ptr %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
@@ -126,7 +122,6 @@ define void @trunc_sat_u8u16_minmax(ptr %x, ptr %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
@@ -166,7 +161,6 @@ define void @trunc_sat_i16i32_maxmin(ptr %x, ptr %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vnclip.wi v8, v8, 0
; CHECK-NEXT: vse16.v v8, (a1)
; CHECK-NEXT: ret
@@ -183,7 +177,6 @@ define void @trunc_sat_i16i32_minmax(ptr %x, ptr %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vnclip.wi v8, v8, 0
; CHECK-NEXT: vse16.v v8, (a1)
; CHECK-NEXT: ret
@@ -219,7 +212,6 @@ define void @trunc_sat_u16u32_min(ptr %x, ptr %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vse16.v v8, (a1)
; CHECK-NEXT: ret
@@ -235,7 +227,6 @@ define void @trunc_sat_u16u32_minmax(ptr %x, ptr %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vse16.v v8, (a1)
; CHECK-NEXT: ret
@@ -252,7 +243,6 @@ define void @trunc_sat_u16u32_maxmin(ptr %x, ptr %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vse16.v v8, (a1)
; CHECK-NEXT: ret
@@ -293,7 +283,6 @@ define void @trunc_sat_i32i64_maxmin(ptr %x, ptr %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vnclip.wi v10, v8, 0
; CHECK-NEXT: vse32.v v10, (a1)
; CHECK-NEXT: ret
@@ -310,7 +299,6 @@ define void @trunc_sat_i32i64_minmax(ptr %x, ptr %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vnclip.wi v10, v8, 0
; CHECK-NEXT: vse32.v v10, (a1)
; CHECK-NEXT: ret
@@ -347,7 +335,6 @@ define void @trunc_sat_u32u64_min(ptr %x, ptr %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vnclipu.wi v10, v8, 0
; CHECK-NEXT: vse32.v v10, (a1)
; CHECK-NEXT: ret
@@ -364,7 +351,6 @@ define void @trunc_sat_u32u64_maxmin(ptr %x, ptr %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vnclipu.wi v10, v8, 0
; CHECK-NEXT: vse32.v v10, (a1)
; CHECK-NEXT: ret
@@ -381,7 +367,6 @@ define void @trunc_sat_u32u64_minmax(ptr %x, ptr %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vnclipu.wi v10, v8, 0
; CHECK-NEXT: vse32.v v10, (a1)
; CHECK-NEXT: ret
LGTM.