From 998e529f8d9adda6db834a5d796b0fe76a7809d1 Mon Sep 17 00:00:00 2001 From: Robin Eklind Date: Thu, 26 Mar 2020 19:15:58 +0100 Subject: [PATCH] llvm: update test cases to LLVM 10.0 Steps taken: 1. Update VER in testdata/llvm/Makefile from 9.0.0 to 10.0.0 2. Run `make` from testdata/llvm 3. Run `./skip.sh` from testdata/llvm 4. Run `git add .` from testdata/llvm This is just the first step to update the test cases to LLVM 10.0. A follow-up commit will update `*.ll.golden` test cases. And another follow-up commit will update the test cases of Coreutils and SQLite, which requires Clang 10.0 being installed on the system. Updates llir/llvm#132. --- llvm/Makefile | 4 +- .../Analysis/BasicAA/assume-index-positive.ll | 116 + llvm/test/Analysis/BasicAA/cs-cs.ll | 40 +- llvm/test/Analysis/BasicAA/dereferenceable.ll | 149 + llvm/test/Analysis/BasicAA/gep-alias.ll | 2 +- llvm/test/Analysis/BasicAA/intrinsics.ll | 4 +- llvm/test/Analysis/BasicAA/ptrmask.ll | 29 + llvm/test/Analysis/BasicAA/store-promote.ll | 10 +- .../Analysis/BranchProbabilityInfo/basic.ll | 18 + .../Analysis/BranchProbabilityInfo/fcmp.ll | 41 + .../BranchProbabilityInfo/noreturn.ll | 26 + .../Analysis/BranchProbabilityInfo/pr22718.ll | 6 +- .../ConstantFolding/binop-identity-undef.ll | 50 + .../test/Analysis/ConstantFolding/copysign.ll | 53 + .../Analysis/ConstantFolding/gep-alias.ll | 17 + .../ConstantFolding/gep-zeroinit-vector.ll | 2 +- .../Analysis/ConstantFolding/insertelement.ll | 19 + llvm/test/Analysis/ConstantFolding/math-1.ll | 195 + llvm/test/Analysis/ConstantFolding/math-2.ll | 48 + llvm/test/Analysis/ConstantFolding/rint.ll | 109 + llvm/test/Analysis/ConstantFolding/round.ll | 92 + .../Analysis/ConstantFolding/shufflevector.ll | 11 + llvm/test/Analysis/ConstantFolding/trunc.ll | 105 + .../Analysis/CostModel/AArch64/aggregates.ll | 142 + .../test/Analysis/CostModel/AMDGPU/add-sub.ll | 73 +- .../CostModel/AMDGPU/addrspacecast.ll | 1 + .../test/Analysis/CostModel/AMDGPU/bit-ops.ll | 57 +- 
.../CostModel/AMDGPU/extractelement.ll | 11 +- llvm/test/Analysis/CostModel/AMDGPU/fabs.ll | 21 +- llvm/test/Analysis/CostModel/AMDGPU/fadd.ll | 30 +- llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll | 205 +- llvm/test/Analysis/CostModel/AMDGPU/fma.ll | 120 + llvm/test/Analysis/CostModel/AMDGPU/fmul.ll | 30 +- llvm/test/Analysis/CostModel/AMDGPU/fsub.ll | 22 +- .../CostModel/AMDGPU/insertelement.ll | 10 +- llvm/test/Analysis/CostModel/AMDGPU/mul.ll | 74 +- llvm/test/Analysis/CostModel/AMDGPU/shifts.ll | 63 +- .../CostModel/AMDGPU/shufflevector.ll | 27 +- llvm/test/Analysis/CostModel/ARM/arith.ll | 733 + llvm/test/Analysis/CostModel/ARM/cast.ll | 924 +- llvm/test/Analysis/CostModel/ARM/divrem.ll | 1284 +- llvm/test/Analysis/CostModel/ARM/fparith.ll | 172 + llvm/test/Analysis/CostModel/ARM/freeshift.ll | 96 + .../test/Analysis/CostModel/ARM/load_store.ll | 278 + llvm/test/Analysis/CostModel/ARM/select.ll | 101 +- llvm/test/Analysis/CostModel/ARM/shuffle.ll | 137 +- .../CostModel/PowerPC/future-cost-model.ll | 16 + .../CostModel/PowerPC/insert_extract.ll | 48 +- .../Analysis/CostModel/SystemZ/fp-cast.ll | 36 +- .../CostModel/SystemZ/intrinsic-cost-crash.ll | 2 +- .../Analysis/CostModel/SystemZ/intrinsics.ll | 22 +- .../CostModel/SystemZ/logic-miscext3.ll | 24 +- .../test/Analysis/CostModel/X86/aggregates.ll | 142 + .../CostModel/X86/alternate-shuffle-cost.ll | 36 +- llvm/test/Analysis/CostModel/X86/arith-fix.ll | 48 +- .../Analysis/CostModel/X86/arith-overflow.ll | 84 +- .../test/Analysis/CostModel/X86/arith-ssat.ll | 12 +- .../test/Analysis/CostModel/X86/arith-usat.ll | 12 +- llvm/test/Analysis/CostModel/X86/arith.ll | 32 +- llvm/test/Analysis/CostModel/X86/cast.ll | 64 +- llvm/test/Analysis/CostModel/X86/ctlz.ll | 95 +- llvm/test/Analysis/CostModel/X86/ctpop.ll | 8 +- llvm/test/Analysis/CostModel/X86/extend.ll | 226 +- llvm/test/Analysis/CostModel/X86/fptosi.ll | 129 +- llvm/test/Analysis/CostModel/X86/fptoui.ll | 134 +- llvm/test/Analysis/CostModel/X86/fshl.ll | 24 +- 
llvm/test/Analysis/CostModel/X86/fshr.ll | 24 +- llvm/test/Analysis/CostModel/X86/icmp.ll | 232 +- .../CostModel/X86/masked-intrinsic-cost.ll | 80 +- .../CostModel/X86/min-legal-vector-width.ll | 18 +- .../test/Analysis/CostModel/X86/reduce-add.ll | 288 +- .../test/Analysis/CostModel/X86/reduce-and.ll | 52 +- .../test/Analysis/CostModel/X86/reduce-mul.ll | 112 +- llvm/test/Analysis/CostModel/X86/reduce-or.ll | 52 +- .../Analysis/CostModel/X86/reduce-smax.ll | 72 +- .../Analysis/CostModel/X86/reduce-smin.ll | 72 +- .../Analysis/CostModel/X86/reduce-umax.ll | 76 +- .../Analysis/CostModel/X86/reduce-umin.ll | 76 +- .../test/Analysis/CostModel/X86/reduce-xor.ll | 10 +- llvm/test/Analysis/CostModel/X86/reduction.ll | 455 +- .../X86/shuffle-extract_subvector.ll | 1753 ++- .../CostModel/X86/shuffle-transpose.ll | 8 +- llvm/test/Analysis/CostModel/X86/sitofp.ll | 26 +- .../Analysis/CostModel/X86/slm-arith-costs.ll | 24 +- .../Analysis/CostModel/X86/testshiftashr.ll | 40 +- .../Analysis/CostModel/X86/testshiftlshr.ll | 38 +- .../Analysis/CostModel/X86/testshiftshl.ll | 40 +- llvm/test/Analysis/CostModel/X86/trunc.ll | 200 +- llvm/test/Analysis/CostModel/X86/uitofp.ll | 38 +- .../Analysis/CostModel/X86/vector-extract.ll | 728 +- .../Analysis/CostModel/X86/vector-insert.ll | 74 + llvm/test/Analysis/DDG/basic-a.ll | 202 + llvm/test/Analysis/DDG/basic-b.ll | 233 + llvm/test/Analysis/DDG/basic-loopnest.ll | 456 + llvm/test/Analysis/DDG/root-node.ll | 49 + .../constant_functions_multi_dim.ll | 4 +- llvm/test/Analysis/DependenceAnalysis/Dump.ll | 50 + .../DependenceAnalysis/MIVCheckConst.ll | 2 +- .../NonCanonicalizedSubscript.ll | 2 +- .../SimpleSIVNoValidityCheck.ll | 1 + .../AMDGPU/b42473-r1-crash.ll | 111 + .../intrinsic_addressnottaken1.ll | 44 + .../intrinsic_addressnottaken2.ll | 50 + .../GlobalsModRef/intrinsic_addresstaken.ll | 52 + .../lvi-after-jumpthreading.ll | 10 +- .../LoopCacheAnalysis/PowerPC/compute-cost.ll | 35 + .../LoopCacheAnalysis/PowerPC/loads-store.ll | 88 
+ .../LoopCacheAnalysis/PowerPC/matmul.ll | 81 + .../LoopCacheAnalysis/PowerPC/matvecmul.ll | 185 + .../LoopCacheAnalysis/PowerPC/single-store.ll | 77 + .../LoopCacheAnalysis/PowerPC/stencil.ll | 98 + .../invariant.group-bug.ll | 4 +- llvm/test/Analysis/MemorySSA/debugvalue.ll | 44 + llvm/test/Analysis/MemorySSA/debugvalue2.ll | 54 + .../MemorySSA/loop-rotate-disablebasicaa.ll | 26 + .../MemorySSA/loop-rotate-simplified-clone.ll | 29 + llvm/test/Analysis/MemorySSA/loop-unswitch.ll | 26 + llvm/test/Analysis/MemorySSA/pr28880.ll | 2 +- llvm/test/Analysis/MemorySSA/pr39197.ll | 2 +- llvm/test/Analysis/MemorySSA/pr40038.ll | 2 +- llvm/test/Analysis/MemorySSA/pr40754.ll | 57 +- llvm/test/Analysis/MemorySSA/pr41640.ll | 5 +- llvm/test/Analysis/MemorySSA/pr42940.ll | 189 + llvm/test/Analysis/MemorySSA/pr43044.ll | 52 + llvm/test/Analysis/MemorySSA/pr43317.ll | 35 + llvm/test/Analysis/MemorySSA/pr43320.ll | 33 + llvm/test/Analysis/MemorySSA/pr43426.ll | 40 + llvm/test/Analysis/MemorySSA/pr43427.ll | 42 + llvm/test/Analysis/MemorySSA/pr43438.ll | 100 + llvm/test/Analysis/MemorySSA/pr43493.ll | 27 + llvm/test/Analysis/MemorySSA/pr43540.ll | 34 + llvm/test/Analysis/MemorySSA/pr43541.ll | 50 + llvm/test/Analysis/MemorySSA/pr43569.ll | 49 + llvm/test/Analysis/MemorySSA/pr43641.ll | 22 + llvm/test/Analysis/MemorySSA/pr44027.ll | 27 + llvm/test/Analysis/MemorySSA/pr44029.ll | 63 + llvm/test/Analysis/MemorySSA/renamephis.ll | 51 + llvm/test/Analysis/MemorySSA/unreachable.ll | 31 + .../MustExecute/must_be_executed_context.ll | 399 + .../Analysis/ScalarEvolution/limit-depth.ll | 2 +- .../ScalarEvolution/max-expr-cache.ll | 4 +- .../max-trip-count-address-space.ll | 2 +- .../ScalarEvolution/max-trip-count.ll | 2 +- .../multiple-max-iterations.ll | 2 + llvm/test/Analysis/ScalarEvolution/nsw.ll | 2 +- llvm/test/Analysis/ScalarEvolution/pr22674.ll | 2 +- .../Analysis/ScalarEvolution/range_nw_flag.ll | 121 + .../ScalarEvolution/scev-canonical-mode.ll | 2 +- 
.../test/Analysis/ScalarEvolution/sext-mul.ll | 4 +- .../ScalarEvolution/trip-count-andor.ll | 365 + .../Analysis/ScalarEvolution/trip-count.ll | 70 +- .../Analysis/ScalarEvolution/trip-count10.ll | 78 +- .../Analysis/ScalarEvolution/trip-count12.ll | 2 +- .../Analysis/ScalarEvolution/trip-count15.ll | 121 + .../Analysis/ScalarEvolution/trip-count2.ll | 50 +- .../Analysis/ScalarEvolution/trip-count3.ll | 14 +- .../Analysis/ScalarEvolution/trip-count4.ll | 37 +- .../Analysis/ScalarEvolution/trip-count6.ll | 17 +- .../Analysis/ScalarEvolution/trip-count7.ll | 133 +- .../Analysis/ScalarEvolution/trip-count8.ll | 41 +- .../Analysis/ScalarEvolution/trip-count9.ll | 263 +- .../ScalarEvolution/umin-umax-folds.ll | 317 + .../ScalarEvolution/widenable-condition.ll | 45 + .../TypeBasedAliasAnalysis/PR17620.ll | 4 +- .../TypeBasedAliasAnalysis/functionattrs.ll | 2 +- .../TypeBasedAliasAnalysis/intrinsics.ll | 4 +- .../TypeBasedAliasAnalysis/memcpyopt.ll | 5 +- .../TypeBasedAliasAnalysis/tbaa-path.ll | 2 +- .../ValueTracking/known-nonnull-at.ll | 122 +- .../ValueTracking/memory-dereferenceable.ll | 8 + .../ValueTracking/non-negative-phi-bits.ll | 2 +- .../Assembler/2003-11-11-ImplicitRename.ll | 7 + llvm/test/Assembler/asm-path-writer.ll | 4 +- .../test/Assembler/auto_upgrade_intrinsics.ll | 15 + llvm/test/Assembler/block-labels.ll | 4 +- llvm/test/Assembler/byval-type-attr.ll | 20 +- .../call-nonzero-program-addrspace-2.ll.x | 4 +- ...talayout-invalid-function-ptr-alignment.ll | 5 + ...alayout-invalid-stack-natural-alignment.ll | 5 + llvm/test/Assembler/debug-info.ll | 9 +- llvm/test/Assembler/dimodule.ll | 4 +- .../export-symbol-anonymous-class.ll | 38 + .../Assembler/incorrect-tdep-attrs-parsing.ll | 2 +- llvm/test/Assembler/invalid-arg-num-1.ll | 6 + llvm/test/Assembler/invalid-arg-num-2.ll | 6 + llvm/test/Assembler/invalid-arg-num-3.ll | 6 + .../invoke-nonzero-program-addrspace.ll.x | 5 +- llvm/test/Assembler/multi-mod-disassemble.ll | 15 + 
.../Assembler/multi-summary-disassemble.ll | 18 + .../Assembler/source-filename-backslash.ll | 5 +- llvm/test/Assembler/thinlto-summary.ll | 10 +- llvm/test/Assembler/thinlto-vtable-summary.ll | 2 +- llvm/test/Bindings/llvm-c/debug_info.ll | 92 +- llvm/test/Bindings/llvm-c/echo.ll | 18 + llvm/test/Bindings/llvm-c/freeze.ll | 23 + llvm/test/Bitcode/DILocation-implicit-code.ll | 4 +- llvm/test/Bitcode/aarch64-addp-upgrade.ll | 18 + llvm/test/Bitcode/attributes-3.3.ll | 36 +- llvm/test/Bitcode/attributes.ll | 72 +- llvm/test/Bitcode/avr-calling-conventions.ll | 12 +- llvm/test/Bitcode/calling-conventions.3.2.ll | 9 + llvm/test/Bitcode/compatibility-3.6.ll.x | 15 +- llvm/test/Bitcode/compatibility-3.7.ll.x | 15 +- llvm/test/Bitcode/compatibility-3.8.ll | 15 +- llvm/test/Bitcode/compatibility-3.9.ll | 15 +- llvm/test/Bitcode/compatibility-4.0.ll | 15 +- llvm/test/Bitcode/compatibility-5.0.ll | 19 +- llvm/test/Bitcode/compatibility-6.0.ll | 19 +- llvm/test/Bitcode/compatibility.ll | 167 +- llvm/test/Bitcode/drop-debug-info.3.5.ll.x | 2 +- .../test/Bitcode/invalid-functionptr-align.ll | 5 + .../Bitcode/invalid-type-for-null-constant.ll | 6 + llvm/test/Bitcode/multi-module.ll | 5 - .../Bitcode/operand-bundles-bc-analyzer.ll | 1 + llvm/test/Bitcode/summary_version.ll | 2 +- llvm/test/Bitcode/thinlto-alias3.ll | 11 + llvm/test/Bitcode/thinlto-deadstrip-flag.ll | 4 +- .../Bitcode/thinlto-synthetic-count-flag.ll | 4 +- .../upgrade-arc-runtime-calls-bitcast.ll | 21 + .../test/Bitcode/upgrade-arc-runtime-calls.ll | 125 + llvm/test/Bitcode/upgrade-clang-arc-use.ll | 4 +- llvm/test/Bitcode/upgrade-datalayout.ll | 9 + llvm/test/Bitcode/upgrade-datalayout2.ll | 10 + llvm/test/Bitcode/upgrade-datalayout3.ll | 8 + llvm/test/Bitcode/upgrade-frame-pointer.ll | 33 + .../test/Bitcode/upgrade-memory-intrinsics.ll | 2 +- llvm/test/Bitcode/upgrade-tbaa.ll | 2 +- llvm/test/BugPoint/attr-crash.ll | 16 + llvm/test/BugPoint/func-attrs-keyval.ll | 2 +- llvm/test/BugPoint/func-attrs.ll | 12 +- 
llvm/test/BugPoint/metadata.ll | 16 +- .../test/BugPoint/retain-crashing-metadata.ll | 22 + .../AArch64/GlobalISel/arm64-callingconv.ll | 37 +- .../AArch64/GlobalISel/arm64-fallback.ll | 68 +- .../GlobalISel/arm64-irtranslator-gep.ll | 51 + .../GlobalISel/arm64-irtranslator-switch.ll | 46 +- .../AArch64/GlobalISel/arm64-irtranslator.ll | 245 +- .../GlobalISel/call-lowering-i128-on-stack.ll | 12 + .../AArch64/GlobalISel/call-translator-cse.ll | 6 +- .../AArch64/GlobalISel/call-translator-ios.ll | 10 +- .../GlobalISel/call-translator-musttail.ll | 13 + .../call-translator-tail-call-weak.ll | 15 + .../GlobalISel/call-translator-tail-call.ll | 269 + .../call-translator-variadic-musttail.ll | 223 + .../AArch64/GlobalISel/call-translator.ll | 27 +- .../combiner-load-store-indexing.ll | 182 + .../CodeGen/AArch64/GlobalISel/const-0.ll | 25 + .../AArch64/GlobalISel/dynamic-alloca.ll | 75 +- .../GlobalISel/gisel-commandline-option.ll | 11 +- .../gisel-fail-intermediate-legalizer.ll | 6 +- .../GlobalISel/integration-shuffle-vector.ll | 25 + .../GlobalISel/irtranslator-exceptions.ll | 4 +- .../GlobalISel/irtranslator-extends.ll | 30 + .../irtranslator-split-vector-arg.ll | 22 + .../AArch64/GlobalISel/irtranslator-tbaa.ll | 19 + .../AArch64/GlobalISel/legalize-sext-128.ll | 9 + .../AArch64/GlobalISel/memcpy_chk_no_tail.ll | 30 + .../AArch64/GlobalISel/no-neon-no-fp.ll | 13 + .../CodeGen/AArch64/GlobalISel/swifterror.ll | 92 +- .../CodeGen/AArch64/GlobalISel/swiftself.ll | 62 + .../GlobalISel/tail-call-no-save-fp-lr.ll | 23 + .../AArch64/GlobalISel/translate-gep.ll | 16 +- llvm/test/CodeGen/AArch64/O0-pipeline.ll | 16 +- llvm/test/CodeGen/AArch64/O3-pipeline.ll | 27 +- ...aarch64-2014-08-11-MachineCombinerCrash.ll | 2 +- .../AArch64/aarch64-a57-fp-load-balancing.ll | 4 +- .../AArch64/aarch64-dynamic-stack-layout.ll | 124 +- .../AArch64/aarch64-fix-cortex-a53-835769.ll | 16 +- llvm/test/CodeGen/AArch64/aarch64-smull.ll | 60 + .../CodeGen/AArch64/aarch64-stp-cluster.ll | 20 +- 
.../AArch64/aarch64-sve-asm-negative.ll | 12 + llvm/test/CodeGen/AArch64/aarch64-sve-asm.ll | 70 + .../AArch64/addsub-constant-folding.ll | 72 +- llvm/test/CodeGen/AArch64/addsub-shifted.ll | 41 +- llvm/test/CodeGen/AArch64/addsub_ext.ll | 53 +- llvm/test/CodeGen/AArch64/align-down.ll | 149 + llvm/test/CodeGen/AArch64/alloca.ll | 13 +- llvm/test/CodeGen/AArch64/arm64-aapcs.ll | 2 +- .../test/CodeGen/AArch64/arm64-abi-varargs.ll | 147 +- llvm/test/CodeGen/AArch64/arm64-abi_align.ll | 16 +- .../arm64-alloca-frame-pointer-offset.ll | 4 +- .../CodeGen/AArch64/arm64-blockaddress.ll | 8 +- .../CodeGen/AArch64/arm64-call-tailcalls.ll | 1 + .../AArch64/arm64-code-model-large-darwin.ll | 16 + .../arm64-collect-loh-garbage-crash.ll | 1 + .../CodeGen/AArch64/arm64-collect-loh-str.ll | 1 + .../test/CodeGen/AArch64/arm64-collect-loh.ll | 115 +- .../CodeGen/AArch64/arm64-detect-vec-redux.ll | 2 +- .../AArch64/arm64-fast-isel-addr-offset.ll | 2 +- .../arm64-fastisel-gep-promote-before-add.ll | 2 +- .../arm64-fma-combine-with-fpfusion.ll | 2 +- llvm/test/CodeGen/AArch64/arm64-fmadd.ll | 18 + llvm/test/CodeGen/AArch64/arm64-fp.ll | 47 +- .../CodeGen/AArch64/arm64-indexed-memory.ll | 1 + .../AArch64/arm64-indexed-vector-ldst-2.ll | 2 +- llvm/test/CodeGen/AArch64/arm64-inline-asm.ll | 47 +- .../test/CodeGen/AArch64/arm64-large-frame.ll | 2 +- .../test/CodeGen/AArch64/arm64-ldp-cluster.ll | 36 +- llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll | 73 + .../CodeGen/AArch64/arm64-memset-inline.ll | 4 +- .../AArch64/arm64-memset-to-bzero-pgso.ll | 128 + .../AArch64/arm64-misched-basic-A53.ll | 2 +- .../AArch64/arm64-misched-basic-A57.ll | 2 +- .../test/CodeGen/AArch64/arm64-neon-2velem.ll | 1142 +- .../arm64-neon-vector-shuffle-extract.ll | 26 + .../CodeGen/AArch64/arm64-preserve-most.ll | 38 + llvm/test/CodeGen/AArch64/arm64-rev.ll | 243 +- .../CodeGen/AArch64/arm64-shrink-wrapping.ll | 4 +- llvm/test/CodeGen/AArch64/arm64-st1.ll | 4 +- llvm/test/CodeGen/AArch64/arm64-stacksave.ll | 4 +- 
.../CodeGen/AArch64/arm64-storebytesmerge.ll | 4 +- llvm/test/CodeGen/AArch64/arm64-tls-darwin.ll | 1 + .../CodeGen/AArch64/arm64-tls-initial-exec.ll | 51 + .../CodeGen/AArch64/arm64-tls-local-exec.ll | 106 + .../AArch64/arm64-triv-disjoint-mem-access.ll | 2 +- .../CodeGen/AArch64/arm64-variadic-aapcs.ll | 9 +- llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll | 145 + llvm/test/CodeGen/AArch64/arm64-vmul.ll | 1440 +- llvm/test/CodeGen/AArch64/arm64-vshift.ll | 209 +- .../AArch64/arm64-zero-cycle-zeroing.ll | 2 +- llvm/test/CodeGen/AArch64/arm64_32-addrs.ll | 44 + llvm/test/CodeGen/AArch64/arm64_32-atomics.ll | 261 + .../test/CodeGen/AArch64/arm64_32-fastisel.ll | 49 + .../AArch64/arm64_32-frame-pointers.ll | 26 + .../test/CodeGen/AArch64/arm64_32-gep-sink.ll | 61 + llvm/test/CodeGen/AArch64/arm64_32-memcpy.ll | 66 + llvm/test/CodeGen/AArch64/arm64_32-neon.ll | 198 + llvm/test/CodeGen/AArch64/arm64_32-null.ll | 28 + .../AArch64/arm64_32-pointer-extend.ll | 49 + .../AArch64/arm64_32-stack-pointers.ll | 13 + llvm/test/CodeGen/AArch64/arm64_32-tls.ll | 22 + llvm/test/CodeGen/AArch64/arm64_32-va.ll | 56 + llvm/test/CodeGen/AArch64/arm64_32.ll | 730 + llvm/test/CodeGen/AArch64/basic-pic.ll | 10 +- llvm/test/CodeGen/AArch64/bitfield-insert.ll | 12 +- ...ranch-target-enforcement-indirect-calls.ll | 3 + .../CodeGen/AArch64/bti-branch-relaxation.ll | 64 + .../CodeGen/AArch64/callbr-asm-obj-file.ll | 30 +- llvm/test/CodeGen/AArch64/cfguard-checks.ll | 147 + .../CodeGen/AArch64/cfguard-module-flag.ll | 25 + llvm/test/CodeGen/AArch64/cgp-usubo.ll | 12 +- llvm/test/CodeGen/AArch64/cls.ll | 20 + llvm/test/CodeGen/AArch64/cmpxchg-O0.ll | 6 +- .../CodeGen/AArch64/code-model-large-abs.ll | 11 + llvm/test/CodeGen/AArch64/consthoist-gep.ll | 2 +- llvm/test/CodeGen/AArch64/cpus.ll | 6 +- llvm/test/CodeGen/AArch64/csr-split.ll | 224 + llvm/test/CodeGen/AArch64/cxx-tlscc.ll | 2 +- .../CodeGen/AArch64/dag-combine-invaraints.ll | 4 +- .../CodeGen/AArch64/dbg-declare-tag-offset.ll | 19 +- 
.../CodeGen/AArch64/dbg-value-tag-offset.ll | 68 + .../div-rem-pair-recomposition-signed.ll | 319 + .../div-rem-pair-recomposition-unsigned.ll | 319 + llvm/test/CodeGen/AArch64/dllimport.ll | 4 +- llvm/test/CodeGen/AArch64/eon.ll | 3 + llvm/test/CodeGen/AArch64/expand-select.ll | 40 +- llvm/test/CodeGen/AArch64/f16-instructions.ll | 8 +- llvm/test/CodeGen/AArch64/fadd-combines.ll | 51 +- .../AArch64/fast-isel-branch-uncond-debug.ll | 44 + .../CodeGen/AArch64/fast-isel-sp-adjust.ll | 1 + llvm/test/CodeGen/AArch64/fastcc-reserved.ll | 9 +- llvm/test/CodeGen/AArch64/fastcc.ll | 28 +- llvm/test/CodeGen/AArch64/fp-intrinsics.ll | 1568 +++ llvm/test/CodeGen/AArch64/fp16-fmla.ll | 220 + .../CodeGen/AArch64/fp16_intrinsic_lane.ll | 8 +- .../AArch64/fp16_intrinsic_scalar_3op.ll | 85 +- .../fpconv-vector-op-scalarize-strict.ll | 33 + .../AArch64/global-merge-group-by-use.ll | 4 +- .../AArch64/global-merge-hidden-minsize.ll | 25 + ...st-and-by-const-from-lshr-in-eqcmp-zero.ll | 92 +- ...ist-and-by-const-from-shl-in-eqcmp-zero.ll | 82 +- .../CodeGen/AArch64/hwasan-check-memaccess.ll | 50 +- .../AArch64/i128_volatile_load_store.ll | 117 + .../test/CodeGen/AArch64/illegal-float-ops.ll | 72 +- llvm/test/CodeGen/AArch64/irg_sp_tagp.ll | 2 +- llvm/test/CodeGen/AArch64/jump-table-32.ll | 42 + .../test/CodeGen/AArch64/jump-table-exynos.ll | 2 - llvm/test/CodeGen/AArch64/large-stack.ll | 49 + .../CodeGen/AArch64/ldst-paired-aliasing.ll | 31 +- .../AArch64/load-combine-big-endian.ll | 158 +- llvm/test/CodeGen/AArch64/load-combine.ll | 142 +- .../AArch64/loopvectorize_pr33804_double.ll | 2 +- llvm/test/CodeGen/AArch64/lower-ptrmask.ll | 29 + .../CodeGen/AArch64/machine-combiner-madd.ll | 2 - .../AArch64/machine-outliner-remarks.ll | 2 +- .../machine-outliner-retaddr-sign-cfi.ll | 91 + ...tliner-retaddr-sign-diff-scope-same-key.ll | 77 + .../machine-outliner-retaddr-sign-non-leaf.ll | 79 + ...tliner-retaddr-sign-same-scope-diff-key.ll | 78 + ...iner-retaddr-sign-same-scope-same-key-a.ll 
| 75 + ...iner-retaddr-sign-same-scope-same-key-b.ll | 81 + .../machine-outliner-retaddr-sign-sp-mod.ll | 44 + ...machine-outliner-retaddr-sign-subtarget.ll | 87 + .../machine-outliner-retaddr-sign-thunk.ll | 76 + .../machine-outliner-retaddr-sign-v8-3.ll | 83 + llvm/test/CodeGen/AArch64/machine-outliner.ll | 29 +- llvm/test/CodeGen/AArch64/macro-fusion.ll | 20 + llvm/test/CodeGen/AArch64/max-jump-table.ll | 145 +- .../CodeGen/AArch64/merge-store-dependency.ll | 46 +- llvm/test/CodeGen/AArch64/merge-store.ll | 20 +- .../CodeGen/AArch64/misched-fusion-aes.ll | 2 - llvm/test/CodeGen/AArch64/neon-dot-product.ll | 148 +- llvm/test/CodeGen/AArch64/neon-mla-mls.ll | 145 +- llvm/test/CodeGen/AArch64/neon-vcadd.ll | 67 + llvm/test/CodeGen/AArch64/no-quad-ldp-stp.ll | 1 - llvm/test/CodeGen/AArch64/no_cfi.ll | 13 + .../AArch64/note-gnu-property-pac-bti-0.ll | 14 + .../AArch64/note-gnu-property-pac-bti-1.ll | 18 + .../AArch64/note-gnu-property-pac-bti-2.ll | 18 + .../AArch64/note-gnu-property-pac-bti-3.ll | 18 + .../AArch64/note-gnu-property-pac-bti-4.ll | 25 + .../AArch64/note-gnu-property-pac-bti-5.ll | 26 + .../AArch64/note-gnu-property-pac-bti-6.ll | 22 + .../AArch64/note-gnu-property-pac-bti-7.ll | 23 + .../AArch64/note-gnu-property-pac-bti-8.ll | 21 + .../CodeGen/AArch64/overeager_mla_fusing.ll | 59 + .../AArch64/patchable-function-entry-bti.ll | 86 + .../AArch64/patchable-function-entry.ll | 105 + llvm/test/CodeGen/AArch64/powi-windows.ll | 46 + .../CodeGen/AArch64/preferred-alignment.ll | 4 +- .../AArch64/preferred-function-alignment.ll | 16 +- .../AArch64/ragreedy-local-interval-cost.ll | 339 + .../AArch64/regress-w29-reserved-with-fp.ll | 2 +- llvm/test/CodeGen/AArch64/remat.ll | 10 +- llvm/test/CodeGen/AArch64/sadd_sat.ll | 73 +- llvm/test/CodeGen/AArch64/sadd_sat_plus.ll | 94 + llvm/test/CodeGen/AArch64/sadd_sat_vec.ll | 621 +- .../CodeGen/AArch64/sched-past-vector-ldst.ll | 26 +- llvm/test/CodeGen/AArch64/sdivpow2.ll | 19 + 
llvm/test/CodeGen/AArch64/seh-finally.ll | 8 +- llvm/test/CodeGen/AArch64/select_const.ll | 625 + .../CodeGen/AArch64/selectcc-to-shiftand.ll | 95 + .../test/CodeGen/AArch64/shadow-call-stack.ll | 2 +- llvm/test/CodeGen/AArch64/shift-amount-mod.ll | 12 + llvm/test/CodeGen/AArch64/shift-by-signext.ll | 122 + llvm/test/CodeGen/AArch64/shift-logic.ll | 153 + llvm/test/CodeGen/AArch64/shift-mod.ll | 77 + llvm/test/CodeGen/AArch64/shift_minsize.ll | 5 + llvm/test/CodeGen/AArch64/sibling-call.ll | 24 +- .../CodeGen/AArch64/sign-return-address.ll | 75 +- .../AArch64/sink-copy-for-shrink-wrap.ll | 2 +- llvm/test/CodeGen/AArch64/space.ll | 16 + .../AArch64/speculation-hardening-dagisel.ll | 2 +- .../AArch64/speculation-hardening-loads.ll | 2 +- .../CodeGen/AArch64/speculation-hardening.ll | 37 +- llvm/test/CodeGen/AArch64/sponentry.ll | 4 +- llvm/test/CodeGen/AArch64/srem-lkk.ll | 149 + .../CodeGen/AArch64/srem-seteq-optsize.ll | 39 + .../AArch64/srem-seteq-vec-nonsplat.ll | 847 ++ .../CodeGen/AArch64/srem-seteq-vec-splat.ll | 226 + llvm/test/CodeGen/AArch64/srem-seteq.ll | 286 + llvm/test/CodeGen/AArch64/srem-vector-lkk.ll | 324 + llvm/test/CodeGen/AArch64/ssub_sat.ll | 74 +- llvm/test/CodeGen/AArch64/ssub_sat_plus.ll | 94 + llvm/test/CodeGen/AArch64/ssub_sat_vec.ll | 662 +- .../CodeGen/AArch64/stack-guard-reassign.ll | 7 +- .../test/CodeGen/AArch64/stack-guard-vaarg.ll | 2 +- .../CodeGen/AArch64/stack-tagging-ex-1.ll | 69 + .../CodeGen/AArch64/stack-tagging-ex-2.ll | 183 + .../stack-tagging-initializer-merge.ll | 308 + .../AArch64/stack-tagging-unchecked-ld-st.ll | 260 + .../AArch64/stack-tagging-untag-placement.ll | 82 + .../test/CodeGen/AArch64/stack_guard_remat.ll | 2 +- llvm/test/CodeGen/AArch64/stackmap.ll | 492 + llvm/test/CodeGen/AArch64/stgp.ll | 2 +- .../CodeGen/AArch64/strict-fp-int-promote.ll | 67 + llvm/test/CodeGen/AArch64/strqu.ll | 4 +- llvm/test/CodeGen/AArch64/sub-of-bias.ll | 103 + .../CodeGen/AArch64/sve-alloca-stackid.ll | 17 + 
.../CodeGen/AArch64/sve-calling-convention.ll | 121 + llvm/test/CodeGen/AArch64/sve-fp.ll | 129 + .../AArch64/sve-gather-scatter-dag-combine.ll | 72 + .../test/CodeGen/AArch64/sve-int-arith-imm.ll | 448 + .../CodeGen/AArch64/sve-int-arith-pred.ll | 406 + llvm/test/CodeGen/AArch64/sve-int-arith.ll | 216 + llvm/test/CodeGen/AArch64/sve-int-div-pred.ll | 91 + llvm/test/CodeGen/AArch64/sve-int-imm.ll | 519 + llvm/test/CodeGen/AArch64/sve-int-log-imm.ll | 128 + llvm/test/CodeGen/AArch64/sve-int-log-pred.ll | 179 + llvm/test/CodeGen/AArch64/sve-int-log.ll | 96 + llvm/test/CodeGen/AArch64/sve-int-mad-pred.ll | 199 + llvm/test/CodeGen/AArch64/sve-int-mul-pred.ll | 134 + .../CodeGen/AArch64/sve-int-reduce-pred.ll | 400 + .../AArch64/sve-intrinsics-conversion.ll | 159 + .../AArch64/sve-intrinsics-counting-bits.ll | 181 + .../AArch64/sve-intrinsics-counting-elems.ll | 171 + .../AArch64/sve-intrinsics-fp-arith.ll | 1681 +++ .../AArch64/sve-intrinsics-fp-compares.ll | 267 + .../AArch64/sve-intrinsics-fp-converts.ll | 400 + .../AArch64/sve-intrinsics-fp-reduce.ll | 214 + ...nsics-gather-loads-32bit-scaled-offsets.ll | 255 + ...ics-gather-loads-32bit-unscaled-offsets.ll | 348 + ...insics-gather-loads-64bit-scaled-offset.ll | 80 + ...sics-gather-loads-64bit-unscaled-offset.ll | 103 + ...sve-intrinsics-gather-loads-vector-base.ll | 186 + .../AArch64/sve-intrinsics-int-arith.ll | 192 + .../sve-intrinsics-int-compares-with-imm.ll | 1353 ++ .../AArch64/sve-intrinsics-int-compares.ll | 645 + .../CodeGen/AArch64/sve-intrinsics-loads.ll | 88 + .../CodeGen/AArch64/sve-intrinsics-logical.ll | 99 + .../AArch64/sve-intrinsics-perm-select.ll | 1765 +++ .../AArch64/sve-intrinsics-pred-creation.ll | 42 + .../AArch64/sve-intrinsics-pred-operations.ll | 130 + .../AArch64/sve-intrinsics-pred-testing.ll | 36 + .../AArch64/sve-intrinsics-reversal.ll | 166 + ...ics-scatter-stores-32bit-scaled-offsets.ll | 193 + ...s-scatter-stores-32bit-unscaled-offsets.ll | 248 + 
...sics-scatter-stores-64bit-scaled-offset.ll | 58 + ...cs-scatter-stores-64bit-unscaled-offset.ll | 70 + ...e-intrinsics-scatter-stores-vector-base.ll | 133 + .../CodeGen/AArch64/sve-intrinsics-shifts.ll | 367 + .../CodeGen/AArch64/sve-intrinsics-sqdec.ll | 337 + .../CodeGen/AArch64/sve-intrinsics-sqinc.ll | 337 + .../CodeGen/AArch64/sve-intrinsics-stores.ll | 95 + .../CodeGen/AArch64/sve-intrinsics-uqdec.ll | 257 + .../CodeGen/AArch64/sve-intrinsics-uqinc.ll | 257 + .../CodeGen/AArch64/sve-intrinsics-while.ll | 309 + .../CodeGen/AArch64/sve-masked-ldst-nonext.ll | 193 + .../CodeGen/AArch64/sve-masked-ldst-sext.ll | 66 + .../CodeGen/AArch64/sve-masked-ldst-trunc.ll | 66 + .../CodeGen/AArch64/sve-masked-ldst-zext.ll | 72 + .../AArch64/sve-neg-int-arith-imm-2.ll | 12 + .../CodeGen/AArch64/sve-neg-int-arith-imm.ll | 11 + llvm/test/CodeGen/AArch64/sve-pred-log.ll | 286 + llvm/test/CodeGen/AArch64/sve-select.ll | 85 + llvm/test/CodeGen/AArch64/sve-setcc.ll | 26 + llvm/test/CodeGen/AArch64/sve-vector-splat.ll | 135 + ...ve2-intrinsics-binary-narrowing-add-sub.ll | 278 + .../sve2-intrinsics-binary-narrowing-shr.ll | 512 + .../AArch64/sve2-intrinsics-fp-converts.ll | 84 + ...sve2-intrinsics-fp-int-binary-logarithm.ll | 39 + .../sve2-intrinsics-fp-widening-mul-acc.ll | 127 + ...-intrinsics-non-widening-pairwise-arith.ll | 191 + .../sve2-intrinsics-unary-narrowing.ll | 202 + .../CodeGen/AArch64/sve2-intrinsics-while.ll | 309 + llvm/test/CodeGen/AArch64/swift-return.ll | 4 +- llvm/test/CodeGen/AArch64/swiftcc.ll | 2 + llvm/test/CodeGen/AArch64/swifterror.ll | 131 +- llvm/test/CodeGen/AArch64/swiftself.ll | 29 +- llvm/test/CodeGen/AArch64/tagged-globals.ll | 32 + llvm/test/CodeGen/AArch64/tail-call.ll | 120 +- .../AArch64/tailcall-bitcast-memcpy.ll | 18 + .../CodeGen/AArch64/tailcall-ccmismatch.ll | 1 + .../AArch64/tailcall-mem-intrinsics.ll | 1 + .../CodeGen/AArch64/tailcall-string-rvo.ll | 1 + .../CodeGen/AArch64/tailcall_misched_graph.ll | 46 +- 
llvm/test/CodeGen/AArch64/tbz-tbnz.ll | 2 +- llvm/test/CodeGen/AArch64/tme.ll | 44 + llvm/test/CodeGen/AArch64/uadd_sat.ll | 44 +- llvm/test/CodeGen/AArch64/uadd_sat_plus.ll | 77 + llvm/test/CodeGen/AArch64/uadd_sat_vec.ll | 226 +- .../umulo-128-legalisation-lowering.ll | 4 +- ...asked-merge-scalar-constmask-innerouter.ll | 16 +- ...-merge-scalar-constmask-interleavedbits.ll | 16 +- ...-scalar-constmask-interleavedbytehalves.ll | 16 +- ...d-masked-merge-scalar-constmask-lowhigh.ll | 16 +- ...unfold-masked-merge-scalar-variablemask.ll | 16 +- llvm/test/CodeGen/AArch64/urem-lkk.ll | 103 + .../CodeGen/AArch64/urem-seteq-nonzero.ll | 243 + .../CodeGen/AArch64/urem-seteq-optsize.ll | 3 - .../AArch64/urem-seteq-vec-nonsplat.ll | 278 +- .../CodeGen/AArch64/urem-seteq-vec-nonzero.ll | 115 + .../CodeGen/AArch64/urem-seteq-vec-splat.ll | 107 +- .../AArch64/urem-seteq-vec-tautological.ll | 97 + llvm/test/CodeGen/AArch64/urem-seteq.ll | 79 +- llvm/test/CodeGen/AArch64/urem-vector-lkk.ll | 267 + .../AArch64/use-cr-result-of-dom-icmp-st.ll | 547 + llvm/test/CodeGen/AArch64/usub_sat.ll | 43 +- llvm/test/CodeGen/AArch64/usub_sat_plus.ll | 76 + llvm/test/CodeGen/AArch64/usub_sat_vec.ll | 136 +- llvm/test/CodeGen/AArch64/vararg-tallcall.ll | 2 + .../AArch64/vecreduce-and-legalization.ll | 18 +- .../vecreduce-fadd-legalization-strict.ll | 128 + .../vecreduce-fmax-legalization-nan.ll | 88 + .../AArch64/vecreduce-fmax-legalization.ll | 2 +- .../vecreduce-fmul-legalization-strict.ll | 114 + .../CodeGen/AArch64/vector_merge_dep_check.ll | 2 +- .../test/CodeGen/AArch64/vselect-constants.ll | 195 + llvm/test/CodeGen/AArch64/win64-no-uwtable.ll | 35 + llvm/test/CodeGen/AArch64/win64_vararg.ll | 74 +- .../CodeGen/AArch64/windows-extern-weak.ll | 27 + .../AArch64/wineh-try-catch-realign.ll | 12 +- llvm/test/CodeGen/AArch64/wineh-try-catch.ll | 36 +- .../AMDGPU/GlobalISel/bool-legalization.ll | 105 + .../GlobalISel/divergent-control-flow.ll | 61 + .../AMDGPU/GlobalISel/extractelement.ll | 1289 
++ .../AMDGPU/GlobalISel/function-returns.ll | 1063 ++ .../CodeGen/AMDGPU/GlobalISel/global-value.ll | 156 + .../GlobalISel/irtranslator-amdgcn-sendmsg.ll | 15 + .../GlobalISel/irtranslator-amdgpu_kernel.ll | 380 +- .../GlobalISel/irtranslator-amdgpu_ps.ll | 31 +- .../GlobalISel/irtranslator-amdgpu_vs.ll | 25 +- .../GlobalISel/irtranslator-atomicrmw.ll | 48 + .../GlobalISel/irtranslator-function-args.ll | 2227 +++ .../irtranslator-struct-return-intrinsics.ll | 5 +- .../GlobalISel/lds-global-non-entry-func.ll | 13 + .../AMDGPU/GlobalISel/lds-global-value.ll | 35 + .../CodeGen/AMDGPU/GlobalISel/lds-size.ll | 1 + .../AMDGPU/GlobalISel/lds-zero-initializer.ll | 5 + .../llvm.amdgcn.ds.ordered.add.gfx10.ll | 1 + .../GlobalISel/llvm.amdgcn.ds.ordered.add.ll | 5 + .../GlobalISel/llvm.amdgcn.ds.ordered.swap.ll | 5 + .../GlobalISel/llvm.amdgcn.end.cf.i32.ll | 3 + .../GlobalISel/llvm.amdgcn.end.cf.i64.ll | 3 + .../GlobalISel/llvm.amdgcn.if.break.i32.ll | 3 +- .../GlobalISel/llvm.amdgcn.if.break.i64.ll | 6 +- .../GlobalISel/llvm.amdgcn.init.exec.ll | 2 + .../llvm.amdgcn.init.exec.wave32.ll | 2 + .../GlobalISel/llvm.amdgcn.is.private.ll | 109 + .../GlobalISel/llvm.amdgcn.is.shared.ll | 109 + .../llvm.amdgcn.kernarg.segment.ptr.ll | 2 +- ...llvm.amdgcn.raw.buffer.store.format.f16.ll | 504 + ...llvm.amdgcn.raw.buffer.store.format.f32.ll | 306 + .../llvm.amdgcn.raw.buffer.store.ll | 776 ++ .../AMDGPU/GlobalISel/llvm.amdgcn.s.sleep.ll | 45 + .../GlobalISel/llvm.amdgcn.workgroup.id.ll | 4 +- .../GlobalISel/llvm.amdgcn.workitem.id.ll | 31 +- .../AMDGPU/GlobalISel/llvm.amdgcn.wqm.vote.ll | 3 + .../AMDGPU/GlobalISel/read_register.ll | 2 + .../AMDGPU/GlobalISel/readcyclecounter.ll | 3 + .../regbankselect-amdgcn.image.load.1d.ll | 181 + .../regbankselect-amdgcn.image.sample.1d.ll | 268 + .../regbankselect-amdgcn.raw.buffer.load.ll | 173 + ...regbankselect-amdgcn.struct.buffer.load.ll | 179 + ...egbankselect-amdgcn.struct.buffer.store.ll | 174 + llvm/test/CodeGen/AMDGPU/GlobalISel/ret.ll 
| 3 + llvm/test/CodeGen/AMDGPU/add.i16.ll | 4 +- .../AMDGPU/addrspacecast-constantexpr.ll | 2 +- .../CodeGen/AMDGPU/agpr-register-count.ll | 139 +- .../CodeGen/AMDGPU/amdgcn.private-memory.ll | 9 +- .../AMDGPU/amdgpu-codegenprepare-idiv.ll | 104 +- .../AMDGPU/amdgpu-codegenprepare-mul24.ll | 219 +- llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll | 7 + .../CodeGen/AMDGPU/amdgpu-mul24-knownbits.ll | 34 + .../CodeGen/AMDGPU/amdgpu-unroll-threshold.ll | 52 + .../CodeGen/AMDGPU/amdgpu.private-memory.ll | 5 +- .../AMDGPU/annotate-kernel-features-hsa.ll | 19 + .../test/CodeGen/AMDGPU/array-ptr-calc-i32.ll | 2 +- .../AMDGPU/atomic_optimizations_buffer.ll | 92 +- .../atomic_optimizations_global_pointer.ll | 118 +- .../atomic_optimizations_local_pointer.ll | 5120 ++++++- .../atomic_optimizations_pixelshader.ll | 36 +- .../AMDGPU/atomic_optimizations_raw_buffer.ll | 70 +- .../atomic_optimizations_struct_buffer.ll | 70 +- llvm/test/CodeGen/AMDGPU/atomicrmw-nand.ll | 12 +- .../attr-amdgpu-flat-work-group-size-v3.ll | 18 +- .../attr-amdgpu-flat-work-group-size.ll | 18 +- .../CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll | 17 +- llvm/test/CodeGen/AMDGPU/basic-branch.ll | 4 +- .../AMDGPU/bitcast-constant-to-vector.ll | 4 +- llvm/test/CodeGen/AMDGPU/bitreverse.ll | 329 +- .../test/CodeGen/AMDGPU/branch-relax-spill.ll | 6 +- llvm/test/CodeGen/AMDGPU/branch-relaxation.ll | 3 +- llvm/test/CodeGen/AMDGPU/branch-uniformity.ll | 4 +- llvm/test/CodeGen/AMDGPU/bswap.ll | 197 +- .../AMDGPU/buffer-intrinsics-mmo-offsets.ll | 414 + llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll | 3 +- .../CodeGen/AMDGPU/call-argument-types.ll | 60 +- llvm/test/CodeGen/AMDGPU/call-constant.ll | 45 + .../AMDGPU/call-graph-register-usage.ll | 28 + .../AMDGPU/callee-special-input-vgprs.ll | 86 +- .../CodeGen/AMDGPU/calling-conventions.ll | 31 + llvm/test/CodeGen/AMDGPU/cc-sgpr-limit.ll | 138 + .../test/CodeGen/AMDGPU/cc-sgpr-over-limit.ll | 101 + .../CodeGen/AMDGPU/cgp-addressing-modes.ll | 5 +- 
llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll | 260 +- .../CodeGen/AMDGPU/combine-cond-add-sub.ll | 136 +- llvm/test/CodeGen/AMDGPU/commute-shifts.ll | 16 +- .../CodeGen/AMDGPU/computeNumSignBits-mul.ll | 172 + .../AMDGPU/control-flow-fastregalloc.ll | 7 +- .../CodeGen/AMDGPU/control-flow-optnone.ll | 4 +- llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll | 612 +- .../AMDGPU/cross-block-use-is-not-abi-copy.ll | 89 + .../CodeGen/AMDGPU/cse-phi-incoming-val.ll | 42 + llvm/test/CodeGen/AMDGPU/ctpop.ll | 2 +- llvm/test/CodeGen/AMDGPU/ctpop16.ll | 2 +- llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll | 75 +- .../CodeGen/AMDGPU/dagcombine-setcc-select.ll | 73 +- .../dead-machine-elim-after-dead-lane.ll | 28 + llvm/test/CodeGen/AMDGPU/debug-value2.ll | 8 +- llvm/test/CodeGen/AMDGPU/div_i128.ll | 2 +- llvm/test/CodeGen/AMDGPU/divergence-at-use.ll | 20 + .../divergent-branch-uniform-condition.ll | 36 +- llvm/test/CodeGen/AMDGPU/divrem24-assume.ll | 2 +- llvm/test/CodeGen/AMDGPU/dpp_combine.ll | 53 + ...ds-negative-offset-addressing-mode-loop.ll | 6 +- llvm/test/CodeGen/AMDGPU/ds_read2.ll | 2 +- .../AMDGPU/enable-no-signed-zeros-fp-math.ll | 26 +- .../AMDGPU/extract-subvector-equal-length.ll | 4 +- llvm/test/CodeGen/AMDGPU/extract-subvector.ll | 40 + .../AMDGPU/extract_subvector_vec4_vec3.ll | 21 +- .../CodeGen/AMDGPU/extract_vector_dynelt.ll | 4 +- llvm/test/CodeGen/AMDGPU/fabs.ll | 12 +- .../CodeGen/AMDGPU/fadd-fma-fmul-combine.ll | 118 +- llvm/test/CodeGen/AMDGPU/fadd.f16.ll | 2 +- llvm/test/CodeGen/AMDGPU/fdiv.ll | 77 +- .../CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll | 58 +- llvm/test/CodeGen/AMDGPU/fence-barrier.ll | 3 +- llvm/test/CodeGen/AMDGPU/fexp.ll | 1 - llvm/test/CodeGen/AMDGPU/ffloor.f64.ll | 28 +- .../test/CodeGen/AMDGPU/flat-address-space.ll | 27 +- llvm/test/CodeGen/AMDGPU/fma.f64.ll | 155 +- llvm/test/CodeGen/AMDGPU/fmac.sdwa.ll | 138 +- llvm/test/CodeGen/AMDGPU/fmin_legacy.ll | 8 +- .../AMDGPU/fmul-2-combine-multi-use.ll | 12 +- llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll | 6 
+- llvm/test/CodeGen/AMDGPU/fneg-combines.ll | 15 +- llvm/test/CodeGen/AMDGPU/fneg-fabs.ll | 16 +- .../fneg-fold-legalize-dag-increase-insts.ll | 24 + llvm/test/CodeGen/AMDGPU/fneg.ll | 29 +- llvm/test/CodeGen/AMDGPU/fpow.ll | 562 + llvm/test/CodeGen/AMDGPU/fptrunc.ll | 10 + .../CodeGen/AMDGPU/frame-index-elimination.ll | 44 +- llvm/test/CodeGen/AMDGPU/fsqrt.ll | 38 +- llvm/test/CodeGen/AMDGPU/fsub.ll | 12 +- llvm/test/CodeGen/AMDGPU/function-returns.ll | 20 + llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll | 29 + llvm/test/CodeGen/AMDGPU/global-constant.ll | 48 +- llvm/test/CodeGen/AMDGPU/global-saddr.ll | 2 +- llvm/test/CodeGen/AMDGPU/global_atomics.ll | 9 +- .../test/CodeGen/AMDGPU/global_atomics_i64.ll | 4 +- llvm/test/CodeGen/AMDGPU/global_smrd.ll | 2 +- .../hsa-metadata-from-llvm-ir-full-v3.ll | 1 + .../AMDGPU/hsa-metadata-from-llvm-ir-full.ll | 1 + .../AMDGPU/hsa-metadata-hostcall-absent-v3.ll | 55 + .../AMDGPU/hsa-metadata-hostcall-absent.ll | 52 + .../hsa-metadata-hostcall-present-v3.ll | 59 + .../AMDGPU/hsa-metadata-hostcall-present.ll | 58 + .../hsa-metadata-kernel-code-props-v3.ll | 19 +- .../AMDGPU/hsa-metadata-kernel-code-props.ll | 26 +- llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll | 27 +- .../AMDGPU/i1-copy-phi-uniform-branch.ll | 1 - llvm/test/CodeGen/AMDGPU/idiv-licm.ll | 439 +- llvm/test/CodeGen/AMDGPU/idot2.ll | 938 +- llvm/test/CodeGen/AMDGPU/idot4s.ll | 700 +- llvm/test/CodeGen/AMDGPU/idot4u.ll | 1032 +- llvm/test/CodeGen/AMDGPU/idot8s.ll | 2368 ++-- llvm/test/CodeGen/AMDGPU/idot8u.ll | 3019 ++-- .../AMDGPU/illegal-sgpr-to-vgpr-copy.ll | 9 +- llvm/test/CodeGen/AMDGPU/implicit-def-muse.ll | 7 +- .../AMDGPU/indirect-addressing-si-noopt.ll | 4 - .../AMDGPU/indirect-addressing-term.ll | 18 +- llvm/test/CodeGen/AMDGPU/inline-asm.ll | 23 +- .../test/CodeGen/AMDGPU/inline-constraints.ll | 5 +- .../AMDGPU/insert-subvector-unused-scratch.ll | 4 +- llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll | 1738 ++- .../CodeGen/AMDGPU/insert_vector_elt.v2i16.ll | 
1894 ++- llvm/test/CodeGen/AMDGPU/kernel-args.ll | 2 +- .../AMDGPU/kernel-argument-dag-lowering.ll | 2 +- .../test/CodeGen/AMDGPU/kill-infinite-loop.ll | 68 + .../AMDGPU/large-work-group-promote-alloca.ll | 7 +- llvm/test/CodeGen/AMDGPU/lcssa-optnone.ll | 26 + .../CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll | 524 +- .../CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll | 2 +- .../AMDGPU/llvm.amdgcn.ds.gws.barrier.ll | 91 +- .../CodeGen/AMDGPU/llvm.amdgcn.ds.gws.init.ll | 48 +- .../AMDGPU/llvm.amdgcn.ds.gws.sema.br.ll | 7 +- .../AMDGPU/llvm.amdgcn.ds.gws.sema.p.ll | 7 +- .../llvm.amdgcn.ds.gws.sema.release.all.ll | 7 +- .../AMDGPU/llvm.amdgcn.ds.gws.sema.v.ll | 7 +- .../AMDGPU/llvm.amdgcn.ds.ordered.swap.ll | 4 +- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll | 8 +- .../CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll | 7 +- .../AMDGPU/llvm.amdgcn.init.exec.wave32.ll | 31 + .../CodeGen/AMDGPU/llvm.amdgcn.interp.f16.ll | 8 +- .../CodeGen/AMDGPU/llvm.amdgcn.is.private.ll | 50 + .../CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll | 51 + llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll | 118 +- .../CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll | 25 + .../AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll | 2 +- .../AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll | 2 +- .../AMDGPU/llvm.amdgcn.raw.buffer.atomic.ll | 10 +- .../AMDGPU/llvm.amdgcn.raw.buffer.load.ll | 109 +- .../AMDGPU/llvm.amdgcn.raw.buffer.store.ll | 97 + .../AMDGPU/llvm.amdgcn.readfirstlane.ll | 4 +- .../CodeGen/AMDGPU/llvm.amdgcn.readlane.ll | 3 +- .../CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll | 56 +- .../AMDGPU/llvm.amdgcn.s.buffer.load.ll | 160 +- .../CodeGen/AMDGPU/llvm.amdgcn.softwqm.ll | 188 + .../llvm.amdgcn.struct.buffer.atomic.ll | 10 +- .../AMDGPU/llvm.amdgcn.struct.buffer.load.ll | 79 + .../AMDGPU/llvm.amdgcn.struct.buffer.store.ll | 53 +- .../CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll | 65 +- .../CodeGen/AMDGPU/llvm.amdgcn.wqm.vote.ll | 21 +- .../CodeGen/AMDGPU/llvm.amdgcn.writelane.ll | 20 +- llvm/test/CodeGen/AMDGPU/llvm.fma.f16.ll | 132 +- 
llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll | 27 +- llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll | 27 +- llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll | 762 +- llvm/test/CodeGen/AMDGPU/load-lo16.ll | 2038 ++- .../CodeGen/AMDGPU/local-memory.amdgcn.ll | 8 +- llvm/test/CodeGen/AMDGPU/loop_break.ll | 26 +- .../lower-range-metadata-intrinsic-call.ll | 2 +- llvm/test/CodeGen/AMDGPU/lshr.v2i16.ll | 554 +- llvm/test/CodeGen/AMDGPU/mad_int24.ll | 105 +- llvm/test/CodeGen/AMDGPU/madak.ll | 16 +- llvm/test/CodeGen/AMDGPU/max.i16.ll | 122 +- llvm/test/CodeGen/AMDGPU/max.ll | 30 +- llvm/test/CodeGen/AMDGPU/memory_clause.ll | 233 +- llvm/test/CodeGen/AMDGPU/mfma-loop.ll | 491 + llvm/test/CodeGen/AMDGPU/min.ll | 2 +- llvm/test/CodeGen/AMDGPU/movreld-bug.ll | 18 +- .../CodeGen/AMDGPU/mul24-pass-ordering.ll | 261 + llvm/test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll | 2 +- llvm/test/CodeGen/AMDGPU/multilevel-break.ll | 72 +- .../no-initializer-constant-addrspace.ll | 8 +- llvm/test/CodeGen/AMDGPU/occupancy-levels.ll | 288 + llvm/test/CodeGen/AMDGPU/offset-split-flat.ll | 1470 ++ .../CodeGen/AMDGPU/offset-split-global.ll | 1408 ++ .../AMDGPU/opencl-printf-no-hostcall.ll | 18 + llvm/test/CodeGen/AMDGPU/opencl-printf.ll | 34 + llvm/test/CodeGen/AMDGPU/operand-folding.ll | 24 + llvm/test/CodeGen/AMDGPU/packed-op-sel.ll | 6 +- llvm/test/CodeGen/AMDGPU/preserve-hi16.ll | 154 +- .../CodeGen/AMDGPU/private-memory-r600.ll | 2 +- .../AMDGPU/promote-alloca-addrspacecast.ll | 2 +- .../AMDGPU/promote-alloca-to-lds-icmp.ll | 2 +- .../AMDGPU/promote-alloca-to-lds-phi.ll | 2 +- .../AMDGPU/promote-alloca-to-lds-select.ll | 2 +- .../AMDGPU/promote-constOffset-to-imm.ll | 133 +- .../AMDGPU/r600-constant-array-fixup.ll | 2 +- llvm/test/CodeGen/AMDGPU/r600-export-fix.ll | 51 +- llvm/test/CodeGen/AMDGPU/read_register.ll | 20 +- .../reduce-build-vec-ext-to-ext-build-vec.ll | 6 +- llvm/test/CodeGen/AMDGPU/ret.ll | 12 +- .../rewrite-out-arguments-address-space.ll | 4 +- .../CodeGen/AMDGPU/rewrite-out-arguments.ll | 
56 +- llvm/test/CodeGen/AMDGPU/rsq.ll | 2 +- llvm/test/CodeGen/AMDGPU/s_code_end.ll | 37 +- llvm/test/CodeGen/AMDGPU/saddo.ll | 463 +- llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll | 201 +- llvm/test/CodeGen/AMDGPU/scratch-simple.ll | 2 +- llvm/test/CodeGen/AMDGPU/sdiv.ll | 64 +- llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll | 31 +- llvm/test/CodeGen/AMDGPU/select-opt.ll | 4 +- llvm/test/CodeGen/AMDGPU/select.f16.ll | 30 +- llvm/test/CodeGen/AMDGPU/selectcc-opt.ll | 2 +- llvm/test/CodeGen/AMDGPU/setcc-opt.ll | 16 +- llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll | 3 +- llvm/test/CodeGen/AMDGPU/sgpr-copy.ll | 3 +- llvm/test/CodeGen/AMDGPU/shift-i128.ll | 207 +- llvm/test/CodeGen/AMDGPU/shl.ll | 1712 ++- llvm/test/CodeGen/AMDGPU/shl.v2i16.ll | 578 +- llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll | 2 +- .../CodeGen/AMDGPU/shrink-add-sub-constant.ll | 603 +- llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll | 4 +- llvm/test/CodeGen/AMDGPU/si-scheduler.ll | 2 +- .../CodeGen/AMDGPU/si-spill-sgpr-stack.ll | 12 +- llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll | 268 +- llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll | 69 +- llvm/test/CodeGen/AMDGPU/sint_to_fp.ll | 2 +- llvm/test/CodeGen/AMDGPU/sitofp.f16.ll | 14 +- llvm/test/CodeGen/AMDGPU/sminmax.ll | 29 +- llvm/test/CodeGen/AMDGPU/sminmax.v2i16.ll | 18 +- llvm/test/CodeGen/AMDGPU/smrd-vccz-bug.ll | 6 +- llvm/test/CodeGen/AMDGPU/smrd.ll | 1 - llvm/test/CodeGen/AMDGPU/smrd_vmem_war.ll | 29 + llvm/test/CodeGen/AMDGPU/spill-m0.ll | 132 +- .../test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll | 24 +- llvm/test/CodeGen/AMDGPU/spill-wide-sgpr.ll | 80 +- .../CodeGen/AMDGPU/split-arg-dbg-value.ll | 223 + ...tack-pointer-offset-relative-frameindex.ll | 66 + .../CodeGen/AMDGPU/stack-realign-kernel.ll | 1 - llvm/test/CodeGen/AMDGPU/store-hi16.ll | 32 +- .../CodeGen/AMDGPU/sub-zext-cc-zext-cc.ll | 34 + llvm/test/CodeGen/AMDGPU/sub.i16.ll | 8 +- llvm/test/CodeGen/AMDGPU/sub.v2i16.ll | 42 +- .../AMDGPU/subreg-coalescer-undef-use.ll | 49 +- 
llvm/test/CodeGen/AMDGPU/trunc-combine.ll | 134 +- llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll | 97 +- llvm/test/CodeGen/AMDGPU/uint_to_fp.ll | 2 +- llvm/test/CodeGen/AMDGPU/uitofp.f16.ll | 13 +- llvm/test/CodeGen/AMDGPU/uniform-cfg.ll | 5 +- .../AMDGPU/uniform-loop-inside-nonuniform.ll | 5 +- llvm/test/CodeGen/AMDGPU/unsupported-calls.ll | 10 + .../CodeGen/AMDGPU/use-sgpr-multiple-times.ll | 9 +- llvm/test/CodeGen/AMDGPU/v_mac_f16.ll | 17 +- llvm/test/CodeGen/AMDGPU/valu-i1.ll | 11 +- .../CodeGen/AMDGPU/vector_shuffle.packed.ll | 122 +- ...r-descriptor-waterfall-loop-idom-update.ll | 47 + ...vgpr-spill-emergency-stack-slot-compute.ll | 1 + llvm/test/CodeGen/AMDGPU/wait.ll | 8 +- llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.ll | 50 +- llvm/test/CodeGen/AMDGPU/wave32.ll | 64 +- llvm/test/CodeGen/AMDGPU/wqm.ll | 3 + llvm/test/CodeGen/AMDGPU/write_register.ll | 20 +- llvm/test/CodeGen/AMDGPU/wwm-reserved.ll | 6 +- llvm/test/CodeGen/AMDGPU/zero_extend.ll | 6 +- .../CodeGen/ARM/2007-01-19-InfiniteLoop.ll | 2 +- .../CodeGen/ARM/2009-07-18-RewriterBug.ll | 26 +- .../CodeGen/ARM/2010-11-29-PrologueBug.ll | 2 +- llvm/test/CodeGen/ARM/2010-12-07-PEIBug.ll | 2 +- llvm/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll | 2 +- .../CodeGen/ARM/2014-05-14-DwarfEHCrash.ll | 2 +- .../ARM/2016-05-01-RegScavengerAssert.ll | 2 +- .../ARM/GlobalISel/arm-irtranslator.ll | 30 +- .../ARM/GlobalISel/arm-param-lowering.ll | 36 +- .../irtranslator-varargs-lowering.ll | 8 +- llvm/test/CodeGen/ARM/O3-pipeline.ll | 32 +- llvm/test/CodeGen/ARM/ParallelDSP/blocks.ll | 295 + .../ARM/ParallelDSP/complex_dot_prod.ll | 150 + llvm/test/CodeGen/ARM/ParallelDSP/exchange.ll | 452 + .../ARM/ParallelDSP/inner-full-unroll.ll | 4 +- .../ARM/ParallelDSP/multi-use-loads.ll | 351 +- .../CodeGen/ARM/ParallelDSP/overlapping.ll | 220 + llvm/test/CodeGen/ARM/ParallelDSP/pr42729.ll | 84 + llvm/test/CodeGen/ARM/ParallelDSP/pr43073.ll | 294 + llvm/test/CodeGen/ARM/ParallelDSP/sext-acc.ll | 186 + 
llvm/test/CodeGen/ARM/ParallelDSP/smlad11.ll | 4 +- llvm/test/CodeGen/ARM/ParallelDSP/smlad12.ll | 2 +- llvm/test/CodeGen/ARM/ParallelDSP/smladx-1.ll | 9 +- .../test/CodeGen/ARM/ParallelDSP/smlaldx-1.ll | 9 +- .../test/CodeGen/ARM/ParallelDSP/smlaldx-2.ll | 9 +- .../ARM/ParallelDSP/unroll-n-jam-smlad.ll | 14 +- llvm/test/CodeGen/ARM/Windows/wineh-basic.ll | 4 +- llvm/test/CodeGen/ARM/a15-partial-update.ll | 71 +- .../test/CodeGen/ARM/addsubcarry-promotion.ll | 26 +- llvm/test/CodeGen/ARM/addsubo-legalization.ll | 143 +- llvm/test/CodeGen/ARM/arm-shrink-wrapping.ll | 28 +- llvm/test/CodeGen/ARM/arm-storebytesmerge.ll | 2 +- llvm/test/CodeGen/ARM/bfi.ll | 101 +- llvm/test/CodeGen/ARM/bswap16.ll | 37 +- .../CodeGen/ARM/build-attributes-fn-attr6.ll | 1 + .../build-attributes-optimization-minsize.ll | 6 +- .../build-attributes-optimization-mixed.ll | 6 +- .../build-attributes-optimization-optnone.ll | 6 +- .../build-attributes-optimization-optsize.ll | 6 +- .../ARM/build-attributes-optimization.ll | 6 +- llvm/test/CodeGen/ARM/build-attributes.ll | 96 +- llvm/test/CodeGen/ARM/byval_load_align.ll | 4 +- llvm/test/CodeGen/ARM/call-tc.ll | 20 +- llvm/test/CodeGen/ARM/cfguard-checks.ll | 151 + llvm/test/CodeGen/ARM/cfguard-module-flag.ll | 26 + llvm/test/CodeGen/ARM/clang-section.ll | 8 +- llvm/test/CodeGen/ARM/cls.ll | 27 + llvm/test/CodeGen/ARM/cmov_fp16.ll | 261 + llvm/test/CodeGen/ARM/coalesce-dbgvalue.ll | 4 +- llvm/test/CodeGen/ARM/combine-vmovdrr.ll | 4 +- llvm/test/CodeGen/ARM/compare-call.ll | 2 +- llvm/test/CodeGen/ARM/constantpool-align.ll | 25 + .../CodeGen/ARM/constantpool-promote-dbg.ll | 2 +- llvm/test/CodeGen/ARM/constantpool-promote.ll | 4 +- .../ARM/cortex-a57-misched-ldm-wrback.ll | 2 +- .../CodeGen/ARM/cortex-a57-misched-ldm.ll | 2 +- .../ARM/cortex-a57-misched-stm-wrback.ll | 2 +- .../CodeGen/ARM/cortex-a57-misched-stm.ll | 2 +- .../CodeGen/ARM/cortex-a57-misched-vfma.ll | 18 +- .../ARM/cortex-a57-misched-vldm-wrback.ll | 2 +- 
.../CodeGen/ARM/cortex-a57-misched-vldm.ll | 2 +- .../ARM/cortex-a57-misched-vstm-wrback.ll | 2 +- .../CodeGen/ARM/cortex-a57-misched-vstm.ll | 2 +- llvm/test/CodeGen/ARM/csr-split.ll | 137 + llvm/test/CodeGen/ARM/cxx-tlscc.ll | 14 +- .../CodeGen/ARM/dagcombine-anyexttozeroext.ll | 9 +- llvm/test/CodeGen/ARM/debug-info-arg.ll | 2 +- llvm/test/CodeGen/ARM/debug-info-blocks.ll | 2 +- llvm/test/CodeGen/ARM/debug-info-sreg2.ll | 2 +- .../CodeGen/ARM/debuginfo-split-carryexpr.ll | 51 + llvm/test/CodeGen/ARM/disable-fp-elim.ll | 4 +- llvm/test/CodeGen/ARM/dsp-mlal.ll | 223 +- llvm/test/CodeGen/ARM/dwarf-frame.ll | 38 + llvm/test/CodeGen/ARM/dwarf-unwind.ll | 2 +- llvm/test/CodeGen/ARM/early-cfi-sections.ll | 2 +- llvm/test/CodeGen/ARM/fcmp-xo.ll | 12 +- llvm/test/CodeGen/ARM/fold-stack-adjust.ll | 13 + llvm/test/CodeGen/ARM/fp-intrinsics.ll | 1083 ++ llvm/test/CodeGen/ARM/fp16-fullfp16.ll | 2 +- llvm/test/CodeGen/ARM/fp16-fusedMAC.ll | 429 + llvm/test/CodeGen/ARM/fp16-instructions.ll | 64 +- llvm/test/CodeGen/ARM/fp16-promote.ll | 17 +- llvm/test/CodeGen/ARM/fpcmp.ll | 10 +- .../ARM/fragmented-args-multiple-regs.ll | 72 + llvm/test/CodeGen/ARM/fusedMAC.ll | 50 +- llvm/test/CodeGen/ARM/global-merge-1.ll | 6 +- llvm/test/CodeGen/ARM/gnu_mcount_nc.ll | 41 + llvm/test/CodeGen/ARM/hello.ll | 2 +- ...st-and-by-const-from-lshr-in-eqcmp-zero.ll | 875 +- ...ist-and-by-const-from-shl-in-eqcmp-zero.ll | 973 +- llvm/test/CodeGen/ARM/ifcvt-iter-indbr.ll | 2 +- llvm/test/CodeGen/ARM/ifcvt10.ll | 2 +- llvm/test/CodeGen/ARM/ifcvt11.ll | 6 +- llvm/test/CodeGen/ARM/ifcvt5.ll | 4 +- .../CodeGen/ARM/inlineasm-X-allocation.ll | 2 +- llvm/test/CodeGen/ARM/insn-sched1.ll | 2 +- llvm/test/CodeGen/ARM/intrinsics-cmse.ll | 45 + .../test/CodeGen/ARM/ipra-exact-definition.ll | 44 + llvm/test/CodeGen/ARM/ipra-no-csr.ll | 22 + llvm/test/CodeGen/ARM/ipra-r0-returned.ll | 18 + llvm/test/CodeGen/ARM/ipra-reg-usage.ll | 2 +- llvm/test/CodeGen/ARM/ipra.ll | 202 + llvm/test/CodeGen/ARM/isel-v8i32-crash.ll 
| 2 +- llvm/test/CodeGen/ARM/ldrd.ll | 18 +- llvm/test/CodeGen/ARM/legalize-bitcast.ll | 59 + .../CodeGen/ARM/load-combine-big-endian.ll | 761 +- llvm/test/CodeGen/ARM/load-combine.ll | 662 +- .../test/CodeGen/ARM/loopvectorize_pr33804.ll | 2 +- llvm/test/CodeGen/ARM/lsr-unfolded-offset.ll | 2 +- llvm/test/CodeGen/ARM/memcpy-ldm-stm.ll | 12 +- llvm/test/CodeGen/ARM/memfunc.ll | 18 +- llvm/test/CodeGen/ARM/mul_const.ll | 191 +- llvm/test/CodeGen/ARM/neon-v8.1a.ll | 48 +- llvm/test/CodeGen/ARM/neon-vcadd.ll | 54 + .../test/CodeGen/ARM/neon-vqaddsub-upgrade.ll | 330 + llvm/test/CodeGen/ARM/noreturn.ll | 10 +- llvm/test/CodeGen/ARM/out-of-registers.ll | 2 +- llvm/test/CodeGen/ARM/postrasched.ll | 30 + llvm/test/CodeGen/ARM/pow.ll | 16 + llvm/test/CodeGen/ARM/prefetch.ll | 49 + llvm/test/CodeGen/ARM/qdadd.ll | 186 + llvm/test/CodeGen/ARM/rev.ll | 57 +- llvm/test/CodeGen/ARM/ror.ll | 13 +- llvm/test/CodeGen/ARM/sadd_sat.ll | 428 + llvm/test/CodeGen/ARM/sadd_sat_plus.ll | 379 + llvm/test/CodeGen/ARM/select.ll | 4 +- llvm/test/CodeGen/ARM/select_const.ll | 763 +- llvm/test/CodeGen/ARM/shift_minsize.ll | 8 +- llvm/test/CodeGen/ARM/shift_parts.ll | 65 +- llvm/test/CodeGen/ARM/signext-inreg.ll | 17 + llvm/test/CodeGen/ARM/smml.ll | 7 + .../CodeGen/ARM/softfp-constant-comparison.ll | 46 + llvm/test/CodeGen/ARM/ssub_sat.ll | 678 + llvm/test/CodeGen/ARM/ssub_sat_plus.ll | 384 + llvm/test/CodeGen/ARM/stack-guard-reassign.ll | 7 +- .../ARM/stack-protector-bmovpcb_call.ll | 4 +- llvm/test/CodeGen/ARM/stack-size-section.ll | 2 +- llvm/test/CodeGen/ARM/stack_guard_remat.ll | 2 +- .../CodeGen/ARM/struct-byval-frame-index.ll | 2 +- llvm/test/CodeGen/ARM/swifterror.ll | 2 +- llvm/test/CodeGen/ARM/swiftself.ll | 12 +- llvm/test/CodeGen/ARM/tail-call-weak.ll | 9 +- .../CodeGen/ARM/thumb1_return_sequence.ll | 4 +- llvm/test/CodeGen/ARM/uadd_sat.ll | 185 + llvm/test/CodeGen/ARM/uadd_sat_plus.ll | 232 + .../test/CodeGen/ARM/unschedule-first-call.ll | 2 +- 
llvm/test/CodeGen/ARM/urem-opt-size.ll | 2 +- llvm/test/CodeGen/ARM/useaa.ll | 2 +- llvm/test/CodeGen/ARM/usub_sat.ll | 188 + llvm/test/CodeGen/ARM/usub_sat_plus.ll | 220 + llvm/test/CodeGen/ARM/uxtb.ll | 75 +- llvm/test/CodeGen/ARM/v7k-abi-align.ll | 22 +- llvm/test/CodeGen/ARM/va_arg.ll | 51 +- .../vecreduce-fadd-legalization-soft-float.ll | 63 + .../ARM/vecreduce-fadd-legalization-strict.ll | 166 + .../ARM/vecreduce-fmul-legalization-strict.ll | 166 + llvm/test/CodeGen/ARM/vector-spilling.ll | 2 +- llvm/test/CodeGen/ARM/vfp.ll | 2 +- llvm/test/CodeGen/ARM/vldm-sched-a9.ll | 2 +- llvm/test/CodeGen/ARM/vmul.ll | 4 +- llvm/test/CodeGen/ARM/vqadd.ll | 64 +- llvm/test/CodeGen/ARM/vqdmul.ll | 24 +- llvm/test/CodeGen/ARM/vqsub.ll | 64 +- llvm/test/CodeGen/ARM/vrev.ll | 243 +- llvm/test/CodeGen/ARM/vsel-fp16.ll | 40 +- llvm/test/CodeGen/ARM/vsel.ll | 80 +- llvm/test/CodeGen/ARM/vstlane.ll | 383 +- llvm/test/CodeGen/ARM/warn-stack.ll | 4 +- llvm/test/CodeGen/AVR/rot.ll | 8 +- llvm/test/CodeGen/BPF/32-bit-subreg-alu.ll | 1 + .../CodeGen/BPF/32-bit-subreg-cond-select.ll | 17 +- .../BPF/32-bit-subreg-peephole-phi-1.ll | 34 + .../BPF/32-bit-subreg-peephole-phi-2.ll | 34 + .../BPF/32-bit-subreg-peephole-phi-3.ll | 52 + .../CodeGen/BPF/32-bit-subreg-peephole.ll | 27 +- llvm/test/CodeGen/BPF/BTF/array-size-0.ll | 2 +- llvm/test/CodeGen/BPF/BTF/binary-format.ll | 28 +- .../test/CodeGen/BPF/BTF/char-no-debuginfo.ll | 2 +- llvm/test/CodeGen/BPF/BTF/extern-builtin.ll | 89 + llvm/test/CodeGen/BPF/BTF/extern-func-arg.ll | 79 + .../test/CodeGen/BPF/BTF/extern-global-var.ll | 4 +- .../BPF/BTF/extern-var-func-weak-section.ll | 90 + .../CodeGen/BPF/BTF/extern-var-func-weak.ll | 90 + llvm/test/CodeGen/BPF/BTF/extern-var-func.ll | 91 + .../CodeGen/BPF/BTF/extern-var-section.ll | 119 + .../CodeGen/BPF/BTF/extern-var-struct-weak.ll | 109 + .../test/CodeGen/BPF/BTF/extern-var-struct.ll | 110 + .../BPF/BTF/extern-var-weak-section.ll | 117 + llvm/test/CodeGen/BPF/BTF/filename.ll | 8 +- 
llvm/test/CodeGen/BPF/BTF/func-func-ptr.ll | 8 +- llvm/test/CodeGen/BPF/BTF/func-non-void.ll | 8 +- llvm/test/CodeGen/BPF/BTF/func-source.ll | 8 +- llvm/test/CodeGen/BPF/BTF/func-typedef.ll | 8 +- llvm/test/CodeGen/BPF/BTF/func-unused-arg.ll | 8 +- llvm/test/CodeGen/BPF/BTF/func-void.ll | 8 +- .../test/CodeGen/BPF/BTF/global-var-inited.ll | 20 +- llvm/test/CodeGen/BPF/BTF/local-var.ll | 4 +- llvm/test/CodeGen/BPF/BTF/static-func.ll | 96 + .../BPF/BTF/static-var-derived-type.ll | 4 +- .../CodeGen/BPF/BTF/static-var-inited-sec.ll | 4 +- .../test/CodeGen/BPF/BTF/static-var-inited.ll | 4 +- .../BPF/BTF/static-var-readonly-sec.ll | 4 +- .../CodeGen/BPF/BTF/static-var-readonly.ll | 4 +- llvm/test/CodeGen/BPF/BTF/static-var-sec.ll | 4 +- .../BPF/BTF/static-var-zerolen-array.ll | 4 +- llvm/test/CodeGen/BPF/BTF/static-var.ll | 4 +- llvm/test/CodeGen/BPF/BTF/weak-global-2.ll | 66 + llvm/test/CodeGen/BPF/BTF/weak-global.ll | 65 + .../CodeGen/BPF/CORE/field-reloc-alu32.ll | 73 + .../BPF/CORE/field-reloc-bitfield-1.ll | 126 + .../BPF/CORE/field-reloc-bitfield-2.ll | 124 + llvm/test/CodeGen/BPF/CORE/intrinsic-array.ll | 12 +- .../CORE/intrinsic-fieldinfo-byte-size-1.ll | 153 + .../CORE/intrinsic-fieldinfo-byte-size-2.ll | 142 + .../CORE/intrinsic-fieldinfo-byte-size-3.ll | 133 + .../CORE/intrinsic-fieldinfo-byte-size-4.ll | 86 + .../CORE/intrinsic-fieldinfo-existence-1.ll | 167 + .../CORE/intrinsic-fieldinfo-existence-2.ll | 124 + .../CORE/intrinsic-fieldinfo-existence-3.ll | 132 + .../BPF/CORE/intrinsic-fieldinfo-lshift-1.ll | 158 + .../BPF/CORE/intrinsic-fieldinfo-lshift-2.ll | 125 + .../BPF/CORE/intrinsic-fieldinfo-rshift-1.ll | 153 + .../BPF/CORE/intrinsic-fieldinfo-rshift-2.ll | 124 + .../BPF/CORE/intrinsic-fieldinfo-rshift-3.ll | 134 + .../CORE/intrinsic-fieldinfo-signedness-1.ll | 167 + .../CORE/intrinsic-fieldinfo-signedness-2.ll | 155 + .../CORE/intrinsic-fieldinfo-signedness-3.ll | 152 + .../test/CodeGen/BPF/CORE/intrinsic-struct.ll | 10 +- 
.../CodeGen/BPF/CORE/intrinsic-transforms.ll | 120 + llvm/test/CodeGen/BPF/CORE/intrinsic-union.ll | 10 +- .../CodeGen/BPF/CORE/no-elf-ama-symbol.ll | 65 + llvm/test/CodeGen/BPF/CORE/no-narrow-load.ll | 156 + .../BPF/CORE/offset-reloc-access-str.ll | 8 +- .../CodeGen/BPF/CORE/offset-reloc-basic.ll | 19 +- .../BPF/CORE/offset-reloc-cast-array-1.ll | 128 + .../BPF/CORE/offset-reloc-cast-array-2.ll | 135 + .../BPF/CORE/offset-reloc-cast-struct-1.ll | 116 + .../BPF/CORE/offset-reloc-cast-struct-2.ll | 121 + .../BPF/CORE/offset-reloc-cast-struct-3.ll | 120 + .../BPF/CORE/offset-reloc-cast-union-1.ll | 121 + .../BPF/CORE/offset-reloc-cast-union-2.ll | 122 + .../CodeGen/BPF/CORE/offset-reloc-end-load.ll | 83 + .../CodeGen/BPF/CORE/offset-reloc-end-ret.ll | 78 + .../BPF/CORE/offset-reloc-fieldinfo-1.ll | 195 + .../BPF/CORE/offset-reloc-fieldinfo-2.ll | 258 + .../CodeGen/BPF/CORE/offset-reloc-global-1.ll | 82 + .../CodeGen/BPF/CORE/offset-reloc-global-2.ll | 98 + .../CodeGen/BPF/CORE/offset-reloc-global-3.ll | 87 + .../CodeGen/BPF/CORE/offset-reloc-ignore.ll | 64 + .../BPF/CORE/offset-reloc-middle-chain.ll | 132 + .../BPF/CORE/offset-reloc-multi-array-1.ll | 104 + .../BPF/CORE/offset-reloc-multi-array-2.ll | 110 + .../BPF/CORE/offset-reloc-multilevel.ll | 23 +- .../BPF/CORE/offset-reloc-pointer-1.ll | 86 + .../BPF/CORE/offset-reloc-pointer-2.ll | 88 + .../BPF/CORE/offset-reloc-struct-anonymous.ll | 25 +- .../BPF/CORE/offset-reloc-struct-array.ll | 25 +- .../BPF/CORE/offset-reloc-typedef-array.ll | 9 +- .../BPF/CORE/offset-reloc-typedef-struct.ll | 7 +- .../BPF/CORE/offset-reloc-typedef-union.ll | 7 +- .../CodeGen/BPF/CORE/offset-reloc-typedef.ll | 9 +- .../CodeGen/BPF/CORE/offset-reloc-union.ll | 23 +- llvm/test/CodeGen/BPF/callx.ll | 20 + llvm/test/CodeGen/BPF/dwarfdump.ll | 2 +- llvm/test/CodeGen/BPF/objdump_two_funcs.ll | 2 +- llvm/test/CodeGen/BPF/optnone-1.ll | 52 + llvm/test/CodeGen/BPF/reloc-btf-2.ll | 2 +- llvm/test/CodeGen/BPF/reloc-btf.ll | 2 +- 
llvm/test/CodeGen/BPF/remove_truncate_6.ll | 80 + llvm/test/CodeGen/BPF/shifts.ll | 2 +- llvm/test/CodeGen/BPF/warn-stack.ll | 4 +- llvm/test/CodeGen/BPF/xadd.ll | 2 +- .../test/CodeGen/Generic/DbgValueAggregate.ll | 36 + llvm/test/CodeGen/Generic/cfi-sections.ll | 4 +- .../Generic/expand-experimental-reductions.ll | 13 + llvm/test/CodeGen/Hexagon/64bit_tstbit.ll | 34 + llvm/test/CodeGen/Hexagon/NVJumpCmp.ll | 2 +- llvm/test/CodeGen/Hexagon/addh-sext-trunc.ll | 2 +- llvm/test/CodeGen/Hexagon/alu64.ll | 2 +- .../Hexagon/autohvx/isel-setcc-v256i1.ll | 15 + .../CodeGen/Hexagon/autohvx/minmax-128b.ll | 344 + .../CodeGen/Hexagon/autohvx/minmax-64b.ll | 344 + .../Hexagon/autohvx/vector-compare-128b.ll | 180 +- .../Hexagon/autohvx/vector-compare-64b.ll | 180 +- llvm/test/CodeGen/Hexagon/base-offset-addr.ll | 2 +- llvm/test/CodeGen/Hexagon/base-offset-post.ll | 2 +- .../CodeGen/Hexagon/bit-loop-rc-mismatch.ll | 4 +- llvm/test/CodeGen/Hexagon/builtin-prefetch.ll | 2 +- llvm/test/CodeGen/Hexagon/cfi-offset.ll | 2 +- llvm/test/CodeGen/Hexagon/cmp-extend.ll | 2 +- llvm/test/CodeGen/Hexagon/cmp.ll | 2 +- .../CodeGen/Hexagon/constp-combine-neg.ll | 2 +- llvm/test/CodeGen/Hexagon/constp-extract.ll | 2 +- llvm/test/CodeGen/Hexagon/constp-physreg.ll | 2 +- llvm/test/CodeGen/Hexagon/dead-store-stack.ll | 4 +- llvm/test/CodeGen/Hexagon/dealloc_return.ll | 2 +- .../Hexagon/early-if-conversion-bug1.ll | 4 +- llvm/test/CodeGen/Hexagon/ehabi.ll | 2 +- llvm/test/CodeGen/Hexagon/fminmax.ll | 2 +- llvm/test/CodeGen/Hexagon/fpelim-basic.ll | 4 +- llvm/test/CodeGen/Hexagon/hasfp-crash1.ll | 2 +- llvm/test/CodeGen/Hexagon/hasfp-crash2.ll | 2 +- .../hexagon_vector_loop_carried_reuse.ll | 2 +- ...agon_vector_loop_carried_reuse_constant.ll | 2 +- llvm/test/CodeGen/Hexagon/hwloop-crit-edge.ll | 2 +- llvm/test/CodeGen/Hexagon/hwloop-preheader.ll | 2 +- llvm/test/CodeGen/Hexagon/insert4.ll | 2 +- .../CodeGen/Hexagon/isel-bitcast-v8i1-i8.ll | 18 + .../Hexagon/isel-bitcast-v8i8-v4i16.ll | 13 + 
.../CodeGen/Hexagon/isel-minmax-v64bit.ll | 202 + llvm/test/CodeGen/Hexagon/isel-prefer.ll | 4 +- .../test/CodeGen/Hexagon/isel-vselect-v4i8.ll | 9 + llvm/test/CodeGen/Hexagon/jt-in-text.ll | 2 +- llvm/test/CodeGen/Hexagon/mem-fi-add.ll | 4 +- llvm/test/CodeGen/Hexagon/memops-stack.ll | 4 +- llvm/test/CodeGen/Hexagon/muxii-bug.ll | 30 + .../CodeGen/Hexagon/packetizer-resources.ll | 29 + llvm/test/CodeGen/Hexagon/pic-regusage.ll | 2 +- llvm/test/CodeGen/Hexagon/postinc-offset.ll | 2 +- llvm/test/CodeGen/Hexagon/rdf-dead-loop.ll | 2 +- .../CodeGen/Hexagon/rdf-inline-asm-fixed.ll | 2 +- llvm/test/CodeGen/Hexagon/rdf-inline-asm.ll | 2 +- llvm/test/CodeGen/Hexagon/reg-by-name.ll | 780 ++ .../test/CodeGen/Hexagon/reg-scavengebug-3.ll | 4 +- llvm/test/CodeGen/Hexagon/runtime-stkchk.ll | 4 +- llvm/test/CodeGen/Hexagon/sdr-shr32.ll | 2 +- llvm/test/CodeGen/Hexagon/signext-inreg.ll | 272 + .../Hexagon/switch-lut-explicit-section.ll | 2 +- .../Hexagon/switch-lut-function-section.ll | 2 +- .../Hexagon/switch-lut-multiple-functions.ll | 2 +- .../Hexagon/switch-lut-text-section.ll | 2 +- llvm/test/CodeGen/Hexagon/swp-art-deps-rec.ll | 2 +- llvm/test/CodeGen/Hexagon/swp-bad-sched.ll | 2 +- llvm/test/CodeGen/Hexagon/swp-carried-1.ll | 2 +- llvm/test/CodeGen/Hexagon/swp-chain-refs.ll | 2 +- llvm/test/CodeGen/Hexagon/swp-change-dep1.ll | 2 +- llvm/test/CodeGen/Hexagon/swp-change-deps.ll | 2 +- llvm/test/CodeGen/Hexagon/swp-check-offset.ll | 6 +- llvm/test/CodeGen/Hexagon/swp-const-tc1.ll | 2 +- llvm/test/CodeGen/Hexagon/swp-const-tc2.ll | 2 +- llvm/test/CodeGen/Hexagon/swp-const-tc3.ll | 2 +- .../CodeGen/Hexagon/swp-conv3x3-nested.ll | 4 +- .../test/CodeGen/Hexagon/swp-copytophi-dag.ll | 2 +- llvm/test/CodeGen/Hexagon/swp-crash-iter.ll | 32 + .../CodeGen/Hexagon/swp-dep-neg-offset.ll | 2 +- llvm/test/CodeGen/Hexagon/swp-disable-Os.ll | 2 +- .../CodeGen/Hexagon/swp-epilog-numphis.ll | 2 +- llvm/test/CodeGen/Hexagon/swp-epilog-phi10.ll | 2 +- 
llvm/test/CodeGen/Hexagon/swp-epilog-phi12.ll | 54 + llvm/test/CodeGen/Hexagon/swp-epilog-phi2.ll | 2 +- llvm/test/CodeGen/Hexagon/swp-epilog-phi4.ll | 2 +- llvm/test/CodeGen/Hexagon/swp-epilog-phi5.ll | 2 +- llvm/test/CodeGen/Hexagon/swp-epilog-phi7.ll | 4 +- llvm/test/CodeGen/Hexagon/swp-epilog-phi8.ll | 2 +- llvm/test/CodeGen/Hexagon/swp-kernel-phi1.ll | 2 +- llvm/test/CodeGen/Hexagon/swp-large-rec.ll | 2 +- llvm/test/CodeGen/Hexagon/swp-listen-loop3.ll | 2 +- .../Hexagon/swp-loop-carried-unknown.ll | 2 +- llvm/test/CodeGen/Hexagon/swp-lots-deps.ll | 2 +- llvm/test/CodeGen/Hexagon/swp-max.ll | 2 +- llvm/test/CodeGen/Hexagon/swp-maxstart.ll | 2 +- .../CodeGen/Hexagon/swp-memrefs-epilog.ll | 2 +- llvm/test/CodeGen/Hexagon/swp-multi-loops.ll | 2 +- llvm/test/CodeGen/Hexagon/swp-new-phi.ll | 2 +- llvm/test/CodeGen/Hexagon/swp-order-copies.ll | 2 +- llvm/test/CodeGen/Hexagon/swp-order-deps7.ll | 2 +- llvm/test/CodeGen/Hexagon/swp-order.ll | 2 +- .../test/CodeGen/Hexagon/swp-phi-ch-offset.ll | 2 +- llvm/test/CodeGen/Hexagon/swp-phi-chains.ll | 2 +- llvm/test/CodeGen/Hexagon/swp-phi-dep.ll | 2 +- llvm/test/CodeGen/Hexagon/swp-phi-ref.ll | 2 +- llvm/test/CodeGen/Hexagon/swp-prolog-phi.ll | 2 +- llvm/test/CodeGen/Hexagon/swp-prolog-phi4.ll | 2 +- llvm/test/CodeGen/Hexagon/swp-rename.ll | 2 +- llvm/test/CodeGen/Hexagon/swp-resmii-1.ll | 2 +- llvm/test/CodeGen/Hexagon/swp-resmii.ll | 2 +- llvm/test/CodeGen/Hexagon/swp-reuse-phi-6.ll | 2 +- llvm/test/CodeGen/Hexagon/swp-sigma.ll | 2 +- llvm/test/CodeGen/Hexagon/swp-stages4.ll | 6 +- llvm/test/CodeGen/Hexagon/swp-subreg.ll | 2 +- llvm/test/CodeGen/Hexagon/swp-swap.ll | 2 +- llvm/test/CodeGen/Hexagon/swp-tfri.ll | 2 +- llvm/test/CodeGen/Hexagon/swp-vect-dotprod.ll | 6 +- llvm/test/CodeGen/Hexagon/swp-vmult.ll | 2 +- llvm/test/CodeGen/Hexagon/swp-vsum.ll | 4 +- llvm/test/CodeGen/Hexagon/tstbit.ll | 85 +- llvm/test/CodeGen/Hexagon/usr-ovf-dep.ll | 2 +- .../CodeGen/Hexagon/v6-unaligned-spill.ll | 2 +- 
llvm/test/CodeGen/Hexagon/v60Intrins.ll | 2 +- llvm/test/CodeGen/Hexagon/v60small.ll | 2 +- llvm/test/CodeGen/Hexagon/v6vec-vprint.ll | 2 +- llvm/test/CodeGen/Hexagon/vec-pred-spill1.ll | 4 +- .../CodeGen/Hexagon/vect/vect-bad-bitcast.ll | 4 +- .../test/CodeGen/Hexagon/vect/vect-extract.ll | 2 +- .../test/CodeGen/Hexagon/vect/vect-shuffle.ll | 2 +- llvm/test/CodeGen/Hexagon/vect/vect-v4i16.ll | 2 +- .../test/CodeGen/Hexagon/vect/vect-vshifts.ll | 2 +- llvm/test/CodeGen/Hexagon/vect/vect-xor.ll | 2 +- .../MIR/AMDGPU/machine-function-info.ll | 33 +- .../CodeGen/MIR/Mips/setRegClassOrRegBank.ll | 27 + llvm/test/CodeGen/MSP430/cc_args.ll | 2 +- llvm/test/CodeGen/MSP430/cc_ret.ll | 2 +- llvm/test/CodeGen/MSP430/interrupt.ll | 9 + llvm/test/CodeGen/MSP430/selectcc.ll | 46 + .../MSP430/shift-amount-threshold-b.ll | 50 + .../CodeGen/MSP430/shift-amount-threshold.ll | 210 + .../MSP430/transient-stack-alignment.ll | 2 +- llvm/test/CodeGen/Mips/Fast-ISel/br1.ll | 2 +- .../test/CodeGen/Mips/Fast-ISel/loadstore2.ll | 2 +- .../CodeGen/Mips/Fast-ISel/loadstrconst.ll | 2 +- llvm/test/CodeGen/Mips/Fast-ISel/logopm.ll | 4 +- .../CodeGen/Mips/Fast-ISel/simplestorefp1.ll | 2 +- .../CodeGen/Mips/Fast-ISel/simplestorei.ll | 2 +- .../irtranslator/aggregate_struct_return.ll | 132 + .../Mips/GlobalISel/irtranslator/call.ll | 36 +- .../GlobalISel/irtranslator/extend_args.ll | 12 +- .../GlobalISel/irtranslator/sret_pointer.ll | 39 + .../GlobalISel/irtranslator/stack_args.ll | 2 +- .../Mips/GlobalISel/irtranslator/var_arg.ll | 66 + .../CodeGen/Mips/GlobalISel/llvm-ir/add.ll | 79 +- .../Mips/GlobalISel/llvm-ir/add_vec.ll | 70 + .../GlobalISel/llvm-ir/add_vec_builtin.ll | 138 + .../llvm-ir/aggregate_struct_return.ll | 114 + .../Mips/GlobalISel/llvm-ir/bitreverse.ll | 184 + .../Mips/GlobalISel/llvm-ir/bitwise.ll | 198 + .../CodeGen/Mips/GlobalISel/llvm-ir/branch.ll | 3 +- .../Mips/GlobalISel/llvm-ir/brindirect.ll | 33 + .../CodeGen/Mips/GlobalISel/llvm-ir/bswap.ll | 68 + 
.../Mips/GlobalISel/llvm-ir/constants.ll | 14 +- .../Mips/GlobalISel/llvm-ir/dyn_stackalloc.ll | 64 + .../Mips/GlobalISel/llvm-ir/fabs_vec.ll | 34 + .../GlobalISel/llvm-ir/fabs_vec_builtin.ll | 35 + .../CodeGen/Mips/GlobalISel/llvm-ir/fcmp.ll | 4 +- .../CodeGen/Mips/GlobalISel/llvm-ir/fence.ll | 13 + ...loating_point_vec_arithmetic_operations.ll | 145 + ...point_vec_arithmetic_operations_builtin.ll | 146 + .../GlobalISel/llvm-ir/fptosi_and_fptoui.ll | 219 + .../Mips/GlobalISel/llvm-ir/fsqrt_vec.ll | 34 + .../GlobalISel/llvm-ir/fsqrt_vec_builtin.ll | 35 + .../CodeGen/Mips/GlobalISel/llvm-ir/icmp.ll | 35 +- .../Mips/GlobalISel/llvm-ir/implicit_def.ll | 83 + .../llvm-ir/inttoptr_and_ptrtoint.ll | 24 + .../GlobalISel/llvm-ir/jump_table_and_brjt.ll | 287 + .../CodeGen/Mips/GlobalISel/llvm-ir/load.ll | 4 +- .../GlobalISel/llvm-ir/load_store_fold.ll | 105 + .../Mips/GlobalISel/llvm-ir/load_store_vec.ll | 80 + .../llvm-ir/long_ambiguous_chain_s32.ll | 210 +- .../llvm-ir/long_ambiguous_chain_s64.ll | 266 +- .../CodeGen/Mips/GlobalISel/llvm-ir/mul.ll | 50 +- .../Mips/GlobalISel/llvm-ir/mul_vec.ll | 70 + .../GlobalISel/llvm-ir/mul_vec_builtin.ll | 74 + .../CodeGen/Mips/GlobalISel/llvm-ir/phi.ll | 27 +- .../Mips/GlobalISel/llvm-ir/rem_and_div.ll | 36 +- .../GlobalISel/llvm-ir/rem_and_div_vec.ll | 274 + .../llvm-ir/rem_and_div_vec_builtin.ll | 290 + .../CodeGen/Mips/GlobalISel/llvm-ir/select.ll | 35 +- .../GlobalISel/llvm-ir/sitofp_and_uitofp.ll | 201 + .../Mips/GlobalISel/llvm-ir/sret_pointer.ll | 38 + .../Mips/GlobalISel/llvm-ir/stack_args.ll | 4 +- .../CodeGen/Mips/GlobalISel/llvm-ir/store.ll | 4 +- .../CodeGen/Mips/GlobalISel/llvm-ir/sub.ll | 26 +- .../Mips/GlobalISel/llvm-ir/sub_vec.ll | 70 + .../GlobalISel/llvm-ir/sub_vec_builtin.ll | 138 + .../GlobalISel/llvm-ir/test_TypeInfoforMF.ll | 12 +- .../CodeGen/Mips/GlobalISel/llvm-ir/trap.ll | 14 + .../CodeGen/Mips/GlobalISel/llvm-ir/trunc.ll | 13 + .../llvm-ir/truncStore_and_aExtLoad.ll | 3 +- 
.../Mips/GlobalISel/llvm-ir/var_arg.ll | 67 + .../llvm-ir/zextLoad_and_sextLoad.ll | 28 + .../Mips/GlobalISel/llvm-ir/zext_and_sext.ll | 27 + llvm/test/CodeGen/Mips/address-selection.ll | 6 +- llvm/test/CodeGen/Mips/atomic-min-max-64.ll | 158 + llvm/test/CodeGen/Mips/atomic-min-max.ll | 4674 +++++++ llvm/test/CodeGen/Mips/atomic.ll | 2 +- llvm/test/CodeGen/Mips/beqzc.ll | 2 +- llvm/test/CodeGen/Mips/beqzc1.ll | 2 +- llvm/test/CodeGen/Mips/biggot.ll | 14 +- llvm/test/CodeGen/Mips/blockaddr.ll | 39 +- .../Mips/branch-relaxation-with-hazard.ll | 2 + llvm/test/CodeGen/Mips/brsize3.ll | 2 +- llvm/test/CodeGen/Mips/brsize3a.ll | 2 +- llvm/test/CodeGen/Mips/cconv/vector.ll | 1068 +- llvm/test/CodeGen/Mips/ci2.ll | 2 +- llvm/test/CodeGen/Mips/cmplarge.ll | 4 +- .../Mips/compactbranches/no-beqzc-bnezc.ll | 4 +- llvm/test/CodeGen/Mips/const1.ll | 2 +- llvm/test/CodeGen/Mips/const4a.ll | 4 +- llvm/test/CodeGen/Mips/const6.ll | 4 +- llvm/test/CodeGen/Mips/const6a.ll | 2 +- llvm/test/CodeGen/Mips/constraint-empty.ll | 14 + llvm/test/CodeGen/Mips/copy-fp64.ll | 29 + llvm/test/CodeGen/Mips/cpus-no-mips64.ll | 16 + llvm/test/CodeGen/Mips/cpus.ll | 69 + llvm/test/CodeGen/Mips/ctlz.ll | 2 +- .../CodeGen/Mips/delay-slot-fill-forward.ll | 2 +- .../CodeGen/Mips/dynamic-stack-realignment.ll | 20 +- llvm/test/CodeGen/Mips/f16abs.ll | 4 +- llvm/test/CodeGen/Mips/fp16instrinsmc.ll | 4 +- llvm/test/CodeGen/Mips/fpneeded.ll | 2 +- llvm/test/CodeGen/Mips/fpnotneeded.ll | 2 +- llvm/test/CodeGen/Mips/frame-address.ll | 25 +- llvm/test/CodeGen/Mips/global-address.ll | 19 +- llvm/test/CodeGen/Mips/hf16call32.ll | 4 +- llvm/test/CodeGen/Mips/hf16call32_body.ll | 2 +- llvm/test/CodeGen/Mips/hfptrcall.ll | 4 +- .../Mips/indirect-jump-hazard/long-calls.ll | 25 +- .../Mips/inlineasm-constraint-reg64.ll | 19 +- llvm/test/CodeGen/Mips/l3mc.ll | 2 +- llvm/test/CodeGen/Mips/lcb2.ll | 2 +- llvm/test/CodeGen/Mips/lcb3c.ll | 2 +- llvm/test/CodeGen/Mips/lcb4a.ll | 2 +- llvm/test/CodeGen/Mips/lcb5.ll | 2 +- 
llvm/test/CodeGen/Mips/llvm-ir/lshr.ll | 258 +- llvm/test/CodeGen/Mips/llvm-ir/shl.ll | 246 +- llvm/test/CodeGen/Mips/long-calls.ll | 14 +- llvm/test/CodeGen/Mips/mbrsize4a.ll | 4 +- llvm/test/CodeGen/Mips/mcount.ll | 123 + .../Mips/micromips-ase-function-attribute.ll | 2 +- llvm/test/CodeGen/Mips/micromips-attr.ll | 4 +- llvm/test/CodeGen/Mips/mips16-hf-attr-2.ll | 12 +- llvm/test/CodeGen/Mips/mips16-hf-attr.ll | 12 +- llvm/test/CodeGen/Mips/mips16_32_1.ll | 2 +- llvm/test/CodeGen/Mips/mips16_32_10.ll | 6 +- llvm/test/CodeGen/Mips/mips16_32_3.ll | 6 +- llvm/test/CodeGen/Mips/mips16_32_4.ll | 6 +- llvm/test/CodeGen/Mips/mips16_32_5.ll | 6 +- llvm/test/CodeGen/Mips/mips16_32_6.ll | 6 +- llvm/test/CodeGen/Mips/mips16_32_7.ll | 6 +- llvm/test/CodeGen/Mips/mips16_32_8.ll | 8 +- llvm/test/CodeGen/Mips/mips16_32_9.ll | 6 +- .../test/CodeGen/Mips/msa/2r_vector_scalar.ll | 5 +- llvm/test/CodeGen/Mips/msa/arithmetic.ll | 64 + llvm/test/CodeGen/Mips/msa/nori.b.ll | 26 + llvm/test/CodeGen/Mips/msa/remat-ldi.ll | 107 + .../CodeGen/Mips/no-frame-pointer-elim.ll | 37 + llvm/test/CodeGen/Mips/nomips16.ll | 4 +- .../CodeGen/Mips/pbqp-reserved-physreg.ll | 2 +- llvm/test/CodeGen/Mips/powif64_16.ll | 2 +- llvm/test/CodeGen/Mips/pr42736.ll | 28 + llvm/test/CodeGen/Mips/reloc-jalr.ll | 143 +- llvm/test/CodeGen/Mips/s2rem.ll | 4 +- llvm/test/CodeGen/Mips/sel1c.ll | 2 +- llvm/test/CodeGen/Mips/sel2c.ll | 2 +- llvm/test/CodeGen/Mips/simplebr.ll | 4 +- llvm/test/CodeGen/Mips/sr1.ll | 4 +- llvm/test/CodeGen/Mips/stack-alignment.ll | 2 +- llvm/test/CodeGen/Mips/start-asm-file.ll | 83 +- llvm/test/CodeGen/Mips/tnaked.ll | 6 +- llvm/test/CodeGen/Mips/v2i16tof32.ll | 9 +- llvm/test/CodeGen/NVPTX/bug21465.ll | 2 +- llvm/test/CodeGen/NVPTX/bug22322.ll | 2 +- llvm/test/CodeGen/NVPTX/bug41651.ll | 13 + llvm/test/CodeGen/NVPTX/pr17529.ll | 2 +- llvm/test/CodeGen/NVPTX/shfl-p.ll | 172 + llvm/test/CodeGen/NVPTX/shfl-sync-p.ll | 180 + .../CodeGen/PowerPC/2007-09-08-unaligned.ll | 3 +- 
.../CodeGen/PowerPC/2008-10-28-f128-i32.ll | 53 +- .../test/CodeGen/PowerPC/2010-02-12-saveCR.ll | 2 +- llvm/test/CodeGen/PowerPC/CSR-fit.ll | 46 +- .../PowerPC/CompareEliminationSpillIssue.ll | 2 +- .../PowerPC/MCSE-caller-preserved-reg.ll | 19 +- .../CodeGen/PowerPC/PR35812-neg-cmpxchg.ll | 6 +- .../test/CodeGen/PowerPC/VSX-XForm-Scalars.ll | 6 +- .../PowerPC/aantidep-inline-asm-use.ll | 2 +- .../PowerPC/absol-jump-table-enabled.ll | 100 + llvm/test/CodeGen/PowerPC/add_cmp.ll | 44 +- llvm/test/CodeGen/PowerPC/addi-licm.ll | 2 +- llvm/test/CodeGen/PowerPC/addi-offset-fold.ll | 2 +- llvm/test/CodeGen/PowerPC/addrfuncstr.ll | 4 +- llvm/test/CodeGen/PowerPC/aix-byval-param.ll | 16 + llvm/test/CodeGen/PowerPC/aix-cc-abi.ll | 880 ++ llvm/test/CodeGen/PowerPC/aix-cc-altivec.ll | 23 + .../aix-external-sym-sdnode-lowering.ll | 128 + llvm/test/CodeGen/PowerPC/aix-func-dsc-gen.ll | 112 + .../test/CodeGen/PowerPC/aix-indirect-call.ll | 144 + .../PowerPC/aix-lower-block-address.ll | 72 + .../PowerPC/aix-lower-constant-pool-index.ll | 87 + .../CodeGen/PowerPC/aix-lower-jump-table.ll | 188 + llvm/test/CodeGen/PowerPC/aix-lr.ll | 32 + llvm/test/CodeGen/PowerPC/aix-nest-param.ll | 13 + .../PowerPC/aix-readonly-with-relocation.ll | 19 + .../PowerPC/aix-reference-func-addr-const.ll | 32 + llvm/test/CodeGen/PowerPC/aix-return55.ll | 11 + llvm/test/CodeGen/PowerPC/aix-space.ll | 17 + llvm/test/CodeGen/PowerPC/aix-sret-param.ll | 98 + llvm/test/CodeGen/PowerPC/aix-stackargs.ll | 12 + llvm/test/CodeGen/PowerPC/aix-trampoline.ll | 14 + .../CodeGen/PowerPC/aix-undef-func-call.ll | 29 + .../PowerPC/aix-user-defined-memcpy.ll | 58 + .../PowerPC/aix-weak-undef-func-call.ll | 30 + .../PowerPC/aix-xcoff-data-only-notoc.ll | 19 + llvm/test/CodeGen/PowerPC/aix-xcoff-data.ll | 595 + llvm/test/CodeGen/PowerPC/aix-xcoff-lcomm.ll | 112 + .../PowerPC/aix-xcoff-mergeable-const.ll | 166 + .../PowerPC/aix-xcoff-mergeable-str.ll | 62 + llvm/test/CodeGen/PowerPC/aix-xcoff-rodata.ll | 322 + 
.../PowerPC/aix-xcoff-textdisassembly.ll | 22 + llvm/test/CodeGen/PowerPC/aix-xcoff-toc.ll | 338 + llvm/test/CodeGen/PowerPC/and-mask.ll | 88 + llvm/test/CodeGen/PowerPC/asm-constraints.ll | 2 +- .../CodeGen/PowerPC/asym-regclass-copy.ll | 2 +- .../CodeGen/PowerPC/available-externally.ll | 3 +- .../CodeGen/PowerPC/bitcasts-direct-move.ll | 4 +- llvm/test/CodeGen/PowerPC/branch-opt.ll | 10 +- llvm/test/CodeGen/PowerPC/brcond.ll | 6 +- .../CodeGen/PowerPC/build-vector-allones.ll | 109 + .../CodeGen/PowerPC/build-vector-tests.ll | 765 +- .../CodeGen/PowerPC/builtins-ppc-p9-f128.ll | 2 +- llvm/test/CodeGen/PowerPC/check-cpu.ll | 11 + llvm/test/CodeGen/PowerPC/combine-fneg.ll | 11 +- .../test/CodeGen/PowerPC/constant-combines.ll | 8 +- llvm/test/CodeGen/PowerPC/cr-spills.ll | 2 +- llvm/test/CodeGen/PowerPC/crypto_bifs.ll | 2 +- llvm/test/CodeGen/PowerPC/csr-split.ll | 263 + llvm/test/CodeGen/PowerPC/ctr-cleanup.ll | 2 +- llvm/test/CodeGen/PowerPC/ctr-minmaxnum.ll | 40 +- llvm/test/CodeGen/PowerPC/ctrloop-intrin.ll | 2 +- llvm/test/CodeGen/PowerPC/ctrloop-le.ll | 3 - llvm/test/CodeGen/PowerPC/ctrloop-lt.ll | 5 - .../CodeGen/PowerPC/ctrloop-shortLoops.ll | 14 +- llvm/test/CodeGen/PowerPC/cxx_tlscc64.ll | 2 +- llvm/test/CodeGen/PowerPC/dform-adjust.ll | 125 + .../CodeGen/PowerPC/direct-move-profit.ll | 2 +- llvm/test/CodeGen/PowerPC/div-e-32.ll | 2 +- llvm/test/CodeGen/PowerPC/div-e-all.ll | 2 +- llvm/test/CodeGen/PowerPC/ec-input.ll | 4 +- llvm/test/CodeGen/PowerPC/eh-dwarf-cfa.ll | 2 +- llvm/test/CodeGen/PowerPC/elf-common.ll | 45 + .../CodeGen/PowerPC/expand-contiguous-isel.ll | 1 + llvm/test/CodeGen/PowerPC/expand-isel.ll | 4 +- .../CodeGen/PowerPC/extra-toc-reg-deps.ll | 4 +- .../test/CodeGen/PowerPC/extract-and-store.ll | 36 +- llvm/test/CodeGen/PowerPC/f128-aggregates.ll | 6 +- llvm/test/CodeGen/PowerPC/f128-fma.ll | 2 +- llvm/test/CodeGen/PowerPC/fast-isel-call.ll | 6 +- llvm/test/CodeGen/PowerPC/fast-isel-const.ll | 2 +- .../CodeGen/PowerPC/fast-isel-load-store.ll 
| 9 +- llvm/test/CodeGen/PowerPC/fast-isel-ret.ll | 6 +- llvm/test/CodeGen/PowerPC/fdiv.ll | 15 + .../CodeGen/PowerPC/float-load-store-pair.ll | 34 +- .../CodeGen/PowerPC/float-vector-gather.ll | 48 + llvm/test/CodeGen/PowerPC/fma-assoc.ll | 1 - llvm/test/CodeGen/PowerPC/fma-combine.ll | 139 + llvm/test/CodeGen/PowerPC/fma-ext.ll | 1 - .../PowerPC/fma-mutate-duplicate-vreg.ll | 2 +- llvm/test/CodeGen/PowerPC/fma-mutate.ll | 19 +- llvm/test/CodeGen/PowerPC/fmf-propagation.ll | 2 +- llvm/test/CodeGen/PowerPC/fneg.ll | 43 +- llvm/test/CodeGen/PowerPC/fold-rlwinm-1.ll | 44 + .../fp-int-conversions-direct-moves.ll | 307 +- .../CodeGen/PowerPC/fp-int128-fp-combine.ll | 6 +- .../PowerPC/fp-intrinsics-fptosi-legal.ll | 20 + llvm/test/CodeGen/PowerPC/fp64-to-int16.ll | 2 +- llvm/test/CodeGen/PowerPC/frameaddr.ll | 4 +- llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll | 24 +- .../CodeGen/PowerPC/glob-comp-aa-crash.ll | 6 +- llvm/test/CodeGen/PowerPC/gpr-vsr-spill.ll | 2 +- .../PowerPC/ifcvt-forked-bug-2016-08-08.ll | 2 +- llvm/test/CodeGen/PowerPC/ifunc.ll | 25 + llvm/test/CodeGen/PowerPC/inc-of-add.ll | 14 +- .../PowerPC/inline-asm-vsx-clobbers.ll | 32 + .../CodeGen/PowerPC/inlineasm-extendedmne.ll | 72 + llvm/test/CodeGen/PowerPC/instr-properties.ll | 9 + llvm/test/CodeGen/PowerPC/isel-rc-nox0.ll | 2 +- .../PowerPC/jump-tables-collapse-rotate.ll | 4 +- llvm/test/CodeGen/PowerPC/knowCRBitSpill.ll | 7 +- llvm/test/CodeGen/PowerPC/licm-tocReg.ll | 10 +- llvm/test/CodeGen/PowerPC/load-and-splat.ll | 264 + .../PowerPC/load-shuffle-and-shuffle-store.ll | 792 ++ .../PowerPC/loop-instr-form-prepare.ll | 827 ++ .../PowerPC/lower-globaladdr32-aix-asm.ll | 45 + .../CodeGen/PowerPC/lower-globaladdr32-aix.ll | 38 + .../PowerPC/lower-globaladdr64-aix-asm.ll | 45 + .../CodeGen/PowerPC/lower-globaladdr64-aix.ll | 38 + llvm/test/CodeGen/PowerPC/lower-massv-attr.ll | 29 + llvm/test/CodeGen/PowerPC/lower-massv.ll | 603 + llvm/test/CodeGen/PowerPC/lsr-insns-cost.ll | 66 + 
llvm/test/CodeGen/PowerPC/machine-combiner.ll | 6 +- llvm/test/CodeGen/PowerPC/machine-pre.ll | 10 +- .../memCmpUsedInZeroEqualityComparison.ll | 4 +- llvm/test/CodeGen/PowerPC/memcmpIR.ll | 8 +- llvm/test/CodeGen/PowerPC/memcpy-vec.ll | 8 +- .../test/CodeGen/PowerPC/mi-peephole-splat.ll | 136 + llvm/test/CodeGen/PowerPC/negctr.ll | 2 +- llvm/test/CodeGen/PowerPC/no-dup-spill-fp.ll | 2 +- llvm/test/CodeGen/PowerPC/no-duplicate.ll | 91 + .../CodeGen/PowerPC/not-fixed-frame-object.ll | 2 +- .../CodeGen/PowerPC/opt-cmp-inst-cr0-live.ll | 20 +- llvm/test/CodeGen/PowerPC/popcnt-zext.ll | 339 + llvm/test/CodeGen/PowerPC/popcnt.ll | 8 +- .../PowerPC/power9-moves-and-splats.ll | 36 +- llvm/test/CodeGen/PowerPC/ppc-empty-fs.ll | 2 +- llvm/test/CodeGen/PowerPC/ppc-passname.ll | 20 +- llvm/test/CodeGen/PowerPC/ppc32-lshrti3.ll | 2 +- llvm/test/CodeGen/PowerPC/ppc64-P9-mod.ll | 16 +- .../PowerPC/ppc64-align-long-double.ll | 78 +- llvm/test/CodeGen/PowerPC/ppc64-elf-abi.ll | 2 - .../PowerPC/ppc64-pre-inc-no-extra-phi.ll | 2 +- llvm/test/CodeGen/PowerPC/ppc64-smallarg.ll | 2 +- llvm/test/CodeGen/PowerPC/ppc64le-smallarg.ll | 2 +- .../ppcf128-constrained-fp-intrinsics.ll | 1563 +++ llvm/test/CodeGen/PowerPC/pr17168.ll | 2 +- llvm/test/CodeGen/PowerPC/pr17354.ll | 2 +- llvm/test/CodeGen/PowerPC/pr18663-2.ll | 6 +- llvm/test/CodeGen/PowerPC/pr24546.ll | 4 +- llvm/test/CodeGen/PowerPC/pr25080.ll | 123 + llvm/test/CodeGen/PowerPC/pr26180.ll | 4 +- llvm/test/CodeGen/PowerPC/pr27350.ll | 2 +- llvm/test/CodeGen/PowerPC/pr28130.ll | 2 +- llvm/test/CodeGen/PowerPC/pr31144.ll | 4 +- llvm/test/CodeGen/PowerPC/pr32140.ll | 8 +- llvm/test/CodeGen/PowerPC/pr36068.ll | 2 +- llvm/test/CodeGen/PowerPC/pr36292.ll | 14 +- llvm/test/CodeGen/PowerPC/pr38087.ll | 14 +- llvm/test/CodeGen/PowerPC/pr39815.ll | 11 +- llvm/test/CodeGen/PowerPC/pr41088.ll | 141 + llvm/test/CodeGen/PowerPC/pr42492.ll | 28 +- llvm/test/CodeGen/PowerPC/pr43527.ll | 75 + llvm/test/CodeGen/PowerPC/pr44183.ll | 56 + 
llvm/test/CodeGen/PowerPC/pr44239.ll | 40 + llvm/test/CodeGen/PowerPC/pre-inc-disable.ll | 223 +- llvm/test/CodeGen/PowerPC/pwr7-gt-nop.ll | 12 +- llvm/test/CodeGen/PowerPC/qpx-load-splat.ll | 7 +- llvm/test/CodeGen/PowerPC/qpx-recipest.ll | 483 +- llvm/test/CodeGen/PowerPC/recipest.ll | 295 +- .../CodeGen/PowerPC/reduce_scalarization02.ll | 87 + .../PowerPC/redundant-copy-after-tail-dup.ll | 3 +- llvm/test/CodeGen/PowerPC/reloc-align.ll | 2 +- .../PowerPC/remove-redundant-load-imm.ll | 51 + .../CodeGen/PowerPC/repeated-fp-divisors.ll | 4 +- llvm/test/CodeGen/PowerPC/rounding-ops.ll | 8 +- llvm/test/CodeGen/PowerPC/sat-add.ll | 68 +- llvm/test/CodeGen/PowerPC/save-bp.ll | 2 +- .../test/CodeGen/PowerPC/save-cr-ppc32svr4.ll | 2 +- .../CodeGen/PowerPC/save-crbp-ppc32svr4.ll | 2 +- llvm/test/CodeGen/PowerPC/scalar-min-max.ll | 203 + .../CodeGen/PowerPC/scalar-rounding-ops.ll | 561 + .../PowerPC/scheduling-mem-dependency.ll | 47 +- .../CodeGen/PowerPC/select-addrRegRegOnly.ll | 2 +- llvm/test/CodeGen/PowerPC/setcc-logic.ll | 8 +- llvm/test/CodeGen/PowerPC/setrnd.ll | 2 +- .../test/CodeGen/PowerPC/sext-vector-inreg.ll | 22 + llvm/test/CodeGen/PowerPC/shift-cmp.ll | 16 +- llvm/test/CodeGen/PowerPC/shrink-wrap.ll | 54 + llvm/test/CodeGen/PowerPC/signbit-shift.ll | 4 +- llvm/test/CodeGen/PowerPC/sjlj.ll | 4 +- llvm/test/CodeGen/PowerPC/sms-cpy-1.ll | 113 + llvm/test/CodeGen/PowerPC/sms-grp-order.ll | 37 +- llvm/test/CodeGen/PowerPC/sms-phi-1.ll | 64 + llvm/test/CodeGen/PowerPC/sms-phi-2.ll | 69 + llvm/test/CodeGen/PowerPC/sms-phi-3.ll | 85 + llvm/test/CodeGen/PowerPC/sms-phi-5.ll | 56 + llvm/test/CodeGen/PowerPC/sms-simple.ll | 8 +- llvm/test/CodeGen/PowerPC/smulfixsat.ll | 52 + llvm/test/CodeGen/PowerPC/spe.ll | 1267 +- llvm/test/CodeGen/PowerPC/spill_p9_setb.ll | 56 + llvm/test/CodeGen/PowerPC/srem-lkk.ll | 149 + llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll | 1675 +++ .../CodeGen/PowerPC/stack-guard-reassign.ll | 11 +- llvm/test/CodeGen/PowerPC/store-combine.ll | 124 +- 
llvm/test/CodeGen/PowerPC/store-constant.ll | 203 +- llvm/test/CodeGen/PowerPC/store_fptoi.ll | 32 +- llvm/test/CodeGen/PowerPC/stubs.ll | 2 +- llvm/test/CodeGen/PowerPC/stwu-sched.ll | 2 +- llvm/test/CodeGen/PowerPC/sub-of-not.ll | 14 +- llvm/test/CodeGen/PowerPC/swaps-le-1.ll | 40 +- llvm/test/CodeGen/PowerPC/swaps-le-7.ll | 4 +- .../CodeGen/PowerPC/testComparesi32gtu.ll | 72 +- .../CodeGen/PowerPC/testComparesi32ltu.ll | 72 +- .../test/CodeGen/PowerPC/testComparesieqsc.ll | 2 +- .../test/CodeGen/PowerPC/testComparesieqsi.ll | 2 +- .../CodeGen/PowerPC/testComparesieqsll.ll | 2 +- .../test/CodeGen/PowerPC/testComparesieqss.ll | 2 +- .../test/CodeGen/PowerPC/testComparesiequc.ll | 2 +- .../test/CodeGen/PowerPC/testComparesiequi.ll | 2 +- .../CodeGen/PowerPC/testComparesiequll.ll | 2 +- .../test/CodeGen/PowerPC/testComparesiequs.ll | 2 +- .../test/CodeGen/PowerPC/testComparesigesc.ll | 2 +- .../test/CodeGen/PowerPC/testComparesigesi.ll | 2 +- .../CodeGen/PowerPC/testComparesigesll.ll | 2 +- .../test/CodeGen/PowerPC/testComparesigess.ll | 2 +- .../test/CodeGen/PowerPC/testComparesigeuc.ll | 125 +- .../test/CodeGen/PowerPC/testComparesigeui.ll | 129 +- .../CodeGen/PowerPC/testComparesigeull.ll | 128 +- .../test/CodeGen/PowerPC/testComparesigeus.ll | 127 +- .../test/CodeGen/PowerPC/testComparesigtsc.ll | 43 +- .../test/CodeGen/PowerPC/testComparesigtsi.ll | 43 +- .../CodeGen/PowerPC/testComparesigtsll.ll | 73 +- .../test/CodeGen/PowerPC/testComparesigtss.ll | 40 +- .../test/CodeGen/PowerPC/testComparesigtuc.ll | 61 +- .../test/CodeGen/PowerPC/testComparesigtui.ll | 61 +- .../test/CodeGen/PowerPC/testComparesigtus.ll | 63 +- .../test/CodeGen/PowerPC/testComparesilesc.ll | 2 +- .../test/CodeGen/PowerPC/testComparesilesi.ll | 2 +- .../CodeGen/PowerPC/testComparesilesll.ll | 2 +- .../test/CodeGen/PowerPC/testComparesiless.ll | 2 +- .../test/CodeGen/PowerPC/testComparesileuc.ll | 142 +- .../test/CodeGen/PowerPC/testComparesileui.ll | 142 +- 
.../CodeGen/PowerPC/testComparesileull.ll | 137 +- .../test/CodeGen/PowerPC/testComparesileus.ll | 142 +- .../test/CodeGen/PowerPC/testComparesiltsc.ll | 79 +- .../test/CodeGen/PowerPC/testComparesiltsi.ll | 75 +- .../CodeGen/PowerPC/testComparesiltsll.ll | 108 +- .../test/CodeGen/PowerPC/testComparesiltss.ll | 79 +- .../test/CodeGen/PowerPC/testComparesiltuc.ll | 57 +- .../test/CodeGen/PowerPC/testComparesiltui.ll | 57 +- .../test/CodeGen/PowerPC/testComparesiltus.ll | 57 +- .../test/CodeGen/PowerPC/testComparesinesc.ll | 2 +- .../test/CodeGen/PowerPC/testComparesinesi.ll | 2 +- .../CodeGen/PowerPC/testComparesinesll.ll | 2 +- .../test/CodeGen/PowerPC/testComparesiness.ll | 2 +- .../test/CodeGen/PowerPC/testComparesineuc.ll | 2 +- .../test/CodeGen/PowerPC/testComparesineui.ll | 2 +- .../CodeGen/PowerPC/testComparesineull.ll | 2 +- .../test/CodeGen/PowerPC/testComparesineus.ll | 2 +- .../CodeGen/PowerPC/testCompareslleqsc.ll | 2 +- .../CodeGen/PowerPC/testCompareslleqsi.ll | 2 +- .../CodeGen/PowerPC/testCompareslleqsll.ll | 2 +- .../CodeGen/PowerPC/testCompareslleqss.ll | 2 +- .../CodeGen/PowerPC/testComparesllequc.ll | 2 +- .../CodeGen/PowerPC/testComparesllequi.ll | 2 +- .../CodeGen/PowerPC/testComparesllequll.ll | 2 +- .../CodeGen/PowerPC/testComparesllequs.ll | 2 +- .../CodeGen/PowerPC/testComparesllgesc.ll | 2 +- .../CodeGen/PowerPC/testComparesllgesi.ll | 2 +- .../CodeGen/PowerPC/testComparesllgesll.ll | 2 +- .../CodeGen/PowerPC/testComparesllgess.ll | 2 +- .../CodeGen/PowerPC/testComparesllgeuc.ll | 127 +- .../CodeGen/PowerPC/testComparesllgeui.ll | 127 +- .../CodeGen/PowerPC/testComparesllgeull.ll | 128 +- .../CodeGen/PowerPC/testComparesllgeus.ll | 127 +- .../CodeGen/PowerPC/testComparesllgtsll.ll | 73 +- .../CodeGen/PowerPC/testComparesllgtuc.ll | 71 +- .../CodeGen/PowerPC/testComparesllgtui.ll | 63 +- .../CodeGen/PowerPC/testComparesllgtus.ll | 27 +- .../CodeGen/PowerPC/testCompareslllesc.ll | 2 +- .../CodeGen/PowerPC/testCompareslllesi.ll | 2 +- 
.../CodeGen/PowerPC/testCompareslllesll.ll | 2 +- .../CodeGen/PowerPC/testComparesllless.ll | 2 +- .../CodeGen/PowerPC/testComparesllleuc.ll | 142 +- .../CodeGen/PowerPC/testComparesllleui.ll | 142 +- .../CodeGen/PowerPC/testComparesllleull.ll | 137 +- .../CodeGen/PowerPC/testComparesllleus.ll | 142 +- .../CodeGen/PowerPC/testComparesllltsll.ll | 108 +- .../CodeGen/PowerPC/testComparesllltuc.ll | 57 +- .../CodeGen/PowerPC/testComparesllltui.ll | 42 +- .../CodeGen/PowerPC/testComparesllltus.ll | 56 +- .../CodeGen/PowerPC/testComparesllnesll.ll | 2 +- .../CodeGen/PowerPC/testComparesllneull.ll | 2 +- llvm/test/CodeGen/PowerPC/test_call_aix.ll | 11 + llvm/test/CodeGen/PowerPC/test_func_desc.ll | 74 + llvm/test/CodeGen/PowerPC/tls.ll | 8 +- .../CodeGen/PowerPC/toc-load-sched-bug.ll | 12 +- .../test/CodeGen/PowerPC/tocSaveInPrologue.ll | 17 +- llvm/test/CodeGen/PowerPC/trunc-srl-load.ll | 22 +- llvm/test/CodeGen/PowerPC/uint-to-fp-v4i32.ll | 32 +- .../CodeGen/PowerPC/uint-to-ppcfp128-crash.ll | 2 +- llvm/test/CodeGen/PowerPC/umulfixsat.ll | 36 + .../umulo-128-legalisation-lowering.ll | 303 +- llvm/test/CodeGen/PowerPC/unal-vec-ldst.ll | 573 +- llvm/test/CodeGen/PowerPC/unal4-std.ll | 2 +- llvm/test/CodeGen/PowerPC/unaligned-floats.ll | 43 + llvm/test/CodeGen/PowerPC/urem-lkk.ll | 106 + llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll | 1338 ++ .../PowerPC/use-cr-result-of-dom-icmp-st.ll | 480 +- llvm/test/CodeGen/PowerPC/uwtables.ll | 2 +- llvm/test/CodeGen/PowerPC/vavg.ll | 240 + llvm/test/CodeGen/PowerPC/vec-bswap.ll | 115 + llvm/test/CodeGen/PowerPC/vec-min-max.ll | 49 + .../CodeGen/PowerPC/vec_add_sub_doubleword.ll | 2 +- .../PowerPC/vec_buildvector_loadstore.ll | 18 +- .../PowerPC/vec_conv_fp32_to_i64_elts.ll | 32 +- .../PowerPC/vec_conv_fp64_to_i32_elts.ll | 16 +- .../PowerPC/vec_conv_i16_to_fp32_elts.ll | 24 +- .../PowerPC/vec_conv_i16_to_fp64_elts.ll | 8 +- .../PowerPC/vec_conv_i64_to_fp32_elts.ll | 24 +- .../PowerPC/vec_conv_i8_to_fp32_elts.ll | 24 +- 
llvm/test/CodeGen/PowerPC/vec_revb.ll | 17 + llvm/test/CodeGen/PowerPC/vec_shuffle.ll | 126 +- llvm/test/CodeGen/PowerPC/vec_shuffle_le.ll | 172 +- .../CodeGen/PowerPC/vec_shuffle_p8vector.ll | 50 +- .../PowerPC/vec_shuffle_p8vector_le.ll | 21 +- .../vector-constrained-fp-intrinsics.ll | 1834 ++- .../CodeGen/PowerPC/vector-extend-sign.ll | 178 + llvm/test/CodeGen/PowerPC/vector-rotates.ll | 136 + .../test/CodeGen/PowerPC/vselect-constants.ll | 4 +- .../CodeGen/PowerPC/vsx-fma-mutate-undef.ll | 4 +- .../CodeGen/PowerPC/vsx_insert_extract_le.ll | 29 +- llvm/test/CodeGen/PowerPC/vsx_shuffle_le.ll | 6 +- llvm/test/CodeGen/PowerPC/zero-not-run.ll | 2 +- llvm/test/CodeGen/PowerPC/zext-and-cmp.ll | 2 +- .../RISCV/GlobalISel/calllowering-ret.ll | 17 + .../GlobalISel/irtranslator-calllowering.ll | 17 + llvm/test/CodeGen/RISCV/add-before-shl.ll | 34 +- .../test/CodeGen/RISCV/addc-adde-sube-subc.ll | 4 +- llvm/test/CodeGen/RISCV/addcarry.ll | 28 +- llvm/test/CodeGen/RISCV/alloca.ll | 4 +- llvm/test/CodeGen/RISCV/alu32.ll | 4 +- llvm/test/CodeGen/RISCV/alu64.ll | 16 +- .../test/CodeGen/RISCV/arith-with-overflow.ll | 32 +- .../test/CodeGen/RISCV/atomic-cmpxchg-flag.ll | 2 +- llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll | 1290 +- llvm/test/CodeGen/RISCV/atomic-rmw.ll | 11356 ++++++++-------- llvm/test/CodeGen/RISCV/bare-select.ll | 16 +- llvm/test/CodeGen/RISCV/blockaddress.ll | 10 +- .../CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll | 112 +- .../test/CodeGen/RISCV/callee-saved-fpr32s.ll | 20 +- .../test/CodeGen/RISCV/callee-saved-fpr64s.ll | 19 +- llvm/test/CodeGen/RISCV/callee-saved-gprs.ll | 173 +- .../RISCV/calling-conv-ilp32-ilp32f-common.ll | 44 +- ...calling-conv-ilp32-ilp32f-ilp32d-common.ll | 312 +- llvm/test/CodeGen/RISCV/calling-conv-ilp32.ll | 20 +- .../test/CodeGen/RISCV/calling-conv-ilp32d.ll | 144 +- .../calling-conv-ilp32f-ilp32d-common.ll | 98 +- .../calling-conv-lp64-lp64f-lp64d-common.ll | 168 +- llvm/test/CodeGen/RISCV/calling-conv-lp64.ll | 26 +- 
.../CodeGen/RISCV/calling-conv-rv32f-ilp32.ll | 28 +- llvm/test/CodeGen/RISCV/codemodel-lowering.ll | 18 +- .../test/CodeGen/RISCV/compress-inline-asm.ll | 2 +- llvm/test/CodeGen/RISCV/compress.ll | 6 +- llvm/test/CodeGen/RISCV/copysign-casts.ll | 110 + llvm/test/CodeGen/RISCV/disjoint.ll | 26 + llvm/test/CodeGen/RISCV/div.ll | 2 +- llvm/test/CodeGen/RISCV/double-arith.ll | 46 +- .../RISCV/double-bitmanip-dagcombines.ll | 2 +- llvm/test/CodeGen/RISCV/double-br-fcmp.ll | 16 +- .../test/CodeGen/RISCV/double-calling-conv.ll | 20 +- llvm/test/CodeGen/RISCV/double-convert.ll | 6 +- llvm/test/CodeGen/RISCV/double-fcmp.ll | 16 +- llvm/test/CodeGen/RISCV/double-imm.ll | 10 +- llvm/test/CodeGen/RISCV/double-intrinsics.ll | 55 +- llvm/test/CodeGen/RISCV/double-mem.ll | 73 +- .../CodeGen/RISCV/double-previous-failure.ll | 2 +- llvm/test/CodeGen/RISCV/double-select-fcmp.ll | 50 +- .../RISCV/double-stack-spill-restore.ll | 6 +- .../RISCV/exception-pointer-register.ll | 10 - llvm/test/CodeGen/RISCV/fastcc-float.ll | 71 + llvm/test/CodeGen/RISCV/fastcc-int.ll | 85 + llvm/test/CodeGen/RISCV/fixups-relax-diff.ll | 2 +- llvm/test/CodeGen/RISCV/float-arith.ll | 76 +- llvm/test/CodeGen/RISCV/float-br-fcmp.ll | 32 +- llvm/test/CodeGen/RISCV/float-convert.ll | 12 +- llvm/test/CodeGen/RISCV/float-fcmp.ll | 32 +- llvm/test/CodeGen/RISCV/float-imm.ll | 20 +- llvm/test/CodeGen/RISCV/float-intrinsics.ll | 34 +- llvm/test/CodeGen/RISCV/float-mem.ll | 84 +- llvm/test/CodeGen/RISCV/float-select-fcmp.ll | 44 +- llvm/test/CodeGen/RISCV/fp128.ll | 86 +- llvm/test/CodeGen/RISCV/fp16-promote.ll | 142 + llvm/test/CodeGen/RISCV/frame-info.ll | 215 +- llvm/test/CodeGen/RISCV/frame.ll | 2 +- .../CodeGen/RISCV/get-register-invalid.ll | 12 + .../CodeGen/RISCV/get-register-noreserve.ll | 38 + .../CodeGen/RISCV/get-register-reserve.ll | 34 + .../CodeGen/RISCV/get-setcc-result-type.ll | 24 +- .../CodeGen/RISCV/hoist-global-addr-base.ll | 4 +- llvm/test/CodeGen/RISCV/imm-cse.ll | 24 +- 
llvm/test/CodeGen/RISCV/imm.ll | 8 +- llvm/test/CodeGen/RISCV/indirectbr.ll | 4 +- .../CodeGen/RISCV/inline-asm-abi-names.ll | 1590 +++ .../test/CodeGen/RISCV/inline-asm-clobbers.ll | 61 + .../CodeGen/RISCV/inline-asm-d-abi-names.ll | 1509 ++ .../RISCV/inline-asm-d-constraint-f.ll | 41 +- .../CodeGen/RISCV/inline-asm-f-abi-names.ll | 1509 ++ .../RISCV/inline-asm-f-constraint-f.ll | 43 +- llvm/test/CodeGen/RISCV/inline-asm.ll | 75 +- .../CodeGen/RISCV/interrupt-attr-callee.ll | 70 + .../CodeGen/RISCV/interrupt-attr-nocall.ll | 716 +- llvm/test/CodeGen/RISCV/interrupt-attr.ll | 2 +- llvm/test/CodeGen/RISCV/intrinsics/trap.ll | 38 + llvm/test/CodeGen/RISCV/large-stack.ll | 133 +- llvm/test/CodeGen/RISCV/legalize-fneg.ll | 38 +- llvm/test/CodeGen/RISCV/lsr-legaladdimm.ll | 24 +- llvm/test/CodeGen/RISCV/mem.ll | 45 +- llvm/test/CodeGen/RISCV/mem64.ll | 58 +- llvm/test/CodeGen/RISCV/mir-target-flags.ll | 74 + llvm/test/CodeGen/RISCV/module-target-abi.ll | 24 + llvm/test/CodeGen/RISCV/module-target-abi2.ll | 27 + llvm/test/CodeGen/RISCV/mul.ll | 2 +- llvm/test/CodeGen/RISCV/option-norvc.ll | 2 +- llvm/test/CodeGen/RISCV/option-rvc.ll | 2 +- llvm/test/CodeGen/RISCV/remat.ll | 87 +- .../test/CodeGen/RISCV/reserved-reg-errors.ll | 36 + llvm/test/CodeGen/RISCV/reserved-regs.ll | 130 + .../CodeGen/RISCV/rv32i-rv64i-float-double.ll | 6 +- llvm/test/CodeGen/RISCV/rv64-large-stack.ll | 34 + .../test/CodeGen/RISCV/rv64f-float-convert.ll | 6 +- .../test/CodeGen/RISCV/rv64i-complex-float.ll | 53 + .../CodeGen/RISCV/rv64i-exhaustive-w-insts.ll | 36 +- .../CodeGen/RISCV/rv64i-single-softfloat.ll | 712 + .../RISCV/rv64i-w-insts-legalization.ll | 97 + .../CodeGen/RISCV/rv64m-exhaustive-w-insts.ll | 18 +- .../RISCV/rv64m-w-insts-legalization.ll | 34 + .../CodeGen/RISCV/select-optimize-multiple.ll | 154 +- llvm/test/CodeGen/RISCV/setcc-logic.ll | 28 +- llvm/test/CodeGen/RISCV/shifts.ll | 102 +- llvm/test/CodeGen/RISCV/shrinkwrap.ll | 97 + llvm/test/CodeGen/RISCV/split-offsets.ll | 62 +- 
llvm/test/CodeGen/RISCV/split-sp-adjust.ll | 45 + llvm/test/CodeGen/RISCV/srem-lkk.ll | 503 + llvm/test/CodeGen/RISCV/srem-vector-lkk.ll | 1432 ++ ...realignment-with-variable-sized-objects.ll | 72 + llvm/test/CodeGen/RISCV/stack-realignment.ll | 204 +- .../RISCV/subtarget-features-std-ext.ll | 20 + llvm/test/CodeGen/RISCV/tail-calls.ll | 42 +- llvm/test/CodeGen/RISCV/tls-models.ll | 103 +- .../RISCV/umulo-128-legalisation-lowering.ll | 200 +- llvm/test/CodeGen/RISCV/urem-lkk.ll | 300 + llvm/test/CodeGen/RISCV/urem-vector-lkk.ll | 1187 ++ llvm/test/CodeGen/RISCV/vararg.ll | 500 +- .../CodeGen/RISCV/zext-with-load-is-free.ll | 8 +- llvm/test/CodeGen/SPARC/64atomics.ll | 60 + llvm/test/CodeGen/SPARC/atomics.ll | 60 +- llvm/test/CodeGen/SPARC/exception.ll | 2 +- llvm/test/CodeGen/SPARC/fp128.ll | 11 +- llvm/test/CodeGen/SPARC/inlineasm.ll | 2 +- llvm/test/CodeGen/SystemZ/Large/branch-01.ll | 4 +- llvm/test/CodeGen/SystemZ/anyregcc-novec.ll | 17 +- llvm/test/CodeGen/SystemZ/call-05.ll | 3 +- llvm/test/CodeGen/SystemZ/cond-move-01.ll | 2 +- llvm/test/CodeGen/SystemZ/cond-move-02.ll | 2 +- llvm/test/CodeGen/SystemZ/cond-move-03.ll | 2 +- llvm/test/CodeGen/SystemZ/cond-move-06.ll | 2 +- llvm/test/CodeGen/SystemZ/cond-move-07.ll | 2 +- llvm/test/CodeGen/SystemZ/ctpop-02.ll | 4 +- llvm/test/CodeGen/SystemZ/fentry-insertion.ll | 29 + llvm/test/CodeGen/SystemZ/fp-abs-02.ll | 6 +- llvm/test/CodeGen/SystemZ/fp-abs-04.ll | 6 +- llvm/test/CodeGen/SystemZ/fp-cmp-04.ll | 8 +- llvm/test/CodeGen/SystemZ/fp-cmp-05.ll | 8 +- llvm/test/CodeGen/SystemZ/fp-libcall.ll | 62 + llvm/test/CodeGen/SystemZ/fp-mul-08.ll | 16 +- llvm/test/CodeGen/SystemZ/fp-mul-09.ll | 16 +- llvm/test/CodeGen/SystemZ/fp-mul-10.ll | 12 +- llvm/test/CodeGen/SystemZ/fp-mul-13.ll | 16 + llvm/test/CodeGen/SystemZ/fp-neg-01.ll | 6 +- llvm/test/CodeGen/SystemZ/fp-neg-02.ll | 6 +- llvm/test/CodeGen/SystemZ/fp-strict-add-01.ll | 52 +- llvm/test/CodeGen/SystemZ/fp-strict-add-02.ll | 52 +- 
llvm/test/CodeGen/SystemZ/fp-strict-add-03.ll | 10 +- llvm/test/CodeGen/SystemZ/fp-strict-add-04.ll | 4 +- llvm/test/CodeGen/SystemZ/fp-strict-alias.ll | 242 +- llvm/test/CodeGen/SystemZ/fp-strict-cmp-01.ll | 435 + llvm/test/CodeGen/SystemZ/fp-strict-cmp-02.ll | 249 + llvm/test/CodeGen/SystemZ/fp-strict-cmp-03.ll | 47 + llvm/test/CodeGen/SystemZ/fp-strict-cmp-04.ll | 524 + llvm/test/CodeGen/SystemZ/fp-strict-cmp-05.ll | 103 + llvm/test/CodeGen/SystemZ/fp-strict-cmp-06.ll | 44 + .../test/CodeGen/SystemZ/fp-strict-cmps-01.ll | 436 + .../test/CodeGen/SystemZ/fp-strict-cmps-02.ll | 249 + .../test/CodeGen/SystemZ/fp-strict-cmps-03.ll | 48 + .../test/CodeGen/SystemZ/fp-strict-cmps-04.ll | 148 + .../test/CodeGen/SystemZ/fp-strict-cmps-05.ll | 103 + .../test/CodeGen/SystemZ/fp-strict-cmps-06.ll | 44 + .../test/CodeGen/SystemZ/fp-strict-conv-01.ll | 26 +- .../test/CodeGen/SystemZ/fp-strict-conv-02.ll | 25 +- .../test/CodeGen/SystemZ/fp-strict-conv-03.ll | 25 +- .../test/CodeGen/SystemZ/fp-strict-conv-04.ll | 25 +- .../test/CodeGen/SystemZ/fp-strict-conv-05.ll | 45 + .../test/CodeGen/SystemZ/fp-strict-conv-06.ll | 49 + .../test/CodeGen/SystemZ/fp-strict-conv-07.ll | 45 + .../test/CodeGen/SystemZ/fp-strict-conv-08.ll | 47 + .../test/CodeGen/SystemZ/fp-strict-conv-09.ll | 42 + .../test/CodeGen/SystemZ/fp-strict-conv-10.ll | 91 + .../test/CodeGen/SystemZ/fp-strict-conv-11.ll | 42 + .../test/CodeGen/SystemZ/fp-strict-conv-12.ll | 90 + .../test/CodeGen/SystemZ/fp-strict-conv-13.ll | 86 + .../test/CodeGen/SystemZ/fp-strict-conv-14.ll | 79 + .../test/CodeGen/SystemZ/fp-strict-conv-15.ll | 17 +- .../test/CodeGen/SystemZ/fp-strict-conv-16.ll | 125 + llvm/test/CodeGen/SystemZ/fp-strict-div-01.ll | 52 +- llvm/test/CodeGen/SystemZ/fp-strict-div-02.ll | 52 +- llvm/test/CodeGen/SystemZ/fp-strict-div-03.ll | 10 +- llvm/test/CodeGen/SystemZ/fp-strict-div-04.ll | 4 +- llvm/test/CodeGen/SystemZ/fp-strict-mul-01.ll | 52 +- llvm/test/CodeGen/SystemZ/fp-strict-mul-02.ll | 307 +- 
llvm/test/CodeGen/SystemZ/fp-strict-mul-03.ll | 52 +- llvm/test/CodeGen/SystemZ/fp-strict-mul-04.ll | 337 +- llvm/test/CodeGen/SystemZ/fp-strict-mul-05.ll | 10 +- llvm/test/CodeGen/SystemZ/fp-strict-mul-06.ll | 34 +- llvm/test/CodeGen/SystemZ/fp-strict-mul-07.ll | 34 +- llvm/test/CodeGen/SystemZ/fp-strict-mul-08.ll | 50 +- llvm/test/CodeGen/SystemZ/fp-strict-mul-09.ll | 50 +- llvm/test/CodeGen/SystemZ/fp-strict-mul-10.ll | 29 +- llvm/test/CodeGen/SystemZ/fp-strict-mul-11.ll | 16 +- llvm/test/CodeGen/SystemZ/fp-strict-mul-12.ll | 86 + llvm/test/CodeGen/SystemZ/fp-strict-mul-13.ll | 21 + .../CodeGen/SystemZ/fp-strict-round-01.ll | 109 +- .../CodeGen/SystemZ/fp-strict-round-02.ll | 109 +- .../CodeGen/SystemZ/fp-strict-round-03.ll | 109 +- .../test/CodeGen/SystemZ/fp-strict-sqrt-01.ll | 25 +- .../test/CodeGen/SystemZ/fp-strict-sqrt-02.ll | 25 +- .../test/CodeGen/SystemZ/fp-strict-sqrt-03.ll | 4 +- .../test/CodeGen/SystemZ/fp-strict-sqrt-04.ll | 4 +- llvm/test/CodeGen/SystemZ/fp-strict-sub-01.ll | 52 +- llvm/test/CodeGen/SystemZ/fp-strict-sub-02.ll | 52 +- llvm/test/CodeGen/SystemZ/fp-strict-sub-03.ll | 10 +- llvm/test/CodeGen/SystemZ/fp-strict-sub-04.ll | 4 +- llvm/test/CodeGen/SystemZ/frame-02.ll | 86 +- llvm/test/CodeGen/SystemZ/frame-03.ll | 86 +- llvm/test/CodeGen/SystemZ/frame-04.ll | 86 +- llvm/test/CodeGen/SystemZ/frame-19.ll | 11 +- llvm/test/CodeGen/SystemZ/frame-20.ll | 86 +- llvm/test/CodeGen/SystemZ/frame-22.ll | 87 + llvm/test/CodeGen/SystemZ/ghc-cc-01.ll | 103 + llvm/test/CodeGen/SystemZ/ghc-cc-02.ll | 14 + llvm/test/CodeGen/SystemZ/ghc-cc-03.ll | 11 + llvm/test/CodeGen/SystemZ/ghc-cc-04.ll | 16 + llvm/test/CodeGen/SystemZ/ghc-cc-05.ll | 16 + llvm/test/CodeGen/SystemZ/ghc-cc-06.ll | 12 + llvm/test/CodeGen/SystemZ/ghc-cc-07.ll | 12 + llvm/test/CodeGen/SystemZ/int-cmp-44.ll | 59 +- llvm/test/CodeGen/SystemZ/int-cmp-45.ll | 27 +- llvm/test/CodeGen/SystemZ/int-cmp-56.ll | 163 + llvm/test/CodeGen/SystemZ/int-cmp-57.ll | 103 + 
llvm/test/CodeGen/SystemZ/int-uadd-03.ll | 2 +- llvm/test/CodeGen/SystemZ/int-usub-03.ll | 2 +- llvm/test/CodeGen/SystemZ/ipra-04.ll | 2 +- llvm/test/CodeGen/SystemZ/la-05.ll | 31 + .../test/CodeGen/SystemZ/mature-mc-support.ll | 8 +- llvm/test/CodeGen/SystemZ/mnop-mcount-01.ll | 26 + llvm/test/CodeGen/SystemZ/mnop-mcount-02.ll | 10 + .../test/CodeGen/SystemZ/mrecord-mcount-01.ll | 32 + .../test/CodeGen/SystemZ/mrecord-mcount-02.ll | 10 + llvm/test/CodeGen/SystemZ/multiselect.ll | 55 +- llvm/test/CodeGen/SystemZ/not-01.ll | 30 +- .../CodeGen/SystemZ/stack-size-section.ll | 2 +- llvm/test/CodeGen/SystemZ/stackmap-nops.ll | 153 +- .../SystemZ/store_nonbytesized_vecs.ll | 77 +- .../test/CodeGen/SystemZ/subregliveness-01.ll | 2 +- .../test/CodeGen/SystemZ/subregliveness-02.ll | 2 +- .../test/CodeGen/SystemZ/subregliveness-03.ll | 4 +- .../test/CodeGen/SystemZ/subregliveness-04.ll | 2 +- .../test/CodeGen/SystemZ/subregliveness-05.ll | 2 +- .../SystemZ/tail-call-mem-intrinsics.ll | 2 +- llvm/test/CodeGen/SystemZ/vec-abs-05.ll | 4 +- llvm/test/CodeGen/SystemZ/vec-abs-06.ll | 5 +- llvm/test/CodeGen/SystemZ/vec-bswap-01.ll | 2 +- llvm/test/CodeGen/SystemZ/vec-bswap-02.ll | 2 +- llvm/test/CodeGen/SystemZ/vec-bswap-03.ll | 2 +- llvm/test/CodeGen/SystemZ/vec-bswap-04.ll | 2 +- llvm/test/CodeGen/SystemZ/vec-bswap-05.ll | 2 +- llvm/test/CodeGen/SystemZ/vec-bswap-06.ll | 2 +- llvm/test/CodeGen/SystemZ/vec-bswap-07.ll | 2 +- llvm/test/CodeGen/SystemZ/vec-conv-03.ll | 4 +- llvm/test/CodeGen/SystemZ/vec-eswap-01.ll | 2 +- llvm/test/CodeGen/SystemZ/vec-eswap-02.ll | 2 +- .../test/CodeGen/SystemZ/vec-intrinsics-03.ll | 4 +- llvm/test/CodeGen/SystemZ/vec-max-05.ll | 90 +- llvm/test/CodeGen/SystemZ/vec-min-05.ll | 90 +- llvm/test/CodeGen/SystemZ/vec-mul-02.ll | 2 +- llvm/test/CodeGen/SystemZ/vec-mul-04.ll | 3 +- llvm/test/CodeGen/SystemZ/vec-mul-05.ll | 15 +- llvm/test/CodeGen/SystemZ/vec-neg-01.ll | 4 +- llvm/test/CodeGen/SystemZ/vec-neg-02.ll | 5 +- 
.../test/CodeGen/SystemZ/vec-strict-add-01.ll | 8 +- .../test/CodeGen/SystemZ/vec-strict-add-02.ll | 8 +- .../test/CodeGen/SystemZ/vec-strict-cmp-01.ll | 560 + .../test/CodeGen/SystemZ/vec-strict-cmp-02.ll | 442 + .../test/CodeGen/SystemZ/vec-strict-cmp-03.ll | 442 + .../CodeGen/SystemZ/vec-strict-cmps-01.ll | 442 + .../CodeGen/SystemZ/vec-strict-cmps-02.ll | 442 + .../CodeGen/SystemZ/vec-strict-cmps-03.ll | 56 + .../CodeGen/SystemZ/vec-strict-conv-01.ll | 130 + .../CodeGen/SystemZ/vec-strict-conv-02.ll | 61 + .../CodeGen/SystemZ/vec-strict-conv-03.ll | 52 + .../test/CodeGen/SystemZ/vec-strict-div-01.ll | 10 +- .../test/CodeGen/SystemZ/vec-strict-div-02.ll | 10 +- .../test/CodeGen/SystemZ/vec-strict-max-01.ll | 104 +- .../test/CodeGen/SystemZ/vec-strict-min-01.ll | 104 +- .../test/CodeGen/SystemZ/vec-strict-mul-01.ll | 10 +- .../test/CodeGen/SystemZ/vec-strict-mul-02.ll | 12 +- .../test/CodeGen/SystemZ/vec-strict-mul-03.ll | 10 +- .../test/CodeGen/SystemZ/vec-strict-mul-04.ll | 13 +- .../test/CodeGen/SystemZ/vec-strict-mul-05.ll | 33 +- .../CodeGen/SystemZ/vec-strict-round-01.ll | 74 +- .../CodeGen/SystemZ/vec-strict-round-02.ll | 74 +- .../CodeGen/SystemZ/vec-strict-sqrt-01.ll | 10 +- .../CodeGen/SystemZ/vec-strict-sqrt-02.ll | 10 +- .../test/CodeGen/SystemZ/vec-strict-sub-01.ll | 9 +- .../test/CodeGen/SystemZ/vec-strict-sub-02.ll | 10 +- .../vector-constrained-fp-intrinsics.ll | 1172 +- llvm/test/CodeGen/Thumb/PR17309.ll | 4 +- llvm/test/CodeGen/Thumb/PR35481.ll | 2 +- llvm/test/CodeGen/Thumb/callee_save.ll | 6 +- llvm/test/CodeGen/Thumb/cmp-and-fold.ll | 208 + llvm/test/CodeGen/Thumb/fastcc.ll | 2 +- llvm/test/CodeGen/Thumb/large-stack.ll | 6 +- llvm/test/CodeGen/Thumb/ldm-merge-call.ll | 4 +- llvm/test/CodeGen/Thumb/rev.ll | 25 +- .../CodeGen/Thumb/scheduler-clone-cpsr-def.ll | 41 + llvm/test/CodeGen/Thumb/stack_guard_remat.ll | 41 +- llvm/test/CodeGen/Thumb/stm-merge.ll | 2 +- llvm/test/CodeGen/Thumb/stm-scavenging.ll | 4 +- 
.../test/CodeGen/Thumb2/2009-07-21-ISelBug.ll | 2 +- .../CodeGen/Thumb2/2010-02-11-phi-cycle.ll | 9 +- .../CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll | 8 +- .../CodeGen/Thumb2/2010-11-22-EpilogueBug.ll | 2 +- .../LowOverheadLoops/basic-tail-pred.ll | 337 + .../Thumb2/LowOverheadLoops/branch-targets.ll | 512 + .../cond-vector-reduce-mve-codegen.ll | 520 + .../Thumb2/LowOverheadLoops/fast-fp-loops.ll | 592 + .../LowOverheadLoops/mve-float-loops.ll | 2008 +++ .../LowOverheadLoops/mve-tail-data-types.ll | 1230 ++ .../CodeGen/Thumb2/LowOverheadLoops/nested.ll | 241 + .../LowOverheadLoops/no-dec-le-simple.ll | 158 + .../LowOverheadLoops/tail-pred-narrow.ll | 54 + .../tail-pred-pattern-fail.ll | 505 + .../LowOverheadLoops/tail-pred-widen.ll | 173 + .../Thumb2/LowOverheadLoops/tail-reduce.ll | 118 + .../LowOverheadLoops/vector-arith-codegen.ll | 411 + .../vector-reduce-mve-tail.ll | 75 + .../Thumb2/LowOverheadLoops/vector-unroll.ll | 118 + llvm/test/CodeGen/Thumb2/aligned-spill.ll | 6 +- llvm/test/CodeGen/Thumb2/bug-subw.ll | 74 + llvm/test/CodeGen/Thumb2/cbnz.ll | 4 +- llvm/test/CodeGen/Thumb2/constant-islands.ll | 2 +- llvm/test/CodeGen/Thumb2/csel.ll | 340 + llvm/test/CodeGen/Thumb2/emit-unwinding.ll | 15 + llvm/test/CodeGen/Thumb2/float-cmp.ll | 52 +- .../CodeGen/Thumb2/float-intrinsics-double.ll | 2 +- .../CodeGen/Thumb2/float-intrinsics-float.ll | 2 +- llvm/test/CodeGen/Thumb2/float-ops.ll | 2 +- llvm/test/CodeGen/Thumb2/frame-pointer.ll | 16 +- llvm/test/CodeGen/Thumb2/large-call.ll | 2 +- llvm/test/CodeGen/Thumb2/lsll0.ll | 47 + llvm/test/CodeGen/Thumb2/mve-abs.ll | 63 +- llvm/test/CodeGen/Thumb2/mve-be.ll | 297 + llvm/test/CodeGen/Thumb2/mve-bitreverse.ll | 52 + llvm/test/CodeGen/Thumb2/mve-bswap.ll | 37 + llvm/test/CodeGen/Thumb2/mve-ctlz.ll | 140 + llvm/test/CodeGen/Thumb2/mve-ctpop.ll | 151 + llvm/test/CodeGen/Thumb2/mve-cttz.ll | 160 + llvm/test/CodeGen/Thumb2/mve-div-expand.ll | 150 +- llvm/test/CodeGen/Thumb2/mve-extractelt.ll | 86 + 
llvm/test/CodeGen/Thumb2/mve-fmas.ll | 271 + llvm/test/CodeGen/Thumb2/mve-fmath.ll | 594 +- llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll | 68 +- llvm/test/CodeGen/Thumb2/mve-frint.ll | 204 +- .../CodeGen/Thumb2/mve-gather-ind16-scaled.ll | 271 + .../Thumb2/mve-gather-ind16-unscaled.ll | 242 + .../CodeGen/Thumb2/mve-gather-ind32-scaled.ll | 441 + .../Thumb2/mve-gather-ind32-unscaled.ll | 757 + .../Thumb2/mve-gather-ind8-unscaled.ll | 370 + llvm/test/CodeGen/Thumb2/mve-gather-ptrs.ll | 863 ++ .../CodeGen/Thumb2/mve-gather-scatter-opt.ll | 46 + .../Thumb2/mve-intrinsics/load-store.ll | 1208 ++ .../Thumb2/mve-intrinsics/predicates.ll | 219 + .../Thumb2/mve-intrinsics/scalar-shifts.ll | 280 + .../Thumb2/mve-intrinsics/scatter-gather.ll | 2018 +++ .../CodeGen/Thumb2/mve-intrinsics/vabavq.ll | 158 + .../CodeGen/Thumb2/mve-intrinsics/vabdq.ll | 139 + .../Thumb2/mve-intrinsics/vadc-multiple.ll | 87 + .../CodeGen/Thumb2/mve-intrinsics/vadc.ll | 98 + .../CodeGen/Thumb2/mve-intrinsics/vaddq.ll | 93 + .../CodeGen/Thumb2/mve-intrinsics/vandq.ll | 177 + .../CodeGen/Thumb2/mve-intrinsics/vbicq.ll | 181 + .../CodeGen/Thumb2/mve-intrinsics/vcaddq.ll | 870 ++ .../CodeGen/Thumb2/mve-intrinsics/vcmlaq.ll | 205 + .../CodeGen/Thumb2/mve-intrinsics/vcmulq.ll | 323 + .../CodeGen/Thumb2/mve-intrinsics/vcvt.ll | 56 + .../mve-intrinsics/vector-shift-imm-dyadic.ll | 1270 ++ .../Thumb2/mve-intrinsics/vector-shift-imm.ll | 1476 ++ .../Thumb2/mve-intrinsics/vector-shift-var.ll | 1338 ++ .../CodeGen/Thumb2/mve-intrinsics/veorq.ll | 177 + .../CodeGen/Thumb2/mve-intrinsics/vhaddq.ll | 135 + .../CodeGen/Thumb2/mve-intrinsics/vhsubq.ll | 92 + .../CodeGen/Thumb2/mve-intrinsics/vld24.ll | 109 + .../CodeGen/Thumb2/mve-intrinsics/vldr.ll | 62 + .../CodeGen/Thumb2/mve-intrinsics/vmaxnmq.ll | 91 + .../CodeGen/Thumb2/mve-intrinsics/vmaxq.ll | 132 + .../CodeGen/Thumb2/mve-intrinsics/vminnmq.ll | 91 + .../CodeGen/Thumb2/mve-intrinsics/vminq.ll | 132 + .../CodeGen/Thumb2/mve-intrinsics/vminvq.ll | 36 + 
.../CodeGen/Thumb2/mve-intrinsics/vmldav.ll | 734 + .../CodeGen/Thumb2/mve-intrinsics/vmlldav.ll | 1183 ++ .../CodeGen/Thumb2/mve-intrinsics/vmulhq.ll | 135 + .../CodeGen/Thumb2/mve-intrinsics/vmullbq.ll | 181 + .../CodeGen/Thumb2/mve-intrinsics/vmulltq.ll | 179 + .../CodeGen/Thumb2/mve-intrinsics/vmulq.ll | 171 + .../CodeGen/Thumb2/mve-intrinsics/vornq.ll | 181 + .../CodeGen/Thumb2/mve-intrinsics/vorrq.ll | 177 + .../CodeGen/Thumb2/mve-intrinsics/vqaddq.ll | 92 + .../CodeGen/Thumb2/mve-intrinsics/vqdmulhq.ll | 92 + .../Thumb2/mve-intrinsics/vqrdmulhq.ll | 92 + .../CodeGen/Thumb2/mve-intrinsics/vqsubq.ll | 92 + .../CodeGen/Thumb2/mve-intrinsics/vrhaddq.ll | 135 + .../CodeGen/Thumb2/mve-intrinsics/vrmulhq.ll | 135 + .../CodeGen/Thumb2/mve-intrinsics/vsubq.ll | 93 + llvm/test/CodeGen/Thumb2/mve-ldst-offset.ll | 1454 +- llvm/test/CodeGen/Thumb2/mve-ldst-postinc.ll | 1621 ++- llvm/test/CodeGen/Thumb2/mve-ldst-preinc.ll | 1620 ++- llvm/test/CodeGen/Thumb2/mve-ldst-regimm.ll | 10 +- llvm/test/CodeGen/Thumb2/mve-loadstore.ll | 276 +- .../CodeGen/Thumb2/mve-masked-ldst-offset.ll | 2646 ++++ .../CodeGen/Thumb2/mve-masked-ldst-postinc.ll | 2646 ++++ .../CodeGen/Thumb2/mve-masked-ldst-preinc.ll | 2646 ++++ llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll | 1151 ++ llvm/test/CodeGen/Thumb2/mve-masked-load.ll | 2196 +++ llvm/test/CodeGen/Thumb2/mve-masked-store.ll | 1819 +++ llvm/test/CodeGen/Thumb2/mve-minmax.ll | 94 +- .../test/CodeGen/Thumb2/mve-multivec-spill.ll | 103 + llvm/test/CodeGen/Thumb2/mve-phireg.ll | 285 + llvm/test/CodeGen/Thumb2/mve-pred-and.ll | 714 + llvm/test/CodeGen/Thumb2/mve-pred-bitcast.ll | 431 + .../CodeGen/Thumb2/mve-pred-build-const.ll | 196 + .../test/CodeGen/Thumb2/mve-pred-build-var.ll | 246 + llvm/test/CodeGen/Thumb2/mve-pred-ext.ll | 335 + .../test/CodeGen/Thumb2/mve-pred-loadstore.ll | 355 + llvm/test/CodeGen/Thumb2/mve-pred-not.ll | 402 + llvm/test/CodeGen/Thumb2/mve-pred-or.ll | 508 + llvm/test/CodeGen/Thumb2/mve-pred-shuffle.ll | 563 + 
llvm/test/CodeGen/Thumb2/mve-pred-spill.ll | 165 + .../test/CodeGen/Thumb2/mve-pred-threshold.ll | 615 + llvm/test/CodeGen/Thumb2/mve-pred-xor.ll | 561 + .../CodeGen/Thumb2/mve-saturating-arith.ll | 408 + llvm/test/CodeGen/Thumb2/mve-sext.ll | 462 +- llvm/test/CodeGen/Thumb2/mve-shifts-scalar.ll | 422 + llvm/test/CodeGen/Thumb2/mve-shifts.ll | 30 +- llvm/test/CodeGen/Thumb2/mve-shuffle.ll | 73 +- llvm/test/CodeGen/Thumb2/mve-shuffleext.ll | 94 + llvm/test/CodeGen/Thumb2/mve-shufflemov.ll | 442 + llvm/test/CodeGen/Thumb2/mve-simple-arith.ll | 162 +- llvm/test/CodeGen/Thumb2/mve-stack.ll | 278 + llvm/test/CodeGen/Thumb2/mve-vaddqr.ll | 74 + llvm/test/CodeGen/Thumb2/mve-vaddv.ll | 105 + llvm/test/CodeGen/Thumb2/mve-vcmp.ll | 537 + llvm/test/CodeGen/Thumb2/mve-vcmpf.ll | 2541 ++++ llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll | 5222 +++++++ llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll | 4970 +++++++ llvm/test/CodeGen/Thumb2/mve-vcmpr.ll | 1186 ++ llvm/test/CodeGen/Thumb2/mve-vcmpz.ll | 835 ++ llvm/test/CodeGen/Thumb2/mve-vctp.ll | 54 + llvm/test/CodeGen/Thumb2/mve-vcvt.ll | 64 +- .../CodeGen/Thumb2/mve-vecreduce-fminmax.ll | 2264 +++ llvm/test/CodeGen/Thumb2/mve-vector-spill.ll | 163 + llvm/test/CodeGen/Thumb2/mve-vfma.ll | 25 + llvm/test/CodeGen/Thumb2/mve-vhaddsub.ll | 305 + llvm/test/CodeGen/Thumb2/mve-vld2.ll | 615 + llvm/test/CodeGen/Thumb2/mve-vld3.ll | 1745 +++ llvm/test/CodeGen/Thumb2/mve-vld4.ll | 1235 ++ llvm/test/CodeGen/Thumb2/mve-vldst4.ll | 284 + llvm/test/CodeGen/Thumb2/mve-vmaxv.ll | 136 + llvm/test/CodeGen/Thumb2/mve-vmla.ll | 200 + llvm/test/CodeGen/Thumb2/mve-vmovn.ll | 422 + llvm/test/CodeGen/Thumb2/mve-vmulqr.ll | 74 + llvm/test/CodeGen/Thumb2/mve-vpsel.ll | 77 + .../CodeGen/Thumb2/mve-vpt-from-intrinsics.ll | 45 + llvm/test/CodeGen/Thumb2/mve-vst2.ll | 606 + llvm/test/CodeGen/Thumb2/mve-vst3.ll | 1967 +++ llvm/test/CodeGen/Thumb2/mve-vst4.ll | 1126 ++ llvm/test/CodeGen/Thumb2/mve-vsubqr.ll | 77 + llvm/test/CodeGen/Thumb2/mve-widen-narrow.ll | 312 +- 
llvm/test/CodeGen/Thumb2/segmented-stacks.ll | 86 +- llvm/test/CodeGen/Thumb2/stack_guard_remat.ll | 2 +- .../Thumb2/t2peephole-t2ADDrr-to-t2ADDri.ll | 10 + llvm/test/CodeGen/Thumb2/thumb2-ldm.ll | 8 +- llvm/test/CodeGen/Thumb2/thumb2-uxtb.ll | 163 +- llvm/test/CodeGen/Thumb2/vqabs.ll | 50 + llvm/test/CodeGen/Thumb2/vqneg.ll | 44 + .../CodeGen/VE/simple_prologue_epilogue.ll | 32 + llvm/test/CodeGen/VE/target_support.ll | 2 + llvm/test/CodeGen/WebAssembly/atomic-fence.ll | 23 +- llvm/test/CodeGen/WebAssembly/bulk-memory.ll | 40 +- .../CodeGen/WebAssembly/cfg-stackify-eh.ll | 135 +- llvm/test/CodeGen/WebAssembly/conv-trap.ll | 80 + llvm/test/CodeGen/WebAssembly/cpus.ll | 9 +- llvm/test/CodeGen/WebAssembly/exception.ll | 3 +- llvm/test/CodeGen/WebAssembly/export-name.ll | 17 + .../CodeGen/WebAssembly/function-bitcasts.ll | 19 +- .../CodeGen/WebAssembly/indirect-import.ll | 4 +- .../CodeGen/WebAssembly/llround-conv-i32.ll | 4 +- .../lower-em-exceptions-resume-only.ll | 17 + .../lower-em-exceptions-whitelist.ll | 2 +- .../WebAssembly/lower-em-exceptions.ll | 6 +- .../WebAssembly/lower-em-sjlj-alias.ll | 43 + .../CodeGen/WebAssembly/lower-em-sjlj-sret.ll | 27 + .../test/CodeGen/WebAssembly/lower-em-sjlj.ll | 55 +- llvm/test/CodeGen/WebAssembly/multivalue.ll | 49 +- .../CodeGen/WebAssembly/negative-base-reg.ll | 2 +- llvm/test/CodeGen/WebAssembly/reg-stackify.ll | 2 +- llvm/test/CodeGen/WebAssembly/select.ll | 75 +- llvm/test/CodeGen/WebAssembly/simd-arith.ll | 357 +- .../CodeGen/WebAssembly/simd-build-vector.ll | 187 +- .../CodeGen/WebAssembly/simd-comparisons.ll | 150 + .../WebAssembly/simd-extended-extract.ll | 83 +- .../CodeGen/WebAssembly/simd-intrinsics.ll | 218 +- .../CodeGen/WebAssembly/simd-load-splat.ll | 21 + llvm/test/CodeGen/WebAssembly/simd-offset.ll | 1670 ++- llvm/test/CodeGen/WebAssembly/simd.ll | 80 +- llvm/test/CodeGen/WebAssembly/tailcall.ll | 90 +- .../WebAssembly/tls-general-dynamic.ll | 31 + llvm/test/CodeGen/WinCFGuard/cfguard-cast.ll | 35 + 
llvm/test/CodeGen/WinCFGuard/cfguard.ll | 9 +- llvm/test/CodeGen/WinEH/wineh-comdat.ll | 2 +- .../CodeGen/WinEH/wineh-noret-cleanup.ll.x | 7 +- .../CodeGen/WinEH/wineh-statenumbering.ll | 2 +- .../CodeGen/X86/2006-04-27-ISelFoldingBug.ll | 30 +- .../X86/2006-05-08-CoalesceSubRegClass.ll | 13 +- .../test/CodeGen/X86/2006-05-08-InstrSched.ll | 16 +- .../2006-07-28-AsmPrint-Long-As-Pointer.ll | 3 +- .../CodeGen/X86/2006-08-21-ExtraMovInst.ll | 17 +- .../X86/2006-10-10-FindModifiedNodeSlotBug.ll | 16 +- .../CodeGen/X86/2007-01-13-StackPtrIndex.ll | 249 +- .../test/CodeGen/X86/2007-02-16-BranchFold.ll | 79 +- .../CodeGen/X86/2007-03-01-SpillerCrash.ll | 9 +- .../X86/2007-03-24-InlineAsmPModifier.ll | 9 +- .../X86/2007-04-27-InlineAsm-IntMemInput.ll | 9 +- .../X86/2007-08-09-IllegalX86-64Asm.ll | 172 +- .../CodeGen/X86/2007-08-10-SignExtSubreg.ll | 9 +- .../CodeGen/X86/2007-10-04-AvoidEFLAGSCopy.ll | 19 +- .../X86/2007-10-12-CoalesceExtSubReg.ll | 25 +- .../CodeGen/X86/2007-10-12-SpillerUnfold1.ll | 32 +- .../CodeGen/X86/2007-10-12-SpillerUnfold2.ll | 18 +- .../CodeGen/X86/2007-10-29-ExtendSetCC.ll | 9 +- .../test/CodeGen/X86/2007-11-06-InstrSched.ll | 34 +- llvm/test/CodeGen/X86/2007-11-07-MulBy4.ll | 16 +- .../test/CodeGen/X86/2007-12-18-LoadCSEBug.ll | 26 +- .../X86/2008-02-22-LocalRegAllocBug.ll | 47 +- llvm/test/CodeGen/X86/2008-03-07-APIntBug.ll | 13 +- .../X86/2008-03-31-SpillerFoldingBug.ll | 40 +- .../CodeGen/X86/2008-04-09-BranchFolding.ll | 20 +- llvm/test/CodeGen/X86/2008-04-16-ReMatBug.ll | 90 +- .../CodeGen/X86/2008-04-17-CoalescerBug.ll | 184 +- llvm/test/CodeGen/X86/2008-04-24-MemCpyBug.ll | 25 +- .../CodeGen/X86/2008-04-28-CoalescerBug.ll | 58 +- .../X86/2008-05-01-InvalidOrdCompare.ll | 18 +- .../CodeGen/X86/2008-05-12-tailmerge-5.ll | 62 +- .../CodeGen/X86/2008-05-21-CoalescerBug.ll | 30 +- .../X86/2008-06-13-VolatileLoadStore.ll | 18 +- .../test/CodeGen/X86/2008-06-16-SubregsBug.ll | 17 +- .../test/CodeGen/X86/2008-06-25-VecISelBug.ll | 7 +- 
.../X86/2008-07-09-ELFSectionAttributes.ll | 4 +- llvm/test/CodeGen/X86/2008-07-11-SHLBy1.ll | 10 +- llvm/test/CodeGen/X86/2008-08-06-CmpStride.ll | 21 +- .../CodeGen/X86/2008-08-17-UComiCodeGenBug.ll | 9 +- .../CodeGen/X86/2008-09-05-sinttofp-2xi32.ll | 2 - .../CodeGen/X86/2008-09-11-CoalescerBug2.ll | 1 - .../CodeGen/X86/2008-09-25-sseregparm-1.ll | 20 +- .../CodeGen/X86/2008-10-06-x87ld-nan-1.ll | 15 +- .../CodeGen/X86/2008-10-06-x87ld-nan-2.ll | 22 +- .../CodeGen/X86/2008-10-24-FlippedCompare.ll | 14 +- llvm/test/CodeGen/X86/2008-11-29-ULT-Sign.ll | 13 +- .../2008-12-01-loop-iv-used-outside-loop.ll | 23 +- .../CodeGen/X86/2008-12-02-dagcombine-1.ll | 10 +- .../CodeGen/X86/2008-12-02-dagcombine-2.ll | 8 +- .../CodeGen/X86/2008-12-02-dagcombine-3.ll | 13 +- .../CodeGen/X86/2008-12-16-dagcombine-4.ll | 8 +- .../CodeGen/X86/2008-12-22-dagcombine-5.ll | 8 +- .../CodeGen/X86/2008-12-23-crazy-address.ll | 31 +- .../CodeGen/X86/2008-12-23-dagcombine-6.ll | 13 +- llvm/test/CodeGen/X86/2009-01-31-BigShift.ll | 17 +- llvm/test/CodeGen/X86/2009-01-31-BigShift2.ll | 12 +- .../X86/2009-02-21-ExtWeakInitializer.ll | 5 +- .../CodeGen/X86/2009-02-26-MachineLICMBug.ll | 4 +- .../CodeGen/X86/2009-03-23-MultiUseSched.ll | 229 +- llvm/test/CodeGen/X86/2009-04-12-picrel.ll | 9 +- .../CodeGen/X86/2009-04-14-IllegalRegs.ll | 55 +- .../CodeGen/X86/2009-04-25-CoalescerBug.ll | 14 +- .../X86/2009-04-29-IndirectDestOperands.ll | 22 +- llvm/test/CodeGen/X86/2009-05-30-ISelBug.ll | 21 +- .../CodeGen/X86/2009-06-05-VZextByteShort.ll | 3 +- ...2-x86_64-tail-call-conv-out-of-sync-bug.ll | 19 +- .../CodeGen/X86/2009-06-15-not-a-tail-call.ll | 17 +- llvm/test/CodeGen/X86/2009-08-12-badswitch.ll | 150 +- llvm/test/CodeGen/X86/20090313-signext.ll | 16 +- .../X86/2010-02-19-TailCallRetAddrBug.ll | 2 +- llvm/test/CodeGen/X86/2011-05-09-loaduse.ll | 30 +- .../CodeGen/X86/2011-10-19-LegelizeLoad.ll | 28 +- llvm/test/CodeGen/X86/2011-12-28-vselecti8.ll | 9 +- .../CodeGen/X86/2011-12-8-bitcastintprom.ll 
| 6 +- llvm/test/CodeGen/X86/2012-01-18-vbitcast.ll | 5 +- .../CodeGen/X86/2012-03-15-build_vector_wl.ll | 1 - llvm/test/CodeGen/X86/2012-04-26-sdglue.ll | 4 +- llvm/test/CodeGen/X86/2012-07-10-extload64.ll | 2 +- .../test/CodeGen/X86/2012-08-07-CmpISelBug.ll | 13 +- .../CodeGen/X86/2013-03-13-VEX-DestReg.ll | 2 +- .../CodeGen/X86/2014-08-29-CompactUnwind.ll | 2 +- llvm/test/CodeGen/X86/3dnow-intrinsics.ll | 33 +- llvm/test/CodeGen/X86/4char-promote.ll | 7 +- .../X86/DynamicCalleeSavedRegisters.ll | 58 +- llvm/test/CodeGen/X86/GlobalISel/add-ext.ll | 10 +- .../test/CodeGen/X86/GlobalISel/add-scalar.ll | 4 +- .../CodeGen/X86/GlobalISel/ashr-scalar.ll | 11 +- .../test/CodeGen/X86/GlobalISel/ext-x86-64.ll | 12 +- llvm/test/CodeGen/X86/GlobalISel/ext.ll | 12 +- .../GlobalISel/irtranslator-callingconv.ll | 40 +- .../CodeGen/X86/GlobalISel/lshr-scalar.ll | 11 +- llvm/test/CodeGen/X86/GlobalISel/ptr-add.ll | 140 + .../test/CodeGen/X86/GlobalISel/shl-scalar.ll | 11 +- .../x86_64-irtranslator-struct-return.ll | 75 +- llvm/test/CodeGen/X86/O0-pipeline.ll | 12 +- llvm/test/CodeGen/X86/O3-pipeline.ll | 22 +- llvm/test/CodeGen/X86/SwitchLowering.ll | 38 +- llvm/test/CodeGen/X86/SwizzleShuff.ll | 2 +- llvm/test/CodeGen/X86/absolute-constant.ll | 23 +- llvm/test/CodeGen/X86/addcarry.ll | 622 + .../test/CodeGen/X86/addr-label-difference.ll | 5 +- llvm/test/CodeGen/X86/alias-static-alloca.ll | 28 +- .../X86/align-branch-boundary-default.ll | 37 + .../align-branch-boundary-noautopadding.ll | 36 + .../X86/align-branch-boundary-suppressions.ll | 85 + llvm/test/CodeGen/X86/align-down-const.ll | 152 + llvm/test/CodeGen/X86/align-down.ll | 272 + llvm/test/CodeGen/X86/aligned-comm.ll | 11 +- llvm/test/CodeGen/X86/all-ones-vector.ll | 56 +- llvm/test/CodeGen/X86/and-load-fold.ll | 4 +- llvm/test/CodeGen/X86/anyext.ll | 2 - llvm/test/CodeGen/X86/arg-cast.ll | 10 +- llvm/test/CodeGen/X86/atom-fixup-lea4.ll | 2 +- .../CodeGen/X86/atom-pad-short-functions.ll | 23 + 
llvm/test/CodeGen/X86/atomic-monotonic.ll | 69 +- .../CodeGen/X86/atomic-non-integer-fp128.ll | 35 + llvm/test/CodeGen/X86/atomic-non-integer.ll | 74 +- llvm/test/CodeGen/X86/atomic-unordered.ll | 1165 +- llvm/test/CodeGen/X86/atomic32.ll | 175 + llvm/test/CodeGen/X86/atomicf128.ll | 32 +- llvm/test/CodeGen/X86/avg-mask.ll | 114 +- llvm/test/CodeGen/X86/avg.ll | 1500 +- llvm/test/CodeGen/X86/avoid-sfb-overlaps.ll | 9 +- llvm/test/CodeGen/X86/avoid-sfb.ll | 13 +- llvm/test/CodeGen/X86/avx-basic.ll | 2 +- llvm/test/CodeGen/X86/avx-bitcast.ll | 4 +- llvm/test/CodeGen/X86/avx-cvt-2.ll | 2 +- llvm/test/CodeGen/X86/avx-cvt.ll | 36 + llvm/test/CodeGen/X86/avx-fp2int.ll | 2 + llvm/test/CodeGen/X86/avx-intel-ocl.ll | 4 +- llvm/test/CodeGen/X86/avx-isa-check.ll | 2 +- llvm/test/CodeGen/X86/avx-load-store.ll | 23 + llvm/test/CodeGen/X86/avx-minmax.ll | 51 +- llvm/test/CodeGen/X86/avx-splat.ll | 165 +- llvm/test/CodeGen/X86/avx-unpack.ll | 8 +- llvm/test/CodeGen/X86/avx-vbroadcast.ll | 47 +- llvm/test/CodeGen/X86/avx-vpclmulqdq.ll | 1 + llvm/test/CodeGen/X86/avx-vzeroupper.ll | 109 +- llvm/test/CodeGen/X86/avx2-conversions.ll | 6 +- llvm/test/CodeGen/X86/avx2-gather.ll | 4 +- llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll | 32 + llvm/test/CodeGen/X86/avx2-masked-gather.ll | 594 +- llvm/test/CodeGen/X86/avx2-shift.ll | 10 +- llvm/test/CodeGen/X86/avx2-vbroadcast.ll | 4 +- llvm/test/CodeGen/X86/avx2-vector-shifts.ll | 10 +- .../CodeGen/X86/avx512-any_extend_load.ll | 38 +- llvm/test/CodeGen/X86/avx512-arith.ll | 94 + .../CodeGen/X86/avx512-broadcast-unfold.ll | 4635 +++++++ llvm/test/CodeGen/X86/avx512-bugfix-25270.ll | 4 +- llvm/test/CodeGen/X86/avx512-calling-conv.ll | 2315 +++- .../CodeGen/X86/avx512-cmp-kor-sequence.ll | 2 +- llvm/test/CodeGen/X86/avx512-cmp-mask.ll | 51 + llvm/test/CodeGen/X86/avx512-cmp.ll | 39 +- llvm/test/CodeGen/X86/avx512-cvt.ll | 143 +- llvm/test/CodeGen/X86/avx512-ext.ll | 1127 +- llvm/test/CodeGen/X86/avx512-fma-commute.ll | 16 +- 
llvm/test/CodeGen/X86/avx512-inc-dec.ll | 12 +- .../test/CodeGen/X86/avx512-insert-extract.ll | 106 +- llvm/test/CodeGen/X86/avx512-intel-ocl.ll | 328 +- .../X86/avx512-intrinsics-fast-isel.ll | 25 +- .../CodeGen/X86/avx512-intrinsics-upgrade.ll | 129 +- llvm/test/CodeGen/X86/avx512-intrinsics.ll | 58 +- llvm/test/CodeGen/X86/avx512-logic.ll | 32 +- llvm/test/CodeGen/X86/avx512-mask-op.ll | 1825 ++- llvm/test/CodeGen/X86/avx512-mask-spills.ll | 16 +- .../CodeGen/X86/avx512-masked_memop-16-8.ll | 451 +- llvm/test/CodeGen/X86/avx512-memfold.ll | 2 +- llvm/test/CodeGen/X86/avx512-nontemporal.ll | 42 +- llvm/test/CodeGen/X86/avx512-regcall-Mask.ll | 88 +- .../test/CodeGen/X86/avx512-regcall-NoMask.ll | 125 +- llvm/test/CodeGen/X86/avx512-scalar_mask.ll | 12 +- llvm/test/CodeGen/X86/avx512-select.ll | 84 +- .../X86/avx512-shuffles/partial_permute.ll | 114 +- .../CodeGen/X86/avx512-skx-insert-subvec.ll | 50 + llvm/test/CodeGen/X86/avx512-trunc.ll | 147 +- .../test/CodeGen/X86/avx512-unsafe-fp-math.ll | 5 +- llvm/test/CodeGen/X86/avx512-vbroadcast.ll | 4 +- .../test/CodeGen/X86/avx512-vbroadcasti128.ll | 28 +- .../test/CodeGen/X86/avx512-vbroadcasti256.ll | 28 +- llvm/test/CodeGen/X86/avx512-vec-cmp.ll | 523 +- llvm/test/CodeGen/X86/avx512-vec3-crash.ll | 18 +- llvm/test/CodeGen/X86/avx512-vselect.ll | 156 + .../X86/avx512bw-intrinsics-upgrade.ll | 14 +- llvm/test/CodeGen/X86/avx512bw-mask-op.ll | 48 + .../X86/avx512bwvl-intrinsics-upgrade.ll | 120 +- .../X86/avx512dqvl-intrinsics-upgrade.ll | 27 +- .../CodeGen/X86/avx512f-vec-test-testn.ll | 131 +- .../X86/avx512ifma-intrinsics-upgrade.ll | 34 +- .../test/CodeGen/X86/avx512ifma-intrinsics.ll | 34 +- .../CodeGen/X86/avx512vbmi2-funnel-shifts.ll | 104 + .../X86/avx512vbmi2vl-funnel-shifts.ll | 203 + .../X86/avx512vl-intrinsics-fast-isel.ll | 4 + .../X86/avx512vl-intrinsics-upgrade.ll | 655 +- llvm/test/CodeGen/X86/avx512vl-nontemporal.ll | 34 +- llvm/test/CodeGen/X86/avx512vl-vec-cmp.ll | 24 +- 
.../CodeGen/X86/avx512vl-vec-masked-cmp.ll | 734 +- .../X86/avx512vl_vnni-intrinsics-upgrade.ll | 64 +- .../CodeGen/X86/avx512vl_vnni-intrinsics.ll | 64 +- .../X86/avx512vlcd-intrinsics-fast-isel.ll | 2 - .../X86/avx512vlvp2intersect-intrinsics.ll | 7 +- .../X86/avx512vnni-intrinsics-upgrade.ll | 32 +- .../test/CodeGen/X86/avx512vnni-intrinsics.ll | 32 +- llvm/test/CodeGen/X86/avx512vnni.ll | 198 + .../X86/avx512vp2intersect-intrinsics.ll | 3 +- llvm/test/CodeGen/X86/bit-piece-comment.ll | 2 +- llvm/test/CodeGen/X86/bit-test-shift.ll | 11 +- .../test/CodeGen/X86/bitcast-and-setcc-128.ll | 653 +- .../test/CodeGen/X86/bitcast-and-setcc-256.ll | 161 +- .../test/CodeGen/X86/bitcast-and-setcc-512.ll | 174 +- .../X86/bitcast-int-to-vector-bool-zext.ll | 43 +- llvm/test/CodeGen/X86/bitcast-mmx.ll | 38 +- llvm/test/CodeGen/X86/bitcast-setcc-128.ll | 340 +- llvm/test/CodeGen/X86/bitcast-setcc-512.ll | 18 +- llvm/test/CodeGen/X86/bitcast-vector-bool.ll | 220 +- llvm/test/CodeGen/X86/bitcast2.ll | 12 +- llvm/test/CodeGen/X86/bitreverse.ll | 13 +- llvm/test/CodeGen/X86/block-placement.ll | 4 +- llvm/test/CodeGen/X86/bmi-x86_64.ll | 32 +- llvm/test/CodeGen/X86/bmi.ll | 4 +- llvm/test/CodeGen/X86/br-fold.ll | 6 +- .../CodeGen/X86/break-anti-dependencies.ll | 56 +- .../X86/broadcast-elm-cross-splat-vec.ll | 296 +- llvm/test/CodeGen/X86/bswap-vector.ll | 85 +- llvm/test/CodeGen/X86/bswap_tree.ll | 29 + llvm/test/CodeGen/X86/bug26810.ll | 12 +- llvm/test/CodeGen/X86/build-vector-512.ll | 648 +- llvm/test/CodeGen/X86/buildvec-insertvec.ll | 25 +- .../CodeGen/X86/bypass-slow-division-32.ll | 7 - .../CodeGen/X86/bypass-slow-division-64.ll | 10 + .../CodeGen/X86/bypass-slow-division-tune.ll | 50 +- llvm/test/CodeGen/X86/byval6.ll | 41 +- llvm/test/CodeGen/X86/callbr-asm.ll | 30 +- llvm/test/CodeGen/X86/catchpad-lifetime.ll | 4 + .../CodeGen/X86/catchpad-realign-savexmm.ll | 21 +- llvm/test/CodeGen/X86/catchpad-regmask.ll | 6 +- llvm/test/CodeGen/X86/catchpad-weight.ll | 6 +- 
llvm/test/CodeGen/X86/catchret-regmask.ll | 7 +- llvm/test/CodeGen/X86/cfguard-checks.ll | 262 + llvm/test/CodeGen/X86/cfguard-module-flag.ll | 26 + .../CodeGen/X86/cfguard-x86-64-vectorcall.ll | 38 + .../CodeGen/X86/cfguard-x86-vectorcall.ll | 43 + llvm/test/CodeGen/X86/cleanuppad-inalloca.ll | 2 +- llvm/test/CodeGen/X86/cleanuppad-realign.ll | 10 +- .../X86/clear_upper_vector_element_bits.ll | 300 +- llvm/test/CodeGen/X86/cmov-into-branch.ll | 80 +- llvm/test/CodeGen/X86/cmov.ll | 2 - llvm/test/CodeGen/X86/cmp.ll | 4 +- .../X86/cmpxchg8b_alloca_regalloc_handling.ll | 61 + llvm/test/CodeGen/X86/coalesce-esp.ll | 30 +- llvm/test/CodeGen/X86/coalescer-commute1.ll | 22 +- llvm/test/CodeGen/X86/coalescer-commute3.ll | 29 +- llvm/test/CodeGen/X86/coalescer-commute4.ll | 34 +- llvm/test/CodeGen/X86/coalescer-commute5.ll | 9 +- llvm/test/CodeGen/X86/coalescer-remat.ll | 16 +- .../CodeGen/X86/codegen-prepare-collapse.ll | 18 + llvm/test/CodeGen/X86/coff-fp-section-name.ll | 97 + .../CodeGen/X86/combine-64bit-vec-binop.ll | 6 +- llvm/test/CodeGen/X86/combine-and.ll | 12 + llvm/test/CodeGen/X86/combine-bitselect.ll | 24 +- llvm/test/CodeGen/X86/combine-bswap.ll | 4 +- llvm/test/CodeGen/X86/combine-fabs.ll | 13 +- llvm/test/CodeGen/X86/combine-lds.ll | 7 +- llvm/test/CodeGen/X86/combine-mulo.ll | 29 +- llvm/test/CodeGen/X86/combine-or.ll | 2 +- llvm/test/CodeGen/X86/combine-sdiv.ll | 358 +- llvm/test/CodeGen/X86/combine-shl.ll | 103 + llvm/test/CodeGen/X86/combine-srem.ll | 139 +- .../CodeGen/X86/combine-sse41-intrinsics.ll | 47 +- .../CodeGen/X86/combineIncDecVector-crash.ll | 51 + llvm/test/CodeGen/X86/commute-fcmp.ll | 522 +- llvm/test/CodeGen/X86/compare-add.ll | 8 +- llvm/test/CodeGen/X86/compare_folding.ll | 14 +- llvm/test/CodeGen/X86/complex-fastmath.ll | 12 +- .../CodeGen/X86/conditional-tailcall-pgso.ll | 242 + llvm/test/CodeGen/X86/conditional-tailcall.ll | 2 +- llvm/test/CodeGen/X86/constant-combines.ll | 3 +- .../CodeGen/X86/constrained-fp80-trunc-ext.ll | 18 +- 
.../X86/copysign-constant-magnitude.ll | 33 +- llvm/test/CodeGen/X86/cpus-intel.ll | 1 + .../X86/crash-lre-eliminate-dead-def.ll | 4 +- llvm/test/CodeGen/X86/csr-split.ll | 230 + llvm/test/CodeGen/X86/cstring.ll | 3 +- llvm/test/CodeGen/X86/ctpop-combine.ll | 4 +- llvm/test/CodeGen/X86/cvtv2f32.ll | 44 +- llvm/test/CodeGen/X86/cxx_tlscc64.ll | 2 +- .../CodeGen/X86/dagcombine-unsafe-math.ll | 7 +- .../X86/dbg-changes-codegen-branch-folding.ll | 4 +- llvm/test/CodeGen/X86/dbg-changes-codegen.ll | 4 +- llvm/test/CodeGen/X86/dbg-combine.ll | 2 +- llvm/test/CodeGen/X86/dbg-value-func-arg.ll | 86 + llvm/test/CodeGen/X86/debug-loclists-lto.ll | 66 + llvm/test/CodeGen/X86/debug-loclists.ll | 224 +- llvm/test/CodeGen/X86/debugloc-argsize.ll | 2 +- .../X86/div-rem-pair-recomposition-signed.ll | 916 ++ .../div-rem-pair-recomposition-unsigned.ll | 916 ++ llvm/test/CodeGen/X86/divrem.ll | 9 +- llvm/test/CodeGen/X86/divrem8_ext.ll | 38 +- .../X86/dont-trunc-store-double-to-float.ll | 21 +- llvm/test/CodeGen/X86/dtor-priority-coff.ll | 70 + llvm/test/CodeGen/X86/dwarf-headers.ll | 2 +- llvm/test/CodeGen/X86/early-cfi-sections.ll | 2 +- llvm/test/CodeGen/X86/eh-frame-unreachable.ll | 2 +- llvm/test/CodeGen/X86/elf-associated.ll | 8 +- llvm/test/CodeGen/X86/empty-function.ll | 2 +- .../CodeGen/X86/empty-struct-return-type.ll | 11 +- llvm/test/CodeGen/X86/epilogue-cfi-fp.ll | 2 +- llvm/test/CodeGen/X86/exedepsfix-broadcast.ll | 2 +- llvm/test/CodeGen/X86/extend.ll | 14 +- llvm/test/CodeGen/X86/extmul128.ll | 13 +- llvm/test/CodeGen/X86/extmul64.ll | 13 +- llvm/test/CodeGen/X86/extract-combine.ll | 8 +- llvm/test/CodeGen/X86/extract-concat.ll | 168 +- llvm/test/CodeGen/X86/extract-extract.ll | 6 +- llvm/test/CodeGen/X86/extract-insert.ll | 2 - llvm/test/CodeGen/X86/extract-store.ll | 99 +- llvm/test/CodeGen/X86/extractelement-index.ll | 24 + llvm/test/CodeGen/X86/extractelement-load.ll | 182 +- llvm/test/CodeGen/X86/extractps.ll | 23 +- llvm/test/CodeGen/X86/f16c-intrinsics.ll | 10 
+- llvm/test/CodeGen/X86/fadd-combines.ll | 28 + llvm/test/CodeGen/X86/fast-isel-divrem.ll | 8 +- .../test/CodeGen/X86/fast-isel-nontemporal.ll | 48 +- .../X86/fast-isel-select-pseudo-cmov.ll | 24 +- llvm/test/CodeGen/X86/fast-isel-select-sse.ll | 180 +- llvm/test/CodeGen/X86/fast-isel-vecload.ll | 56 +- .../CodeGen/X86/field-extract-use-trunc.ll | 75 +- llvm/test/CodeGen/X86/fildll.ll | 39 +- llvm/test/CodeGen/X86/finite-libcalls.ll | 36 +- llvm/test/CodeGen/X86/fixup-bw-copy.ll | 2 +- llvm/test/CodeGen/X86/fixup-lea.ll | 112 +- llvm/test/CodeGen/X86/float-conv-elim.ll | 2 +- llvm/test/CodeGen/X86/fma-fneg-combine-2.ll | 109 + llvm/test/CodeGen/X86/fmaxnum.ll | 8 +- llvm/test/CodeGen/X86/fmf-reduction.ll | 26 + llvm/test/CodeGen/X86/fminnum.ll | 8 +- llvm/test/CodeGen/X86/fmul-combines.ll | 54 +- llvm/test/CodeGen/X86/fold-call-2.ll | 11 +- llvm/test/CodeGen/X86/fold-call-3.ll | 62 +- llvm/test/CodeGen/X86/fold-call-oper.ll | 2 +- llvm/test/CodeGen/X86/fold-load-unops.ll | 183 + llvm/test/CodeGen/X86/fold-load-vec.ll | 2 +- llvm/test/CodeGen/X86/fold-sext-trunc.ll | 3 +- llvm/test/CodeGen/X86/fold-tied-op.ll | 2 +- .../test/CodeGen/X86/fold-vector-sext-zext.ll | 16 +- llvm/test/CodeGen/X86/fp-arith.ll | 66 +- llvm/test/CodeGen/X86/fp-cvt.ll | 89 +- .../CodeGen/X86/fp-elim-and-no-fp-elim.ll | 4 +- llvm/test/CodeGen/X86/fp-elim.ll | 4 +- llvm/test/CodeGen/X86/fp-fast.ll | 76 +- llvm/test/CodeGen/X86/fp-fold.ll | 242 +- .../CodeGen/X86/fp-intrinsics-flags-x86_64.ll | 18 + llvm/test/CodeGen/X86/fp-intrinsics-flags.ll | 110 + llvm/test/CodeGen/X86/fp-intrinsics-fma.ll | 69 + llvm/test/CodeGen/X86/fp-intrinsics.ll | 2640 +++- llvm/test/CodeGen/X86/fp-stack-2results.ll | 150 +- llvm/test/CodeGen/X86/fp-stack-direct-ret.ll | 8 +- llvm/test/CodeGen/X86/fp-stack-ret-conv.ll | 21 +- llvm/test/CodeGen/X86/fp-stack-ret-store.ll | 30 +- llvm/test/CodeGen/X86/fp-stack-retcopy.ll | 6 +- llvm/test/CodeGen/X86/fp-stack-set-st1.ll | 19 +- .../CodeGen/X86/fp-strict-libcalls-msvc32.ll 
| 155 + llvm/test/CodeGen/X86/fp-strict-scalar-cmp.ll | 4032 ++++++ .../CodeGen/X86/fp-strict-scalar-fptoint.ll | 1336 ++ .../CodeGen/X86/fp-strict-scalar-inttofp.ll | 1299 ++ .../CodeGen/X86/fp-strict-scalar-round.ll | 474 + llvm/test/CodeGen/X86/fp-strict-scalar.ll | 698 + llvm/test/CodeGen/X86/fp128-cast-strict.ll | 975 ++ llvm/test/CodeGen/X86/fp128-cast.ll | 1223 +- llvm/test/CodeGen/X86/fp128-compare.ll | 77 +- llvm/test/CodeGen/X86/fp128-extract.ll | 7 +- llvm/test/CodeGen/X86/fp128-g.ll | 12 +- llvm/test/CodeGen/X86/fp128-i128.ll | 67 +- .../test/CodeGen/X86/fp128-libcalls-strict.ll | 1420 ++ llvm/test/CodeGen/X86/fp128-libcalls.ll | 836 +- llvm/test/CodeGen/X86/fp128-select.ll | 158 +- llvm/test/CodeGen/X86/fp2sint.ll | 9 +- .../CodeGen/X86/fp80-strict-scalar-cmp.ll | 992 ++ llvm/test/CodeGen/X86/fp80-strict-scalar.ll | 872 ++ llvm/test/CodeGen/X86/fp_load_fold.ll | 40 +- llvm/test/CodeGen/X86/fpcmp-soft-fp.ll | 10 +- llvm/test/CodeGen/X86/frame-order.ll | 4 +- llvm/test/CodeGen/X86/frameregister.ll | 2 +- llvm/test/CodeGen/X86/fshl.ll | 81 +- llvm/test/CodeGen/X86/fshr.ll | 78 +- llvm/test/CodeGen/X86/fsxor-alignment.ll | 16 +- llvm/test/CodeGen/X86/ftrunc.ll | 14 +- llvm/test/CodeGen/X86/funclet-layout.ll | 16 +- llvm/test/CodeGen/X86/gcc_except_table.ll | 2 +- llvm/test/CodeGen/X86/gpr-to-mask.ll | 6 +- .../greedy_regalloc_bad_eviction_sequence.ll | 4 +- llvm/test/CodeGen/X86/h-registers-0.ll | 174 +- llvm/test/CodeGen/X86/h-registers-2.ll | 13 +- llvm/test/CodeGen/X86/h-registers-3.ll | 49 +- llvm/test/CodeGen/X86/haddsub-3.ll | 89 +- llvm/test/CodeGen/X86/haddsub-broadcast.ll | 21 + llvm/test/CodeGen/X86/haddsub.ll | 243 +- llvm/test/CodeGen/X86/half.ll | 5 +- llvm/test/CodeGen/X86/hidden-vis-2.ll | 14 +- ...st-and-by-const-from-lshr-in-eqcmp-zero.ll | 466 +- ...ist-and-by-const-from-shl-in-eqcmp-zero.ll | 246 +- .../CodeGen/X86/horizontal-reduce-smax.ll | 414 + .../CodeGen/X86/horizontal-reduce-smin.ll | 414 + 
.../CodeGen/X86/horizontal-reduce-umax.ll | 494 + .../CodeGen/X86/horizontal-reduce-umin.ll | 354 + llvm/test/CodeGen/X86/i128-add.ll | 8 +- llvm/test/CodeGen/X86/i128-and-beyond.ll | 15 +- llvm/test/CodeGen/X86/i128-immediate.ll | 8 +- llvm/test/CodeGen/X86/i128-mul.ll | 135 +- llvm/test/CodeGen/X86/i386-shrink-wrapping.ll | 2 +- llvm/test/CodeGen/X86/ifunc-asm.ll | 3 +- .../CodeGen/X86/illegal-vector-args-return.ll | 16 +- llvm/test/CodeGen/X86/immediate_merging.ll | 106 + llvm/test/CodeGen/X86/immediate_merging64.ll | 41 + llvm/test/CodeGen/X86/implicit-null-check.ll | 68 +- llvm/test/CodeGen/X86/imul.ll | 9 +- .../CodeGen/X86/inline-asm-A-constraint.ll | 2 +- .../test/CodeGen/X86/inline-asm-modifier-n.ll | 9 +- llvm/test/CodeGen/X86/inline-asm-mrv.ll | 30 +- llvm/test/CodeGen/X86/inline-asm-pic.ll | 14 +- llvm/test/CodeGen/X86/inlineasm-sched-bug.ll | 4 +- .../test/CodeGen/X86/ins_subreg_coalesce-2.ll | 9 +- .../test/CodeGen/X86/ins_subreg_coalesce-3.ll | 41 +- .../CodeGen/X86/insertelement-copytoregs.ll | 9 +- .../CodeGen/X86/insertelement-legalize.ll | 23 +- .../test/CodeGen/X86/insertelement-shuffle.ll | 14 +- .../CodeGen/X86/insertelement-var-index.ll | 44 + llvm/test/CodeGen/X86/insertps-combine.ll | 9 +- llvm/test/CodeGen/X86/ipra-reg-usage.ll | 2 +- .../X86/isel-blendi-gettargetconstant.ll | 19 + llvm/test/CodeGen/X86/isel-sink2.ll | 16 +- llvm/test/CodeGen/X86/isnan.ll | 12 +- llvm/test/CodeGen/X86/isnan2.ll | 10 +- llvm/test/CodeGen/X86/ispositive.ll | 8 +- llvm/test/CodeGen/X86/known-bits-vector.ll | 30 +- llvm/test/CodeGen/X86/known-bits.ll | 98 +- .../test/CodeGen/X86/known-signbits-vector.ll | 574 +- llvm/test/CodeGen/X86/kshift.ll | 227 +- llvm/test/CodeGen/X86/label-annotation.ll | 4 +- llvm/test/CodeGen/X86/label-heapallocsite.ll | 43 +- llvm/test/CodeGen/X86/large-constants.ll | 51 +- .../test/CodeGen/X86/lea-opt-memop-check-1.ll | 2 +- llvm/test/CodeGen/X86/lea-recursion.ll | 34 +- llvm/test/CodeGen/X86/lea.ll | 2 +- 
llvm/test/CodeGen/X86/leaf-fp-elim.ll | 2 +- llvm/test/CodeGen/X86/lifetime-alias.ll | 16 +- llvm/test/CodeGen/X86/limited-prec.ll | 528 +- .../CodeGen/X86/llc-override-mcpu-mattr.ll | 6 +- llvm/test/CodeGen/X86/load-combine-dbg.ll | 2 +- llvm/test/CodeGen/X86/load-combine.ll | 28 +- llvm/test/CodeGen/X86/load-local-v3i1.ll | 132 +- llvm/test/CodeGen/X86/load-partial.ll | 314 +- llvm/test/CodeGen/X86/localescape.ll | 2 +- llvm/test/CodeGen/X86/loop-blocks.ll | 46 + .../test/CodeGen/X86/loop-strength-reduce5.ll | 22 +- .../test/CodeGen/X86/loop-strength-reduce6.ll | 7 +- .../test/CodeGen/X86/loop-strength-reduce7.ll | 27 +- llvm/test/CodeGen/X86/lower-bitcast.ll | 58 - llvm/test/CodeGen/X86/lower-ptrmask.ll | 31 + llvm/test/CodeGen/X86/lrshrink.ll | 49 +- llvm/test/CodeGen/X86/lsr-negative-stride.ll | 42 +- llvm/test/CodeGen/X86/lsr-sort.ll | 19 +- llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll | 2 +- llvm/test/CodeGen/X86/machine-combiner.ll | 12 +- .../CodeGen/X86/machine-outliner-debuginfo.ll | 2 +- .../X86/machine-outliner-disubprogram.ll | 2 +- .../CodeGen/X86/machine-outliner-noredzone.ll | 2 +- .../CodeGen/X86/machine-outliner-tailcalls.ll | 2 +- llvm/test/CodeGen/X86/machine-outliner.ll | 2 +- llvm/test/CodeGen/X86/madd.ll | 292 +- llvm/test/CodeGen/X86/masked-iv-safe.ll | 199 +- llvm/test/CodeGen/X86/masked-iv-unsafe.ll | 367 +- llvm/test/CodeGen/X86/masked_compressstore.ll | 5471 ++++---- llvm/test/CodeGen/X86/masked_expandload.ll | 5599 ++++---- llvm/test/CodeGen/X86/masked_gather.ll | 1793 ++- .../test/CodeGen/X86/masked_gather_scatter.ll | 879 +- .../X86/masked_gather_scatter_widen.ll | 307 +- llvm/test/CodeGen/X86/masked_load.ll | 8894 ++++++------ llvm/test/CodeGen/X86/masked_store.ll | 6314 ++++----- llvm/test/CodeGen/X86/masked_store_trunc.ll | 9312 ++++++------- .../CodeGen/X86/masked_store_trunc_ssat.ll | 10534 +++++++------- .../CodeGen/X86/masked_store_trunc_usat.ll | 10110 +++++++------- llvm/test/CodeGen/X86/maskmovdqu.ll | 36 +- 
llvm/test/CodeGen/X86/materialize.ll | 115 + llvm/test/CodeGen/X86/memcmp-minsize.ll | 61 +- .../CodeGen/X86/memcmp-more-load-pairs.ll | 6241 +++++++++ llvm/test/CodeGen/X86/memcmp-optsize.ll | 147 +- llvm/test/CodeGen/X86/memcmp-pgso.ll | 1064 ++ llvm/test/CodeGen/X86/memcmp.ll | 3407 ++++- llvm/test/CodeGen/X86/memcpy.ll | 443 +- llvm/test/CodeGen/X86/memset-3.ll | 7 +- llvm/test/CodeGen/X86/memset-nonzero.ll | 221 +- .../X86/memset-sse-stack-realignment.ll | 192 +- llvm/test/CodeGen/X86/memset-zero.ll | 227 + .../X86/merge-consecutive-loads-256.ll | 29 + .../X86/merge-consecutive-loads-512.ll | 16 +- llvm/test/CodeGen/X86/merge-sp-update-lea.ll | 2 +- .../merge-vector-stores-scale-idx-crash.ll | 2 +- llvm/test/CodeGen/X86/midpoint-int-vec-128.ll | 59 +- llvm/test/CodeGen/X86/midpoint-int-vec-256.ll | 60 +- llvm/test/CodeGen/X86/midpoint-int-vec-512.ll | 716 +- llvm/test/CodeGen/X86/midpoint-int.ll | 605 +- .../CodeGen/X86/min-legal-vector-width.ll | 975 +- llvm/test/CodeGen/X86/misched-copy.ll | 2 +- llvm/test/CodeGen/X86/misched-matmul.ll | 2 +- .../X86/misched_phys_reg_assign_order.ll | 2 +- llvm/test/CodeGen/X86/mixed-ptr-sizes.ll | 128 + .../CodeGen/X86/mmx-arg-passing-x86-64.ll | 8 +- llvm/test/CodeGen/X86/mmx-arith.ll | 337 +- llvm/test/CodeGen/X86/mmx-coalescing.ll | 7 +- llvm/test/CodeGen/X86/mmx-cvt.ll | 19 +- llvm/test/CodeGen/X86/mmx-fold-load.ll | 16 +- llvm/test/CodeGen/X86/mmx-intrinsics.ll | 65 + llvm/test/CodeGen/X86/movfs.ll | 8 +- llvm/test/CodeGen/X86/movmsk-cmp.ll | 268 +- llvm/test/CodeGen/X86/movpc32-check.ll | 4 +- .../test/CodeGen/X86/ms-inline-asm-PR44272.ll | 18 + llvm/test/CodeGen/X86/ms-inline-asm-avx512.ll | 2 +- llvm/test/CodeGen/X86/ms-inline-asm.ll | 2 +- llvm/test/CodeGen/X86/mul-i1024.ll | 10822 ++++++--------- llvm/test/CodeGen/X86/mul-i256.ll | 335 +- llvm/test/CodeGen/X86/mul-i512.ll | 2201 ++- llvm/test/CodeGen/X86/mul-remat.ll | 10 +- llvm/test/CodeGen/X86/mul-shift-reassoc.ll | 11 +- llvm/test/CodeGen/X86/mul128.ll | 36 +- 
llvm/test/CodeGen/X86/mulfix_combine.ll | 156 + llvm/test/CodeGen/X86/mulo-pow2.ll | 13 +- llvm/test/CodeGen/X86/mulvi32.ll | 41 +- llvm/test/CodeGen/X86/musttail-fastcall.ll | 90 +- llvm/test/CodeGen/X86/musttail-inalloca.ll | 38 + llvm/test/CodeGen/X86/musttail-tailcc.ll | 92 + llvm/test/CodeGen/X86/musttail-varargs.ll | 96 +- llvm/test/CodeGen/X86/mxcsr-reg-usage.ll | 52 + llvm/test/CodeGen/X86/neg-shl-add.ll | 20 +- llvm/test/CodeGen/X86/neg_fp.ll | 50 +- llvm/test/CodeGen/X86/negate-add-zero.ll | 49 +- .../X86/negative-stride-fptosi-user.ll | 27 +- llvm/test/CodeGen/X86/no-plt-libcalls.ll | 6 +- llvm/test/CodeGen/X86/no-sse-win64.ll | 129 + llvm/test/CodeGen/X86/no-sse-x86.ll | 76 + llvm/test/CodeGen/X86/nobt.ll | 39 +- llvm/test/CodeGen/X86/nocf_check.ll | 2 +- llvm/test/CodeGen/X86/nomovtopush.ll | 59 + llvm/test/CodeGen/X86/nontemporal-3.ll | 149 +- llvm/test/CodeGen/X86/nontemporal-loads-2.ll | 34 +- llvm/test/CodeGen/X86/nontemporal-loads.ll | 129 +- llvm/test/CodeGen/X86/noreturn-call-linux.ll | 59 + llvm/test/CodeGen/X86/noreturn-call-win64.ll | 53 + llvm/test/CodeGen/X86/noreturn-call.ll | 56 + llvm/test/CodeGen/X86/not-of-dec.ll | 117 + llvm/test/CodeGen/X86/oddshuffles.ll | 453 +- llvm/test/CodeGen/X86/oddsubvector.ll | 51 +- llvm/test/CodeGen/X86/offset-operator.ll | 15 + llvm/test/CodeGen/X86/optimize-max-0.ll | 418 +- llvm/test/CodeGen/X86/overflow.ll | 49 +- llvm/test/CodeGen/X86/overlap-shift.ll | 12 +- llvm/test/CodeGen/X86/packed_struct.ll | 19 +- llvm/test/CodeGen/X86/packss.ll | 153 +- llvm/test/CodeGen/X86/paddus.ll | 163 +- .../X86/patchable-function-entry-ibt.ll | 88 + .../CodeGen/X86/patchable-function-entry.ll | 102 + llvm/test/CodeGen/X86/patchable-prologue.ll | 2 +- llvm/test/CodeGen/X86/peep-test-0.ll | 22 +- llvm/test/CodeGen/X86/peep-test-1.ll | 20 +- llvm/test/CodeGen/X86/phaddsub-extract.ll | 143 +- llvm/test/CodeGen/X86/pic-load-remat.ll | 27 +- llvm/test/CodeGen/X86/pmaddubsw.ll | 168 +- llvm/test/CodeGen/X86/pmul.ll | 64 +- 
llvm/test/CodeGen/X86/pmulh.ll | 100 +- llvm/test/CodeGen/X86/pointer-vector.ll | 2 +- llvm/test/CodeGen/X86/postalloc-coalescing.ll | 19 +- llvm/test/CodeGen/X86/powi-windows.ll | 46 + llvm/test/CodeGen/X86/powi.ll | 49 + llvm/test/CodeGen/X86/pr14161.ll | 2 - llvm/test/CodeGen/X86/pr1489.ll | 97 +- llvm/test/CodeGen/X86/pr1505.ll | 9 +- llvm/test/CodeGen/X86/pr15309.ll | 12 +- llvm/test/CodeGen/X86/pr15705.ll | 2 +- llvm/test/CodeGen/X86/pr18846.ll | 2 +- llvm/test/CodeGen/X86/pr20020.ll | 2 +- llvm/test/CodeGen/X86/pr2326.ll | 13 +- llvm/test/CodeGen/X86/pr24374.ll | 2 +- llvm/test/CodeGen/X86/pr28472.ll | 7 +- llvm/test/CodeGen/X86/pr29112.ll | 32 +- llvm/test/CodeGen/X86/pr31045.ll | 2 +- llvm/test/CodeGen/X86/pr32610.ll | 2 +- llvm/test/CodeGen/X86/pr3366.ll | 10 +- llvm/test/CodeGen/X86/pr34080-2.ll | 4 +- llvm/test/CodeGen/X86/pr34080.ll | 2 +- llvm/test/CodeGen/X86/pr34177.ll | 121 +- llvm/test/CodeGen/X86/pr3457.ll | 24 +- llvm/test/CodeGen/X86/pr34629.ll | 2 +- llvm/test/CodeGen/X86/pr34634.ll | 2 +- llvm/test/CodeGen/X86/pr34657.ll | 13 +- llvm/test/CodeGen/X86/pr35918.ll | 96 +- llvm/test/CodeGen/X86/pr37063.ll | 6 +- llvm/test/CodeGen/X86/pr37916.ll | 1 - llvm/test/CodeGen/X86/pr38795.ll | 27 +- llvm/test/CodeGen/X86/pr38803.ll | 5 +- llvm/test/CodeGen/X86/pr39666.ll | 21 +- .../CodeGen/X86/pr40631_deadstore_elision.ll | 3 +- llvm/test/CodeGen/X86/pr40994.ll | 4 +- llvm/test/CodeGen/X86/pr42727.ll | 34 + llvm/test/CodeGen/X86/pr42870.ll | 31 + llvm/test/CodeGen/X86/pr42905.ll | 26 + llvm/test/CodeGen/X86/pr42909.ll | 21 + llvm/test/CodeGen/X86/pr42998.ll | 63 + llvm/test/CodeGen/X86/pr43157.ll | 20 + llvm/test/CodeGen/X86/pr43507.ll | 18 + llvm/test/CodeGen/X86/pr43509.ll | 25 + llvm/test/CodeGen/X86/pr43529.ll | 39 + llvm/test/CodeGen/X86/pr43575.ll | 14 + llvm/test/CodeGen/X86/pr43820.ll | 383 + llvm/test/CodeGen/X86/pr43866.ll | 37 + llvm/test/CodeGen/X86/pr43952.ll | 16 + llvm/test/CodeGen/X86/pr44140.ll | 97 + llvm/test/CodeGen/X86/pr44396.ll | 
45 + llvm/test/CodeGen/X86/pr44412.ll | 68 + llvm/test/CodeGen/X86/pr44812.ll | 24 + .../CodeGen/X86/prefer-avx256-mask-shuffle.ll | 4 +- llvm/test/CodeGen/X86/prolog-push-seq.ll | 2 +- llvm/test/CodeGen/X86/promote-vec3.ll | 82 +- llvm/test/CodeGen/X86/promote.ll | 23 +- llvm/test/CodeGen/X86/psadbw.ll | 43 + llvm/test/CodeGen/X86/psubus.ll | 262 +- llvm/test/CodeGen/X86/ptr-rotate.ll | 7 +- llvm/test/CodeGen/X86/push-cfi-obj.ll | 2 +- llvm/test/CodeGen/X86/push-cfi.ll | 2 +- llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll | 180 +- .../X86/ragreedy-last-chance-recoloring.ll | 2 +- llvm/test/CodeGen/X86/recip-fastmath.ll | 196 +- llvm/test/CodeGen/X86/recip-fastmath2.ll | 1181 +- .../X86/regalloc-advanced-split-cost.ll | 2 +- llvm/test/CodeGen/X86/regparm.ll | 2 +- llvm/test/CodeGen/X86/relptr-rodata.ll | 21 + llvm/test/CodeGen/X86/rem.ll | 9 +- llvm/test/CodeGen/X86/remarks-section.ll | 93 +- llvm/test/CodeGen/X86/remat-constant.ll | 15 +- llvm/test/CodeGen/X86/ret-addr.ll | 56 +- llvm/test/CodeGen/X86/ret-i64-0.ll | 8 +- llvm/test/CodeGen/X86/ret-mmx.ll | 2 +- llvm/test/CodeGen/X86/reverse_branches.ll | 54 +- llvm/test/CodeGen/X86/rot16.ll | 53 + .../test/CodeGen/X86/rotate-extract-vector.ll | 42 + llvm/test/CodeGen/X86/rotate-extract.ll | 59 + llvm/test/CodeGen/X86/rounding-ops.ll | 57 + llvm/test/CodeGen/X86/sad.ll | 60 +- llvm/test/CodeGen/X86/sadd_sat.ll | 117 +- llvm/test/CodeGen/X86/sadd_sat_plus.ll | 207 + llvm/test/CodeGen/X86/sadd_sat_vec.ll | 2966 ++-- llvm/test/CodeGen/X86/saddo-redundant-add.ll | 22 +- llvm/test/CodeGen/X86/sat-add.ll | 22 +- llvm/test/CodeGen/X86/scalar-extract.ll | 11 +- llvm/test/CodeGen/X86/scalar-fp-to-i32.ll | 1024 ++ llvm/test/CodeGen/X86/scalar-fp-to-i64.ll | 330 +- llvm/test/CodeGen/X86/scalar-int-to-fp.ll | 217 +- llvm/test/CodeGen/X86/scalar_widen_div.ll | 102 +- llvm/test/CodeGen/X86/scalarize-fp.ll | 23 + llvm/test/CodeGen/X86/sdiv_fix.ll | 713 + llvm/test/CodeGen/X86/segmented-stacks.ll | 41 +- 
llvm/test/CodeGen/X86/seh-catchpad.ll | 14 +- llvm/test/CodeGen/X86/seh-except-finally.ll | 8 +- llvm/test/CodeGen/X86/seh-no-invokes.ll | 4 +- llvm/test/CodeGen/X86/select-1-or-neg1.ll | 4 +- .../CodeGen/X86/select-of-fp-constants.ll | 5 +- llvm/test/CodeGen/X86/select-sra.ll | 196 + .../CodeGen/X86/select-testb-volatile-load.ll | 33 + llvm/test/CodeGen/X86/select.ll | 32 +- llvm/test/CodeGen/X86/selectcc-to-shiftand.ll | 209 +- llvm/test/CodeGen/X86/setcc-wide-types.ll | 597 +- llvm/test/CodeGen/X86/setcc.ll | 14 + llvm/test/CodeGen/X86/setuge.ll | 20 +- llvm/test/CodeGen/X86/shadow-stack.ll | 14 +- llvm/test/CodeGen/X86/shift-amount-mod.ll | 22 + llvm/test/CodeGen/X86/shift-avx2-crash.ll | 3 +- llvm/test/CodeGen/X86/shift-by-signext.ll | 148 + llvm/test/CodeGen/X86/shift-coalesce.ll | 27 +- llvm/test/CodeGen/X86/shift-combine-crash.ll | 1 + llvm/test/CodeGen/X86/shift-combine.ll | 270 + llvm/test/CodeGen/X86/shift-i128.ll | 1471 +- llvm/test/CodeGen/X86/shift-logic.ll | 189 + llvm/test/CodeGen/X86/shift-one.ll | 8 +- llvm/test/CodeGen/X86/shift-parts.ll | 21 +- llvm/test/CodeGen/X86/shl_elim.ll | 12 +- llvm/test/CodeGen/X86/shrink-compare-pgso.ll | 321 + llvm/test/CodeGen/X86/shrink-fp-const1.ll | 7 +- llvm/test/CodeGen/X86/shrink-fp-const2.ll | 7 +- llvm/test/CodeGen/X86/shrink_vmul.ll | 526 +- llvm/test/CodeGen/X86/shrinkwrap-hang.ll | 2 +- .../test/CodeGen/X86/shuffle-combine-crash.ll | 15 +- .../X86/shuffle-strided-with-offset-128.ll | 433 +- .../X86/shuffle-strided-with-offset-256.ll | 550 +- .../X86/shuffle-strided-with-offset-512.ll | 589 +- llvm/test/CodeGen/X86/shuffle-vs-trunc-128.ll | 159 +- llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll | 352 +- llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll | 80 +- llvm/test/CodeGen/X86/sincos-opt.ll | 27 + llvm/test/CodeGen/X86/sjlj-baseptr.ll | 2 +- llvm/test/CodeGen/X86/slow-incdec.ll | 52 +- llvm/test/CodeGen/X86/slow-pmulld.ll | 252 +- llvm/test/CodeGen/X86/smul_fix.ll | 55 +- 
llvm/test/CodeGen/X86/smul_fix_sat.ll | 168 +- .../CodeGen/X86/smul_fix_sat_constants.ll | 20 +- llvm/test/CodeGen/X86/soft-fp.ll | 5 +- .../CodeGen/X86/speculative-load-hardening.ll | 24 +- llvm/test/CodeGen/X86/splat-for-size.ll | 198 + llvm/test/CodeGen/X86/split-eh-lpad-edges.ll | 33 +- llvm/test/CodeGen/X86/split-vector-bitcast.ll | 24 +- llvm/test/CodeGen/X86/split-vector-rem.ll | 196 +- llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll | 34 +- llvm/test/CodeGen/X86/sqrt-fastmath.ll | 4 +- llvm/test/CodeGen/X86/sqrt-partial.ll | 110 +- llvm/test/CodeGen/X86/sqrt.ll | 27 +- llvm/test/CodeGen/X86/srem-lkk.ll | 159 + llvm/test/CodeGen/X86/srem-seteq-optsize.ll | 73 + .../CodeGen/X86/srem-seteq-vec-nonsplat.ll | 3370 +++++ llvm/test/CodeGen/X86/srem-seteq-vec-splat.ll | 752 + llvm/test/CodeGen/X86/srem-seteq.ll | 388 + llvm/test/CodeGen/X86/srem-vector-lkk.ll | 556 + llvm/test/CodeGen/X86/sse-align-1.ll | 11 +- llvm/test/CodeGen/X86/sse-align-11.ll | 22 +- llvm/test/CodeGen/X86/sse-align-4.ll | 11 +- llvm/test/CodeGen/X86/sse-align-5.ll | 7 +- llvm/test/CodeGen/X86/sse-align-6.ll | 17 +- llvm/test/CodeGen/X86/sse-align-8.ll | 7 +- llvm/test/CodeGen/X86/sse-align-9.ll | 11 +- llvm/test/CodeGen/X86/sse-load-ret.ll | 45 +- llvm/test/CodeGen/X86/sse-minmax.ll | 6 +- llvm/test/CodeGen/X86/sse-varargs.ll | 20 +- .../CodeGen/X86/sse2-intrinsics-canonical.ll | 106 +- llvm/test/CodeGen/X86/sse2-vector-shifts.ll | 7 +- llvm/test/CodeGen/X86/sse41.ll | 29 +- llvm/test/CodeGen/X86/ssub_sat.ll | 162 +- llvm/test/CodeGen/X86/ssub_sat_plus.ll | 197 + llvm/test/CodeGen/X86/ssub_sat_vec.ll | 3167 ++--- llvm/test/CodeGen/X86/stack-align.ll | 2 +- llvm/test/CodeGen/X86/stack-folding-3dnow.ll | 266 +- .../CodeGen/X86/stack-folding-adx-x86_64.ll | 295 +- .../CodeGen/X86/stack-folding-avx512bf16.ll | 264 +- .../X86/stack-folding-avx512vp2intersect.ll | 81 +- llvm/test/CodeGen/X86/stack-folding-bmi.ll | 491 +- llvm/test/CodeGen/X86/stack-folding-bmi2.ll | 253 +- 
.../test/CodeGen/X86/stack-folding-fp-avx1.ll | 2512 +++- .../CodeGen/X86/stack-folding-fp-avx512.ll | 1366 +- .../CodeGen/X86/stack-folding-fp-avx512vl.ll | 916 +- .../CodeGen/X86/stack-folding-fp-sse42.ll | 1739 ++- .../CodeGen/X86/stack-folding-int-avx1.ll | 1484 +- .../CodeGen/X86/stack-folding-int-avx2.ll | 1396 +- .../CodeGen/X86/stack-folding-int-avx512.ll | 4858 +++++-- .../CodeGen/X86/stack-folding-int-avx512vl.ll | 2910 +++- .../X86/stack-folding-int-avx512vnni.ll | 216 + .../CodeGen/X86/stack-folding-int-sse42.ll | 1806 ++- llvm/test/CodeGen/X86/stack-folding-lwp.ll | 171 +- llvm/test/CodeGen/X86/stack-folding-mmx.ll | 928 +- llvm/test/CodeGen/X86/stack-folding-sha.ll | 88 +- llvm/test/CodeGen/X86/stack-folding-tbm.ll | 805 +- llvm/test/CodeGen/X86/stack-folding-x86_64.ll | 163 +- llvm/test/CodeGen/X86/stack-folding-xop.ll | 855 +- llvm/test/CodeGen/X86/stack-protector-2.ll | 165 + .../stack-protector-strong-macho-win32-xor.ll | 57 + .../X86/stack-protector-vreg-to-vreg-copy.ll | 4 +- llvm/test/CodeGen/X86/stack-protector.ll | 4 +- llvm/test/CodeGen/X86/stack-size-section.ll | 2 +- llvm/test/CodeGen/X86/stack_guard_remat.ll | 2 +- llvm/test/CodeGen/X86/stackmap-nops.ll | 488 +- .../X86/statepoint-no-realign-stack.ll | 107 + llvm/test/CodeGen/X86/store-global-address.ll | 7 +- llvm/test/CodeGen/X86/store-narrow.ll | 197 +- .../CodeGen/X86/store-zero-and-minus-one.ll | 50 + llvm/test/CodeGen/X86/stores-merging.ll | 25 +- llvm/test/CodeGen/X86/storetrunc-fp.ll | 11 +- llvm/test/CodeGen/X86/sub-of-bias.ll | 229 + llvm/test/CodeGen/X86/subcarry.ll | 273 +- llvm/test/CodeGen/X86/subreg-to-reg-0.ll | 8 +- llvm/test/CodeGen/X86/subreg-to-reg-2.ll | 10 +- llvm/test/CodeGen/X86/subreg-to-reg-4.ll | 97 +- llvm/test/CodeGen/X86/subvector-broadcast.ll | 364 +- llvm/test/CodeGen/X86/swifterror.ll | 2 +- llvm/test/CodeGen/X86/switch-bt.ll | 39 + llvm/test/CodeGen/X86/switch-density.ll | 69 + llvm/test/CodeGen/X86/switch-zextload.ll | 14 +- 
llvm/test/CodeGen/X86/tail-call-deref.ll | 36 + .../X86/tail-dup-merge-loop-headers.ll | 116 +- llvm/test/CodeGen/X86/tail-dup-repeat.ll | 11 +- llvm/test/CodeGen/X86/tail-merge-wineh.ll | 2 +- llvm/test/CodeGen/X86/tail-opts.ll | 210 +- llvm/test/CodeGen/X86/tailcall-assume.ll | 15 + llvm/test/CodeGen/X86/tailcall-tailcc.ll | 155 + llvm/test/CodeGen/X86/tailcall.ll | 58 +- llvm/test/CodeGen/X86/tailcc-calleesave.ll | 19 + .../CodeGen/X86/tailcc-disable-tail-calls.ll | 40 + llvm/test/CodeGen/X86/tailcc-fastcc.ll | 49 + llvm/test/CodeGen/X86/tailcc-fastisel.ll | 18 + llvm/test/CodeGen/X86/tailcc-largecode.ll | 71 + llvm/test/CodeGen/X86/tailcc-stackalign.ll | 23 + llvm/test/CodeGen/X86/tailcc-structret.ll | 7 + llvm/test/CodeGen/X86/tailccbyval.ll | 21 + llvm/test/CodeGen/X86/tailccbyval64.ll | 42 + llvm/test/CodeGen/X86/tailccfp.ll | 6 + llvm/test/CodeGen/X86/tailccfp2.ll | 27 + llvm/test/CodeGen/X86/tailccpic1.ll | 16 + llvm/test/CodeGen/X86/tailccpic2.ll | 15 + llvm/test/CodeGen/X86/tailccstack64.ll | 28 + .../test/CodeGen/X86/taildup-heapallocsite.ll | 90 + llvm/test/CodeGen/X86/test-shrink-bug.ll | 8 +- llvm/test/CodeGen/X86/test-vs-bittest.ll | 362 +- llvm/test/CodeGen/X86/throws-cfi-fp.ll | 2 +- .../CodeGen/X86/tls-addr-non-leaf-function.ll | 4 +- llvm/test/CodeGen/X86/tls-shrink-wrapping.ll | 4 +- llvm/test/CodeGen/X86/trap.ll | 9 + llvm/test/CodeGen/X86/trunc-and.ll | 24 + llvm/test/CodeGen/X86/trunc-ext-ld-st.ll | 111 +- llvm/test/CodeGen/X86/trunc-subvector.ll | 134 +- llvm/test/CodeGen/X86/twoaddr-coalesce-3.ll | 2 +- llvm/test/CodeGen/X86/twoaddr-coalesce.ll | 26 +- llvm/test/CodeGen/X86/typeid-alias.ll | 18 + llvm/test/CodeGen/X86/uadd_sat.ll | 87 +- llvm/test/CodeGen/X86/uadd_sat_plus.ll | 138 + llvm/test/CodeGen/X86/uadd_sat_vec.ll | 329 +- llvm/test/CodeGen/X86/udiv_fix.ll | 344 + llvm/test/CodeGen/X86/uint64-to-float.ll | 5 +- llvm/test/CodeGen/X86/umul-with-carry.ll | 22 +- llvm/test/CodeGen/X86/umul_fix.ll | 64 +- 
llvm/test/CodeGen/X86/umul_fix_sat.ll | 542 + .../X86/umulo-128-legalisation-lowering.ll | 85 +- llvm/test/CodeGen/X86/undef-label.ll | 2 +- ...unfold-masked-merge-vector-variablemask.ll | 64 +- llvm/test/CodeGen/X86/unreachable-trap.ll | 11 +- llvm/test/CodeGen/X86/unused_stackslots.ll | 4 +- llvm/test/CodeGen/X86/urem-lkk.ll | 108 + llvm/test/CodeGen/X86/urem-power-of-two.ll | 2 - llvm/test/CodeGen/X86/urem-seteq-nonzero.ll | 324 + .../CodeGen/X86/urem-seteq-vec-nonsplat.ll | 1579 +-- .../CodeGen/X86/urem-seteq-vec-nonzero.ll | 369 + llvm/test/CodeGen/X86/urem-seteq-vec-splat.ll | 261 +- .../X86/urem-seteq-vec-tautological.ll | 275 + llvm/test/CodeGen/X86/urem-seteq.ll | 117 +- llvm/test/CodeGen/X86/urem-vector-lkk.ll | 378 + .../X86/use-cr-result-of-dom-icmp-st.ll | 615 + llvm/test/CodeGen/X86/usub_sat.ll | 95 +- llvm/test/CodeGen/X86/usub_sat_plus.ll | 149 + llvm/test/CodeGen/X86/usub_sat_vec.ll | 340 +- llvm/test/CodeGen/X86/uwtables.ll | 2 +- llvm/test/CodeGen/X86/v8i1-masks.ll | 1293 ++ llvm/test/CodeGen/X86/var-permute-512.ll | 508 +- .../X86/variable-sized-darwin-bzero.ll | 14 +- llvm/test/CodeGen/X86/vec-copysign-avx512.ll | 89 +- llvm/test/CodeGen/X86/vec-strict-128.ll | 438 + llvm/test/CodeGen/X86/vec-strict-256.ll | 298 + llvm/test/CodeGen/X86/vec-strict-512.ll | 283 + llvm/test/CodeGen/X86/vec-strict-cmp-128.ll | 4462 ++++++ llvm/test/CodeGen/X86/vec-strict-cmp-256.ll | 2436 ++++ llvm/test/CodeGen/X86/vec-strict-cmp-512.ll | 1410 ++ .../test/CodeGen/X86/vec-strict-cmp-sub128.ll | 308 + .../CodeGen/X86/vec-strict-fptoint-128.ll | 2813 ++++ .../CodeGen/X86/vec-strict-fptoint-256.ll | 1761 +++ .../CodeGen/X86/vec-strict-fptoint-512.ll | 908 ++ .../CodeGen/X86/vec-strict-inttofp-128.ll | 1377 ++ .../CodeGen/X86/vec-strict-inttofp-256.ll | 1147 ++ .../CodeGen/X86/vec-strict-inttofp-512.ll | 606 + llvm/test/CodeGen/X86/vec-strict-round-128.ll | 174 + llvm/test/CodeGen/X86/vec_align.ll | 13 +- llvm/test/CodeGen/X86/vec_align_i256.ll | 12 +- 
llvm/test/CodeGen/X86/vec_anyext.ll | 1 + llvm/test/CodeGen/X86/vec_call.ll | 26 +- llvm/test/CodeGen/X86/vec_cast2.ll | 158 +- llvm/test/CodeGen/X86/vec_cast3.ll | 156 +- llvm/test/CodeGen/X86/vec_ctbits.ll | 74 +- llvm/test/CodeGen/X86/vec_extract-mmx.ll | 16 +- llvm/test/CodeGen/X86/vec_extract.ll | 10 +- llvm/test/CodeGen/X86/vec_fp_to_int.ll | 776 +- llvm/test/CodeGen/X86/vec_insert-5.ll | 3 +- llvm/test/CodeGen/X86/vec_insert-7.ll | 9 +- llvm/test/CodeGen/X86/vec_insert-mmx.ll | 14 +- llvm/test/CodeGen/X86/vec_int_to_fp.ll | 2061 ++- llvm/test/CodeGen/X86/vec_minmax_sint.ll | 170 +- llvm/test/CodeGen/X86/vec_minmax_uint.ll | 170 +- llvm/test/CodeGen/X86/vec_round.ll | 14 +- llvm/test/CodeGen/X86/vec_saddo.ll | 1389 +- llvm/test/CodeGen/X86/vec_setcc.ll | 45 + llvm/test/CodeGen/X86/vec_shift6.ll | 24 +- llvm/test/CodeGen/X86/vec_shuf-insert.ll | 19 +- llvm/test/CodeGen/X86/vec_smulo.ll | 1003 +- llvm/test/CodeGen/X86/vec_split.ll | 115 +- llvm/test/CodeGen/X86/vec_ss_load_fold.ll | 4 +- llvm/test/CodeGen/X86/vec_ssubo.ll | 1425 +- llvm/test/CodeGen/X86/vec_trunc_sext.ll | 1 + llvm/test/CodeGen/X86/vec_uaddo.ll | 175 +- .../CodeGen/X86/vec_uint_to_fp-fastmath.ll | 240 +- llvm/test/CodeGen/X86/vec_uint_to_fp.ll | 38 +- llvm/test/CodeGen/X86/vec_umulo.ll | 1254 +- llvm/test/CodeGen/X86/vec_usubo.ll | 168 +- llvm/test/CodeGen/X86/vec_zero-2.ll | 1 + llvm/test/CodeGen/X86/vector-bitreverse.ll | 142 +- llvm/test/CodeGen/X86/vector-blend.ll | 74 +- .../test/CodeGen/X86/vector-compare-all_of.ll | 6 - .../test/CodeGen/X86/vector-compare-any_of.ll | 12 +- .../CodeGen/X86/vector-compare-results.ll | 132 +- .../vector-constrained-fp-intrinsics-flags.ll | 59 + .../vector-constrained-fp-intrinsics-fma.ll | 30 +- .../X86/vector-constrained-fp-intrinsics.ll | 4807 +++++-- llvm/test/CodeGen/X86/vector-ext-logic.ll | 113 +- llvm/test/CodeGen/X86/vector-fshl-128.ll | 225 +- llvm/test/CodeGen/X86/vector-fshl-256.ll | 123 +- llvm/test/CodeGen/X86/vector-fshl-512.ll | 632 +- 
llvm/test/CodeGen/X86/vector-fshl-rot-128.ll | 110 +- llvm/test/CodeGen/X86/vector-fshl-rot-256.ll | 96 +- llvm/test/CodeGen/X86/vector-fshl-rot-512.ll | 500 +- llvm/test/CodeGen/X86/vector-fshr-128.ll | 342 +- llvm/test/CodeGen/X86/vector-fshr-256.ll | 123 +- llvm/test/CodeGen/X86/vector-fshr-512.ll | 638 +- llvm/test/CodeGen/X86/vector-fshr-rot-128.ll | 123 +- llvm/test/CodeGen/X86/vector-fshr-rot-256.ll | 75 +- llvm/test/CodeGen/X86/vector-fshr-rot-512.ll | 522 +- llvm/test/CodeGen/X86/vector-gep.ll | 3 +- .../CodeGen/X86/vector-half-conversions.ll | 199 +- llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll | 293 +- llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll | 4 +- llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll | 204 +- llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll | 82 +- llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll | 2 - llvm/test/CodeGen/X86/vector-idiv-udiv-512.ll | 220 +- llvm/test/CodeGen/X86/vector-idiv-v2i32.ll | 1102 +- llvm/test/CodeGen/X86/vector-interleave.ll | 190 + llvm/test/CodeGen/X86/vector-intrinsics.ll | 355 +- llvm/test/CodeGen/X86/vector-lzcnt-512.ll | 172 +- llvm/test/CodeGen/X86/vector-lzcnt-sub128.ll | 85 + llvm/test/CodeGen/X86/vector-mul.ll | 916 +- .../CodeGen/X86/vector-mulfix-legalize.ll | 146 + llvm/test/CodeGen/X86/vector-narrow-binop.ll | 41 +- llvm/test/CodeGen/X86/vector-pcmp.ll | 17 +- llvm/test/CodeGen/X86/vector-popcnt-512.ll | 66 +- llvm/test/CodeGen/X86/vector-reduce-add.ll | 564 +- .../CodeGen/X86/vector-reduce-and-bool.ll | 252 +- llvm/test/CodeGen/X86/vector-reduce-and.ll | 60 +- .../CodeGen/X86/vector-reduce-fadd-fast.ll | 30 +- llvm/test/CodeGen/X86/vector-reduce-mul.ll | 1347 +- .../test/CodeGen/X86/vector-reduce-or-bool.ll | 264 +- llvm/test/CodeGen/X86/vector-reduce-or.ll | 60 +- llvm/test/CodeGen/X86/vector-reduce-smax.ll | 789 +- llvm/test/CodeGen/X86/vector-reduce-smin.ll | 802 +- llvm/test/CodeGen/X86/vector-reduce-umax.ll | 697 +- llvm/test/CodeGen/X86/vector-reduce-umin.ll | 718 +- 
.../CodeGen/X86/vector-reduce-xor-bool.ll | 383 +- llvm/test/CodeGen/X86/vector-reduce-xor.ll | 60 +- llvm/test/CodeGen/X86/vector-rem.ll | 20 +- llvm/test/CodeGen/X86/vector-rotate-128.ll | 253 +- llvm/test/CodeGen/X86/vector-rotate-256.ll | 77 +- llvm/test/CodeGen/X86/vector-rotate-512.ll | 520 +- llvm/test/CodeGen/X86/vector-sext.ll | 307 +- .../test/CodeGen/X86/vector-shift-ashr-512.ll | 160 +- .../CodeGen/X86/vector-shift-ashr-sub128.ll | 3806 +++--- .../X86/vector-shift-by-select-loop.ll | 168 +- .../test/CodeGen/X86/vector-shift-lshr-128.ll | 5 +- .../test/CodeGen/X86/vector-shift-lshr-256.ll | 83 + .../test/CodeGen/X86/vector-shift-lshr-512.ll | 118 +- .../CodeGen/X86/vector-shift-lshr-sub128.ll | 2817 ++-- llvm/test/CodeGen/X86/vector-shift-shl-128.ll | 50 +- llvm/test/CodeGen/X86/vector-shift-shl-256.ll | 50 +- llvm/test/CodeGen/X86/vector-shift-shl-512.ll | 112 +- .../CodeGen/X86/vector-shift-shl-sub128.ll | 2084 +-- .../CodeGen/X86/vector-shuffle-128-v16.ll | 26 +- .../test/CodeGen/X86/vector-shuffle-128-v2.ll | 15 +- .../test/CodeGen/X86/vector-shuffle-128-v4.ll | 35 +- .../test/CodeGen/X86/vector-shuffle-128-v8.ll | 4 +- .../CodeGen/X86/vector-shuffle-256-v32.ll | 16 +- .../test/CodeGen/X86/vector-shuffle-256-v4.ll | 317 +- .../test/CodeGen/X86/vector-shuffle-256-v8.ll | 162 +- .../CodeGen/X86/vector-shuffle-512-v16.ll | 2 +- .../CodeGen/X86/vector-shuffle-512-v32.ll | 119 +- .../CodeGen/X86/vector-shuffle-512-v64.ll | 232 +- .../test/CodeGen/X86/vector-shuffle-512-v8.ll | 26 +- .../test/CodeGen/X86/vector-shuffle-avx512.ll | 1053 +- .../X86/vector-shuffle-combining-avx.ll | 57 + .../X86/vector-shuffle-combining-ssse3.ll | 6 +- .../CodeGen/X86/vector-shuffle-combining.ll | 403 +- llvm/test/CodeGen/X86/vector-shuffle-v1.ll | 50 +- llvm/test/CodeGen/X86/vector-trunc-math.ll | 32 +- llvm/test/CodeGen/X86/vector-trunc-packus.ll | 7151 +++++++--- llvm/test/CodeGen/X86/vector-trunc-ssat.ll | 6973 +++++++--- llvm/test/CodeGen/X86/vector-trunc-usat.ll | 5617 
+++++--- llvm/test/CodeGen/X86/vector-trunc.ll | 276 +- .../CodeGen/X86/vector-truncate-combine.ll | 4 - llvm/test/CodeGen/X86/vector-tzcnt-512.ll | 208 +- llvm/test/CodeGen/X86/vector-variable-idx.ll | 18 +- .../CodeGen/X86/vector-width-store-merge.ll | 49 +- llvm/test/CodeGen/X86/vector-zext.ll | 130 +- llvm/test/CodeGen/X86/viabs.ll | 8 +- llvm/test/CodeGen/X86/volatile.ll | 26 +- .../X86/vp2intersect_multiple_pairs.ll | 37 +- llvm/test/CodeGen/X86/vsel-cmp-load.ll | 82 +- llvm/test/CodeGen/X86/vselect-avx.ll | 48 +- llvm/test/CodeGen/X86/vselect.ll | 94 +- llvm/test/CodeGen/X86/vshift-4.ll | 12 +- .../X86/vshli-simplify-demanded-bits.ll | 58 + llvm/test/CodeGen/X86/widen_arith-1.ll | 14 +- llvm/test/CodeGen/X86/widen_arith-2.ll | 7 +- llvm/test/CodeGen/X86/widen_arith-3.ll | 13 +- llvm/test/CodeGen/X86/widen_bitops-0.ll | 72 +- llvm/test/CodeGen/X86/widen_cast-1.ll | 17 +- llvm/test/CodeGen/X86/widen_cast-2.ll | 3 +- llvm/test/CodeGen/X86/widen_cast-3.ll | 3 +- llvm/test/CodeGen/X86/widen_cast-4.ll | 37 +- llvm/test/CodeGen/X86/widen_cast-5.ll | 9 +- llvm/test/CodeGen/X86/widen_cast-6.ll | 2 - llvm/test/CodeGen/X86/widen_compare-1.ll | 22 +- llvm/test/CodeGen/X86/widen_conv-1.ll | 41 +- llvm/test/CodeGen/X86/widen_conv-2.ll | 8 +- llvm/test/CodeGen/X86/widen_conv-3.ll | 123 +- llvm/test/CodeGen/X86/widen_conv-4.ll | 96 +- llvm/test/CodeGen/X86/widen_conversions.ll | 4 +- llvm/test/CodeGen/X86/widen_load-2.ll | 91 +- llvm/test/CodeGen/X86/widen_mul.ll | 10 +- llvm/test/CodeGen/X86/widen_shuffle-1.ll | 10 +- llvm/test/CodeGen/X86/win-catchpad-csrs.ll | 34 +- llvm/test/CodeGen/X86/win-catchpad.ll | 14 +- llvm/test/CodeGen/X86/win-cleanuppad.ll | 4 +- llvm/test/CodeGen/X86/win-funclet-cfi.ll | 4 +- .../X86/win32-eh-available-externally.ll | 2 +- llvm/test/CodeGen/X86/win32-seh-catchpad.ll | 2 +- .../CodeGen/X86/win32-seh-nested-finally.ll | 4 +- llvm/test/CodeGen/X86/win64-eh-empty-block.ll | 107 + .../test/CodeGen/X86/win64-funclet-savexmm.ll | 115 + 
.../CodeGen/X86/win64-stackprobe-overflow.ll | 14 + llvm/test/CodeGen/X86/win64_call_epi.ll | 5 +- llvm/test/CodeGen/X86/win64_eh.ll | 20 +- llvm/test/CodeGen/X86/win64_frame.ll | 66 +- llvm/test/CodeGen/X86/win_coreclr_chkstk.ll | 2 +- llvm/test/CodeGen/X86/wineh-coreclr.ll | 4 + llvm/test/CodeGen/X86/x86-32-intrcc.ll | 2 +- llvm/test/CodeGen/X86/x86-64-arg.ll | 8 +- llvm/test/CodeGen/X86/x86-64-baseptr.ll | 2 +- llvm/test/CodeGen/X86/x86-64-bittest-logic.ll | 137 + llvm/test/CodeGen/X86/x86-64-disp.ll | 8 +- .../X86/x86-64-double-shifts-Oz-Os-O2.ll | 35 +- .../CodeGen/X86/x86-64-flags-intrinsics.ll | 8 +- llvm/test/CodeGen/X86/x86-64-gv-offset.ll | 14 +- llvm/test/CodeGen/X86/x86-64-intrcc.ll | 2 +- llvm/test/CodeGen/X86/x86-64-mem.ll | 45 +- llvm/test/CodeGen/X86/x86-64-pic-1.ll | 12 +- llvm/test/CodeGen/X86/x86-64-pic-10.ll | 16 +- llvm/test/CodeGen/X86/x86-64-pic-11.ll | 12 +- llvm/test/CodeGen/X86/x86-64-pic-2.ll | 13 +- llvm/test/CodeGen/X86/x86-64-pic-4.ll | 9 +- llvm/test/CodeGen/X86/x86-64-pic-5.ll | 9 +- llvm/test/CodeGen/X86/x86-64-pic-6.ll | 9 +- llvm/test/CodeGen/X86/x86-64-pic-7.ll | 8 +- llvm/test/CodeGen/X86/x86-64-pic-8.ll | 9 +- llvm/test/CodeGen/X86/x86-64-pic-9.ll | 12 +- llvm/test/CodeGen/X86/x86-64-ret0.ll | 13 +- .../CodeGen/X86/x86-64-stack-and-frame-ptr.ll | 2 +- llvm/test/CodeGen/X86/x86-64-varargs.ll | 23 +- llvm/test/CodeGen/X86/x86-cmov-converter.ll | 26 +- .../CodeGen/X86/x86-interleaved-access.ll | 226 +- llvm/test/CodeGen/X86/x86-interrupt_cc.ll | 12 +- .../X86/x86-mixed-alignment-dagcombine.ll | 31 +- .../CodeGen/X86/x86-repmov-copy-eflags.ll | 53 + .../X86/x86-sanitizer-shrink-wrapping.ll | 2 +- llvm/test/CodeGen/X86/x86-shifts.ll | 18 +- .../CodeGen/X86/x86-shrink-wrap-unwind.ll | 6 +- llvm/test/CodeGen/X86/x86-shrink-wrapping.ll | 4 +- llvm/test/CodeGen/X86/x86-store-gv-addr.ll | 15 +- llvm/test/CodeGen/X86/xmulo.ll | 12 +- llvm/test/CodeGen/X86/xop-ifma.ll | 12 +- llvm/test/CodeGen/X86/xor.ll | 7 +- 
llvm/test/CodeGen/X86/xray-custom-log.ll | 8 +- .../xray-selective-instrumentation-miss.ll | 7 +- .../X86/xray-selective-instrumentation.ll | 14 +- llvm/test/CodeGen/X86/xray-typed-event-log.ll | 12 +- llvm/test/CodeGen/X86/zext-inreg-1.ll | 19 +- .../AArch64/call-site-info-output.ll | 41 + llvm/test/DebugInfo/AArch64/line-header.ll.x | 2 +- .../DebugInfo/AArch64/machine-outliner.ll | 2 +- llvm/test/DebugInfo/ARM/PR16736.ll | 2 +- llvm/test/DebugInfo/ARM/PR26163.ll | 4 +- .../DebugInfo/ARM/call-site-info-output.ll | 41 + .../ARM/entry-value-multi-byte-expr.ll | 91 + .../test/DebugInfo/ARM/lowerbdgdeclare_vla.ll | 2 +- .../multiple-constant-uses-drops-dbgloc.ll | 2 +- .../DebugInfo/COFF/array-odr-violation.ll | 39 +- llvm/test/DebugInfo/COFF/asan-module-ctor.ll | 2 +- llvm/test/DebugInfo/COFF/asm.ll | 4 +- .../DebugInfo/COFF/class-options-common.ll | 14 +- llvm/test/DebugInfo/COFF/comdat.ll | 10 +- llvm/test/DebugInfo/COFF/cpp-mangling.ll | 2 +- .../DebugInfo/COFF/defer-complete-type.ll | 2 +- llvm/test/DebugInfo/COFF/enum-co.ll | 2 +- llvm/test/DebugInfo/COFF/fp-stack.ll | 2 +- llvm/test/DebugInfo/COFF/fpo-argsize.ll | 2 +- llvm/test/DebugInfo/COFF/fpo-csrs.ll | 4 +- llvm/test/DebugInfo/COFF/fpo-funclet.ll | 4 +- .../test/DebugInfo/COFF/fpo-realign-alloca.ll | 4 +- .../test/DebugInfo/COFF/fpo-realign-vframe.ll | 4 +- llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll | 4 +- llvm/test/DebugInfo/COFF/fpo-stack-protect.ll | 4 +- llvm/test/DebugInfo/COFF/frameproc-flags.ll | 14 +- llvm/test/DebugInfo/COFF/function-options.ll | 2 +- llvm/test/DebugInfo/COFF/global-constants.ll | 4 +- .../test/DebugInfo/COFF/global-type-hashes.ll | 4 +- llvm/test/DebugInfo/COFF/global_visibility.ll | 4 +- llvm/test/DebugInfo/COFF/inheritance.ll | 4 +- llvm/test/DebugInfo/COFF/inlining-files.ll | 2 +- llvm/test/DebugInfo/COFF/inlining-header.ll | 2 +- llvm/test/DebugInfo/COFF/inlining-levels.ll | 2 +- llvm/test/DebugInfo/COFF/inlining-padding.ll | 2 +- llvm/test/DebugInfo/COFF/inlining.ll | 53 
+- llvm/test/DebugInfo/COFF/lambda.ll | 4 +- llvm/test/DebugInfo/COFF/lexicalblock.ll | 2 +- llvm/test/DebugInfo/COFF/line-zero.ll | 77 + llvm/test/DebugInfo/COFF/lines-difile.ll | 6 +- llvm/test/DebugInfo/COFF/local-constant.ll | 4 +- .../test/DebugInfo/COFF/local-variable-gap.ll | 10 +- llvm/test/DebugInfo/COFF/local-variables.ll | 16 +- llvm/test/DebugInfo/COFF/long-name.ll | 2 +- llvm/test/DebugInfo/COFF/multifile.ll | 4 +- llvm/test/DebugInfo/COFF/multifunction.ll | 4 +- llvm/test/DebugInfo/COFF/nrvo.ll | 6 +- llvm/test/DebugInfo/COFF/parameter-order.ll | 2 +- llvm/test/DebugInfo/COFF/pieces.ll | 29 +- llvm/test/DebugInfo/COFF/pr37492.ll | 30 +- .../test/DebugInfo/COFF/purge-typedef-udts.ll | 2 +- .../test/DebugInfo/COFF/register-variables.ll | 52 +- llvm/test/DebugInfo/COFF/retained-types.ll | 2 +- llvm/test/DebugInfo/COFF/scopes.ll | 2 +- llvm/test/DebugInfo/COFF/simple.ll | 4 +- llvm/test/DebugInfo/COFF/static-methods.ll | 4 +- llvm/test/DebugInfo/COFF/synthetic.ll | 26 +- .../COFF/tail-call-without-lexical-scopes.ll | 4 +- llvm/test/DebugInfo/COFF/thunk.ll | 10 +- llvm/test/DebugInfo/COFF/type-quals.ll | 2 +- llvm/test/DebugInfo/COFF/types-array.ll | 4 +- llvm/test/DebugInfo/COFF/types-basic.ll | 343 +- .../test/DebugInfo/COFF/types-calling-conv.ll | 4 +- llvm/test/DebugInfo/COFF/types-cvarargs.ll | 4 +- .../test/DebugInfo/COFF/types-data-members.ll | 354 +- .../COFF/types-method-ref-qualifiers.ll | 2 +- .../DebugInfo/COFF/types-recursive-struct.ll | 2 +- .../DebugInfo/COFF/types-recursive-unnamed.ll | 2 +- llvm/test/DebugInfo/COFF/udts.ll | 2 +- llvm/test/DebugInfo/COFF/unnamed.ll | 2 +- llvm/test/DebugInfo/COFF/vframe-csr.ll | 4 +- llvm/test/DebugInfo/COFF/vframe-fpo.ll | 4 +- llvm/test/DebugInfo/COFF/vftables.ll | 8 +- .../DebugInfo/COFF/virtual-method-kinds.ll | 8 +- llvm/test/DebugInfo/COFF/virtual-methods.ll | 8 +- .../test/DebugInfo/COFF/vtable-optzn-array.ll | 4 +- .../Generic/2010-05-03-DisableFramePtr.ll | 2 +- 
llvm/test/DebugInfo/Generic/PR20038.ll | 4 +- llvm/test/DebugInfo/Generic/block-asan.ll | 6 +- .../DebugInfo/Generic/constant-pointers.ll | 2 +- .../DebugInfo/Generic/cross-cu-inlining.ll | 4 +- .../DebugInfo/Generic/cross-cu-linkonce.ll | 2 +- llvm/test/DebugInfo/Generic/cu-range-hole.ll | 2 +- llvm/test/DebugInfo/Generic/cu-ranges.ll | 2 +- .../DebugInfo/Generic/dead-argument-order.ll | 2 +- .../Generic/debug-info-always-inline.ll | 6 +- llvm/test/DebugInfo/Generic/debug-label.ll | 3 +- llvm/test/DebugInfo/Generic/def-line.ll | 4 +- .../test/DebugInfo/Generic/directives-only.ll | 4 +- llvm/test/DebugInfo/Generic/discriminator.ll | 4 +- llvm/test/DebugInfo/Generic/enum-types.ll | 2 +- llvm/test/DebugInfo/Generic/enum.ll | 2 +- .../Generic/export-symbol-anonymous-class.ll | 43 + .../Generic/extended-loc-directive.ll | 4 +- .../DebugInfo/Generic/global-sra-array.ll | 2 +- llvm/test/DebugInfo/Generic/global.ll | 2 +- .../Generic/incorrect-variable-debugloc.ll | 6 +- .../Generic/incorrect-variable-debugloc1.ll | 4 +- .../DebugInfo/Generic/inline-no-debug-info.ll | 2 +- llvm/test/DebugInfo/Generic/inline-scopes.ll | 4 +- .../DebugInfo/Generic/inlined-arguments.ll | 4 +- .../test/DebugInfo/Generic/inlined-strings.ll | 2 +- llvm/test/DebugInfo/Generic/lto-comp-dir.ll | 4 +- llvm/test/DebugInfo/Generic/mainsubprogram.ll | 2 +- llvm/test/DebugInfo/Generic/member-order.ll | 2 +- .../Generic/missing-abstract-variable.ll | 4 +- llvm/test/DebugInfo/Generic/multiline.ll | 4 +- llvm/test/DebugInfo/Generic/namespace.ll | 2 +- .../Generic/namespace_function_definition.ll | 2 +- .../namespace_inline_function_definition.ll | 4 +- llvm/test/DebugInfo/Generic/piece-verifier.ll | 2 +- .../DebugInfo/Generic/recursive_inlining.ll | 4 +- llvm/test/DebugInfo/Generic/restrict.ll | 2 +- llvm/test/DebugInfo/Generic/tu-composite.ll | 2 +- .../DebugInfo/Generic/unconditional-branch.ll | 2 +- llvm/test/DebugInfo/Generic/version.ll | 2 +- llvm/test/DebugInfo/Inputs/gmlt.ll | 4 +- 
llvm/test/DebugInfo/Inputs/line.ll | 2 +- llvm/test/DebugInfo/Mips/dsr-fixed-objects.ll | 8 +- llvm/test/DebugInfo/Mips/eh_frame.ll | 7 +- llvm/test/DebugInfo/Mips/fn-call-line.ll | 4 +- llvm/test/DebugInfo/NVPTX/cu-range-hole.ll | 8 +- .../DebugInfo/NVPTX/dbg-declare-alloca.ll | 22 +- llvm/test/DebugInfo/NVPTX/debug-addr-class.ll | 6 +- llvm/test/DebugInfo/NVPTX/debug-file-loc.ll | 6 +- llvm/test/DebugInfo/NVPTX/debug-info.ll | 12 +- llvm/test/DebugInfo/NVPTX/debug-loc-offset.ll | 14 +- .../DebugInfo/RISCV/dwarf-riscv-relocs.ll | 20 +- .../test/DebugInfo/RISCV/relax-debug-frame.ll | 58 + .../Sparc/entry-value-complex-reg-expr.ll | 79 + llvm/test/DebugInfo/Sparc/gnu-window-save.ll | 4 +- llvm/test/DebugInfo/Sparc/subreg.ll | 2 +- .../DebugInfo/WebAssembly/dbg-loop-loc.ll | 2 +- .../WebAssembly/dbg-value-dwarfdump.ll | 70 + .../DebugInfo/WebAssembly/dbg-value-ti.ll | 74 + llvm/test/DebugInfo/X86/DIModule.ll | 4 +- llvm/test/DebugInfo/X86/DIModuleContext.ll | 2 +- .../DebugInfo/X86/DW_AT_calling-convention.ll | 2 +- llvm/test/DebugInfo/X86/DW_AT_deleted.ll | 110 + .../DebugInfo/X86/DW_AT_location-reference.ll | 11 +- .../X86/DW_AT_stmt_list_sec_offset.ll | 2 +- llvm/test/DebugInfo/X86/PR26148.ll | 2 +- llvm/test/DebugInfo/X86/PR37234.ll | 3 +- llvm/test/DebugInfo/X86/addr_comments.ll | 2 +- llvm/test/DebugInfo/X86/arguments.ll | 2 +- llvm/test/DebugInfo/X86/clang-module.ll | 2 +- .../DebugInfo/X86/codegenprep-addrsink.ll | 67 + .../DebugInfo/X86/coff_debug_info_type.ll | 2 +- .../test/DebugInfo/X86/coff_relative_names.ll | 2 +- llvm/test/DebugInfo/X86/constant-loclist.ll | 10 +- .../X86/containing-type-extension-rust.ll | 6 +- llvm/test/DebugInfo/X86/cu-ranges-odr.ll | 2 +- llvm/test/DebugInfo/X86/cu-ranges.ll | 2 +- llvm/test/DebugInfo/X86/dbg-addr-dse.ll | 4 +- .../X86/dbg-value-dropped-instcombine.ll | 76 + .../DebugInfo/X86/dbg-value-frame-index.ll | 4 +- .../X86/dbg-value-regmask-clobber.ll | 6 +- llvm/test/DebugInfo/X86/dbg_value_direct.ll | 4 +- 
.../DebugInfo/X86/dbgcall-site-64-bit-imms.ll | 56 + .../X86/dbgcall-site-zero-valued-imms.ll | 68 + .../DebugInfo/X86/debug-dead-local-var.ll | 2 +- llvm/test/DebugInfo/X86/debug-info-blocks.ll | 2 +- .../X86/debug-info-template-align.ll | 63 + llvm/test/DebugInfo/X86/debug-loc-frame.ll | 6 +- .../X86/debug-macinfo-split-dwarf.ll | 85 + llvm/test/DebugInfo/X86/debug-macro.ll | 5 +- .../test/DebugInfo/X86/debug-ranges-offset.ll | 4 +- llvm/test/DebugInfo/X86/debug_addr.ll | 4 +- .../test/DebugInfo/X86/decl-derived-member.ll | 6 +- llvm/test/DebugInfo/X86/discriminator.ll | 2 +- llvm/test/DebugInfo/X86/discriminator2.ll | 4 +- llvm/test/DebugInfo/X86/discriminator3.ll | 4 +- llvm/test/DebugInfo/X86/dw_op_minus_direct.ll | 2 +- .../X86/dwarf-aranges-no-dwarf-labels.ll | 4 +- .../X86/dwarf-callsite-related-attrs.ll | 16 +- .../test/DebugInfo/X86/dwarf-linkage-names.ll | 2 +- .../DebugInfo/X86/dwarf-pubnames-split.ll | 2 +- llvm/test/DebugInfo/X86/empty_macinfo.ll | 6 +- llvm/test/DebugInfo/X86/fission-inline.ll | 4 +- .../test/DebugInfo/X86/fission-no-inlining.ll | 4 +- llvm/test/DebugInfo/X86/fission-ranges.ll | 32 +- llvm/test/DebugInfo/X86/generate-odr-hash.ll | 2 +- .../DebugInfo/X86/ghost-sdnode-dbgvalues.ll | 2 +- .../DebugInfo/X86/gmlt-empty-base-address.ll | 36 + .../DebugInfo/X86/gnu-public-names-gmlt.ll | 4 +- llvm/test/DebugInfo/X86/gnu-public-names.ll | 4 +- .../DebugInfo/X86/inline-member-function.ll | 2 +- llvm/test/DebugInfo/X86/inline-seldag-test.ll | 2 +- .../DebugInfo/X86/length_symbol_difference.ll | 1 - llvm/test/DebugInfo/X86/lexical_block.ll | 2 +- llvm/test/DebugInfo/X86/line-info.ll | 2 +- .../X86/live-debug-values-expr-conflict.ll | 75 + .../X86/live-debug-values-remove-range.ll | 114 + .../DebugInfo/X86/live-debug-variables.ll | 2 +- llvm/test/DebugInfo/X86/loclists-dwp.ll | 4 +- llvm/test/DebugInfo/X86/low-pc-cu.ll | 2 +- llvm/test/DebugInfo/X86/mi-print.ll | 2 +- llvm/test/DebugInfo/X86/no_debug_ranges.ll | 2 +- 
llvm/test/DebugInfo/X86/nodebug.ll | 2 +- .../DebugInfo/X86/nodebug_with_debug_loc.ll | 4 +- llvm/test/DebugInfo/X86/noreturn_cpp11.ll | 96 +- llvm/test/DebugInfo/X86/objc-property-void.ll | 2 +- llvm/test/DebugInfo/X86/objc_direct.ll | 54 + llvm/test/DebugInfo/X86/pieces-1.ll | 4 +- llvm/test/DebugInfo/X86/pieces-3.ll | 7 +- llvm/test/DebugInfo/X86/pieces-4.ll | 4 +- llvm/test/DebugInfo/X86/pr28270.ll | 4 +- llvm/test/DebugInfo/X86/safestack-byval.ll | 2 +- .../DebugInfo/X86/sdag-transfer-dbgvalue.ll | 64 + llvm/test/DebugInfo/X86/single-dbg_value.ll | 2 +- llvm/test/DebugInfo/X86/spill-nospill.ll | 4 +- .../X86/split-dwarf-multiple-cu-hash.ll | 2 +- .../DebugInfo/X86/split-dwarf-v5-ranges.ll | 2 +- llvm/test/DebugInfo/X86/sret.ll | 6 +- .../test/DebugInfo/X86/sroa-after-inlining.ll | 139 + llvm/test/DebugInfo/X86/sroasplit-2.ll | 2 +- llvm/test/DebugInfo/X86/stack-value-dwarf2.ll | 2 +- llvm/test/DebugInfo/X86/stack-value-piece.ll | 18 +- .../X86/string-offsets-table-order.ll | 6 +- .../DebugInfo/X86/string-offsets-table.ll | 4 +- llvm/test/DebugInfo/X86/template.ll | 4 + llvm/test/DebugInfo/X86/tls.ll | 2 +- .../DebugInfo/X86/tu-to-non-named-type.ll | 2 +- llvm/test/DebugInfo/X86/void-typedef.ll | 2 +- .../test/DebugInfo/salvage-cast-debug-info.ll | 25 + .../tut-simplify-cfg-blockaddress.ll | 23 + .../SimplifyCFG/tut-simplify-cfg1.ll | 90 + .../tut-simplify-cfg2-dead-block-order.ll | 109 + .../SimplifyCFG/tut-simplify-cfg3-phis.ll | 70 + ...ify-cfg4-multiple-duplicate-cfg-updates.ll | 40 + ...t-simplify-cfg5-del-phis-for-dead-block.ll | 122 + .../tut-simplify-cfg6-dead-self-loop.ll | 25 + .../ExecutionEngine/OrcLazy/emulated-tls.ll | 23 + .../test/ExecutionEngine/OrcLazy/printargv.ll | 81 + .../OrcLazy/static-library-support.ll | 11 + llvm/test/Feature/fp-intrinsics.ll | 223 +- llvm/test/Feature/intrinsics.ll | 2 +- llvm/test/Feature/load_extension.ll | 13 + llvm/test/Feature/optnone-opt.ll | 6 - llvm/test/Feature/strip_names.ll | 2 +- 
llvm/test/Feature/undefined.ll | 13 + .../AddressSanitizer/debug-info-alloca.ll | 75 + .../AddressSanitizer/global_addrspace.ll | 31 + .../AddressSanitizer/global_metadata.ll | 2 +- .../AddressSanitizer/global_metadata_array.ll | 2 +- .../global_metadata_windows.ll | 4 +- .../AddressSanitizer/local_stack_base.ll | 4 +- .../version-mismatch-check.ll | 12 + .../DataFlowSanitizer/abilist.ll | 10 +- .../DataFlowSanitizer/debug-nonzero-labels.ll | 2 +- .../DataFlowSanitizer/debug.ll | 2 +- .../DataFlowSanitizer/memset.ll | 2 +- .../HWAddressSanitizer/alloca-compat.ll | 17 + .../HWAddressSanitizer/alloca.ll | 4 +- .../HWAddressSanitizer/basic-compat.ll | 13 + .../HWAddressSanitizer/basic.ll | 42 +- .../dbg-value-tag-offset.ll | 61 + .../HWAddressSanitizer/globals.ll | 37 + .../HWAddressSanitizer/kernel-alloca.ll | 2 +- .../HWAddressSanitizer/landingpad.ll | 10 +- .../HWAddressSanitizer/personality.ll | 90 + .../Instrumentation/InstrProfiling/PR23499.ll | 8 +- .../Instrumentation/InstrProfiling/comdat.ll | 16 +- .../Instrumentation/InstrProfiling/linkage.ll | 8 +- .../MemorySanitizer/attributes.ll | 47 + .../Instrumentation/MemorySanitizer/clmul.ll | 72 + .../MemorySanitizer/msan_basic.ll | 13 +- .../msan_llvm_launder_invariant.ll | 38 + .../msan_llvm_strip_invariant.ll | 21 + .../MemorySanitizer/store-origin.ll | 2 +- .../SanitizerCoverage/abort-in-entry-block.ll | 1 + .../SanitizerCoverage/backedge-pruning.ll | 2 + .../SanitizerCoverage/chains.ll | 1 + .../cmp-tracing-api-x86_32.ll | 25 +- .../cmp-tracing-api-x86_64.ll | 25 +- .../SanitizerCoverage/cmp-tracing.ll | 1 + .../SanitizerCoverage/coff-comdat.ll | 1 + .../coff-pc-table-inline-8bit-counters.ll | 1 + .../SanitizerCoverage/coff-used-ctor.ll | 3 +- .../SanitizerCoverage/const-cmp-tracing.ll | 1 + .../SanitizerCoverage/coverage-dbg.ll | 3 +- .../SanitizerCoverage/coverage.ll | 4 +- .../SanitizerCoverage/coverage2-dbg.ll | 3 +- .../SanitizerCoverage/div-tracing.ll | 1 + .../SanitizerCoverage/gep-tracing.ll | 1 + 
.../SanitizerCoverage/inline-8bit-counters.ll | 1 + .../interposable-symbol-nocomdat.ll | 2 + .../SanitizerCoverage/no-func.ll | 1 + .../SanitizerCoverage/pc-table.ll | 2 + .../SanitizerCoverage/postdominator_check.ll | 2 + .../Instrumentation/SanitizerCoverage/seh.ll | 3 + ...stack-depth-variable-declared-by-user.ll.x | 2 + .../SanitizerCoverage/stack-depth.ll | 9 +- .../SanitizerCoverage/switch-tracing.ll | 1 + .../trace-pc-guard-comdat.ll | 1 + .../trace-pc-guard-inline-8bit-counters.ll | 1 + .../trace-pc-guard-nocomdat.ll | 1 + .../SanitizerCoverage/tracing-comdat.ll | 3 + .../SanitizerCoverage/tracing.ll | 4 + .../SanitizerCoverage/unreachable-critedge.ll | 1 + .../SanitizerCoverage/wineh.ll | 7 +- .../ThreadSanitizer/tsan_basic.ll | 2 +- llvm/test/JitListener/multiple.ll | 2 +- llvm/test/JitListener/simple.ll | 2 +- llvm/test/LTO/ARM/lto-linking-metadata.ll | 19 + .../test/LTO/Resolution/X86/Inputs/alias-1.ll | 2 +- .../Resolution/X86/Inputs/alias-alias-1.ll | 2 +- .../Resolution/X86/Inputs/appending-var-2.ll | 2 +- .../Resolution/X86/Inputs/comdat-mixed-lto.ll | 2 +- llvm/test/LTO/Resolution/X86/Inputs/comdat.ll | 2 +- .../test/LTO/Resolution/X86/Inputs/common2.ll | 2 +- .../test/LTO/Resolution/X86/Inputs/commons.ll | 2 +- .../Resolution/X86/Inputs/dead-strip-alias.ll | 2 +- .../X86/Inputs/dead-strip-fulllto.ll | 2 +- llvm/test/LTO/Resolution/X86/Inputs/ifunc2.ll | 6 + .../LTO/Resolution/X86/Inputs/intrinsic.ll | 2 +- .../X86/Inputs/link-odr-availextern-ae.ll | 2 +- .../X86/Inputs/link-odr-availextern-odr.ll | 2 +- .../LTO/Resolution/X86/Inputs/mixed_lto.ll | 2 +- .../LTO/Resolution/X86/Inputs/mod-asm-used.ll | 2 +- .../Resolution/X86/Inputs/not-prevailing.ll | 2 +- llvm/test/LTO/Resolution/X86/alias-alias.ll | 2 +- llvm/test/LTO/Resolution/X86/alias.ll | 2 +- llvm/test/LTO/Resolution/X86/appending-var.ll | 2 +- llvm/test/LTO/Resolution/X86/asm-output.ll | 2 +- .../Resolution/X86/available-externally.ll | 2 +- .../LTO/Resolution/X86/cache-dso-local.ll | 2 +- 
.../LTO/Resolution/X86/cache-dso-local2.ll | 2 +- .../LTO/Resolution/X86/cache-prevailing.ll | 2 +- .../LTO/Resolution/X86/comdat-mixed-lto.ll | 2 +- llvm/test/LTO/Resolution/X86/comdat.ll | 4 +- llvm/test/LTO/Resolution/X86/common2.ll | 2 +- llvm/test/LTO/Resolution/X86/commons.ll | 2 +- .../LTO/Resolution/X86/dead-strip-alias.ll | 2 +- .../LTO/Resolution/X86/dead-strip-fulllto.ll | 2 +- ...diagnostic-handler-remarks-with-hotness.ll | 2 +- .../X86/diagnostic-handler-remarks.ll | 2 +- .../LTO/Resolution/X86/export-jumptable.ll | 2 +- .../X86/function-alias-non-prevailing.ll | 2 +- llvm/test/LTO/Resolution/X86/ifunc.ll | 4 +- llvm/test/LTO/Resolution/X86/ifunc2.ll | 19 + llvm/test/LTO/Resolution/X86/intrinsic.ll | 2 +- .../Resolution/X86/link-odr-availextern.ll | 2 +- .../LTO/Resolution/X86/linker-redef-thin.ll | 2 +- llvm/test/LTO/Resolution/X86/linker-redef.ll | 2 +- llvm/test/LTO/Resolution/X86/linkonce.ll | 2 +- .../Resolution/X86/load-sample-prof-icp.ll | 2 +- .../Resolution/X86/load-sample-prof-lto.ll | 2 +- .../LTO/Resolution/X86/load-sample-prof.ll | 2 +- .../LTO/Resolution/X86/local-def-dllimport.ll | 2 +- .../test/LTO/Resolution/X86/lowertypetests.ll | 2 +- .../test/LTO/Resolution/X86/lto-unit-check.ll | 2 +- llvm/test/LTO/Resolution/X86/mixed_lto.ll | 2 +- llvm/test/LTO/Resolution/X86/mod-asm-used.ll | 2 +- llvm/test/LTO/Resolution/X86/multi-thinlto.ll | 2 +- .../Resolution/X86/not-prevailing-alias.ll | 2 +- .../X86/not-prevailing-variables.ll | 2 +- .../X86/not-prevailing-weak-aliasee.ll | 33 + .../test/LTO/Resolution/X86/not-prevailing.ll | 2 +- .../LTO/Resolution/X86/setting-dso-local.ll | 2 +- llvm/test/LTO/Resolution/X86/symtab-elf.ll | 2 +- .../LTO/Resolution/X86/type-checked-load.ll | 2 +- llvm/test/LTO/X86/Inputs/codemodel-3.ll | 2 +- llvm/test/LTO/X86/Inputs/dllimport.ll | 2 +- .../X86/Inputs/remangle_intrinsics_tbaa.ll | 2 +- llvm/test/LTO/X86/Inputs/start-lib1.ll | 8 + llvm/test/LTO/X86/Inputs/start-lib2.ll | 6 + 
.../LTO/X86/Inputs/strip-debug-info-bar.ll | 2 +- llvm/test/LTO/X86/Inputs/type-mapping-bug2.ll | 2 +- llvm/test/LTO/X86/Inputs/type-mapping-bug3.ll | 26 + llvm/test/LTO/X86/codemodel-1.ll | 2 +- llvm/test/LTO/X86/codemodel-2.ll | 2 +- llvm/test/LTO/X86/codemodel-3.ll | 2 +- llvm/test/LTO/X86/current-section.ll | 2 +- ...diagnostic-handler-remarks-with-hotness.ll | 2 +- .../LTO/X86/diagnostic-handler-remarks.ll | 2 +- llvm/test/LTO/X86/disable-verify.ll | 2 +- llvm/test/LTO/X86/dllimport.ll | 2 +- llvm/test/LTO/X86/embed-bitcode.ll | 28 + llvm/test/LTO/X86/internalize.ll | 2 +- .../X86/no-undefined-puts-when-implemented.ll | 2 +- llvm/test/LTO/X86/objc-detection-i386.ll | 2 +- llvm/test/LTO/X86/objc-detection.ll | 4 +- llvm/test/LTO/X86/parallel.ll | 2 +- llvm/test/LTO/X86/pr38046.ll | 2 +- llvm/test/LTO/X86/private-symbol.ll | 2 +- llvm/test/LTO/X86/remangle_intrinsics_tbaa.ll | 2 +- llvm/test/LTO/X86/set-merged.ll | 2 +- .../LTO/X86/strip-debug-info-no-call-loc.ll | 2 +- llvm/test/LTO/X86/strip-debug-info.ll | 2 +- llvm/test/LTO/X86/symver-asm.ll | 2 +- llvm/test/LTO/X86/symver-asm2.ll | 2 +- llvm/test/LTO/X86/symver-asm3.ll | 2 +- llvm/test/LTO/X86/tailcallelim.ll | 2 +- llvm/test/LTO/X86/triple-init.ll | 2 +- llvm/test/LTO/X86/triple-init2.ll | 2 +- llvm/test/LTO/X86/type-mapping-bug2.ll | 2 +- llvm/test/LTO/X86/type-mapping-bug3.ll | 47 + llvm/test/Linker/Inputs/module-max-warn.ll | 3 + .../Linker/Inputs/thinlto_funcimport_debug.ll | 4 +- .../Inputs/type-unique-inheritance-a.ll | 2 +- .../Inputs/type-unique-inheritance-b.ll | 6 +- .../Linker/Inputs/type-unique-simple2-a.ll | 2 +- .../Linker/Inputs/type-unique-simple2-b.ll | 6 +- llvm/test/Linker/addrspace.ll | 12 + llvm/test/Linker/comdat16.ll | 2 +- llvm/test/Linker/comdat8.ll | 4 +- llvm/test/Linker/datalayout.ll | 7 +- llvm/test/Linker/module-max-warn.ll | 12 + llvm/test/Linker/thinlto_funcimport_debug.ll | 2 +- llvm/test/Linker/type-unique-simple-a.ll | 2 +- llvm/test/Linker/type-unique-simple-b.ll | 6 +- 
llvm/test/Linker/type-unique-type-array-a.ll | 4 +- llvm/test/Linker/type-unique-type-array-b.ll | 4 +- llvm/test/MC/AArch64/coff-debug.ll | 2 +- llvm/test/MC/ARM/elf-execute-only-section.ll | 4 +- llvm/test/MC/ARM/inline-asm-srcloc.ll | 2 +- llvm/test/MC/COFF/cv-compiler-info.ll | 2 +- llvm/test/MC/ELF/cfi-version.ll | 4 +- llvm/test/MC/ELF/section-relro.ll | 65 + llvm/test/MC/Hexagon/extended_relocations.ll | 4 +- llvm/test/MC/MachO/cstexpr-gotpcrel-32.ll | 37 +- llvm/test/MC/MachO/tlv-bss.ll | 2 +- llvm/test/MC/WebAssembly/assembler-binary.ll | 4 +- llvm/test/MC/WebAssembly/comdat.ll | 5 +- .../data-symbol-in-text-section.ll | 13 + llvm/test/MC/WebAssembly/debug-info.ll | 36 +- llvm/test/MC/WebAssembly/dwarfdump.ll | 2 +- llvm/test/MC/WebAssembly/event-section.ll | 5 +- .../MC/WebAssembly/external-func-address.ll | 5 +- llvm/test/MC/WebAssembly/libcall.ll | 5 +- llvm/test/MC/WebAssembly/no-dead-strip.ll | 4 +- llvm/test/MC/WebAssembly/types.ll | 23 +- llvm/test/MC/X86/stackmap-nops.ll | 4 +- llvm/test/Other/2010-05-06-Printer.ll | 1 - llvm/test/Other/attribute-comment.ll | 2 +- llvm/test/Other/cgscc-devirt-iteration.ll | 2 +- llvm/test/Other/cgscc-libcall-update.ll | 6 +- llvm/test/Other/invariant.group.ll | 4 +- llvm/test/Other/lint.ll | 5 +- llvm/test/Other/module-pass-printer.ll | 18 + llvm/test/Other/new-pass-manager.ll | 2 + llvm/test/Other/new-pm-defaults.ll | 98 +- llvm/test/Other/new-pm-lto-defaults.ll | 11 +- llvm/test/Other/new-pm-pgo-O0.ll | 21 + llvm/test/Other/new-pm-pr42726-cgscc.ll | 57 + llvm/test/Other/new-pm-thinlto-defaults.ll | 57 +- llvm/test/Other/opt-O0-pipeline.ll | 3 +- llvm/test/Other/opt-O2-pipeline.ll | 30 +- llvm/test/Other/opt-O3-pipeline.ll | 30 +- llvm/test/Other/opt-Os-pipeline.ll | 30 +- llvm/test/Other/opt-override-mcpu-mattr.ll | 16 +- llvm/test/Other/pass-pipelines.ll | 3 +- llvm/test/Other/print-module-scope.ll | 4 +- llvm/test/Other/print-slotindexes.ll | 12 + llvm/test/Other/printer.ll | 35 +- 
llvm/test/Other/scalable-vectors-core-ir.ll | 393 + llvm/test/Other/unroll-sroa.ll | 61 + llvm/test/Reduce/remove-args.ll | 13 + llvm/test/Reduce/remove-bbs.ll | 29 + llvm/test/Reduce/remove-funcs.ll | 27 + llvm/test/Reduce/remove-global-vars.ll | 25 + llvm/test/Reduce/remove-instructions.ll | 23 + llvm/test/Reduce/remove-metadata.ll | 20 + .../test/ThinLTO/X86/Inputs/alias_internal.ll | 2 +- llvm/test/ThinLTO/X86/Inputs/cache-icall.ll | 2 +- .../ThinLTO/X86/Inputs/cache-import-lists1.ll | 2 +- .../ThinLTO/X86/Inputs/cache-import-lists2.ll | 2 +- .../Inputs/cache-typeid-resolutions-import.ll | 2 +- .../X86/Inputs/cache-typeid-resolutions1.ll | 2 +- .../X86/Inputs/cache-typeid-resolutions2.ll | 2 +- .../X86/Inputs/cache-typeid-resolutions3.ll | 2 +- llvm/test/ThinLTO/X86/Inputs/cache.ll | 2 +- .../ThinLTO/X86/Inputs/callees-metadata.ll | 2 +- .../ThinLTO/X86/Inputs/cfi-distributed.ll | 2 +- .../X86/Inputs/cfi-icall-only-bazqux.ll | 2 +- .../ThinLTO/X86/Inputs/crash_debuginfo.ll | 2 +- llvm/test/ThinLTO/X86/Inputs/deadstrip.ll | 2 +- .../Inputs/debuginfo-compositetype-import.ll | 2 +- .../ThinLTO/X86/Inputs/debuginfo-cu-import.ll | 2 +- llvm/test/ThinLTO/X86/Inputs/devirt2.ll | 59 + llvm/test/ThinLTO/X86/Inputs/devirt_alias.ll | 15 + .../X86/Inputs/devirt_available_externally.ll | 14 + .../devirt_external_comdat_same_guid.ll | 43 + .../X86/Inputs/devirt_local_same_guid.ll | 41 + .../test/ThinLTO/X86/Inputs/devirt_promote.ll | 39 + .../X86/Inputs/devirt_single_hybrid_bar.ll | 58 + .../X86/Inputs/devirt_single_hybrid_foo.ll | 35 + .../X86/Inputs/diagnostic-handler-remarks.ll | 2 +- .../Inputs/dicompositetype-unique-alias.ll | 2 +- .../X86/Inputs/dicompositetype-unique.ll | 2 +- .../X86/Inputs/dicompositetype-unique2.ll | 2 +- llvm/test/ThinLTO/X86/Inputs/dot-dumper.ll | 2 +- llvm/test/ThinLTO/X86/Inputs/dot-dumper2.ll | 2 +- .../ThinLTO/X86/Inputs/drop-debug-info.ll | 2 +- .../ThinLTO/X86/Inputs/dsolocal_dllimport.ll | 2 +- llvm/test/ThinLTO/X86/Inputs/emit_imports.ll 
| 2 +- llvm/test/ThinLTO/X86/Inputs/emit_imports2.ll | 2 +- llvm/test/ThinLTO/X86/Inputs/empty.ll | 2 +- .../X86/Inputs/empty_module_with_cache.ll | 2 +- llvm/test/ThinLTO/X86/Inputs/export.ll | 2 +- .../ThinLTO/X86/Inputs/funcimport-debug.ll | 2 +- .../ThinLTO/X86/Inputs/funcimport-tbaa.ll | 2 +- llvm/test/ThinLTO/X86/Inputs/funcimport.ll | 2 +- llvm/test/ThinLTO/X86/Inputs/funcimport2.ll | 2 +- .../X86/Inputs/funcimport_alwaysinline.ll | 10 + .../X86/Inputs/function_entry_count.ll | 2 +- .../X86/Inputs/globals-import-blockaddr.ll | 2 +- .../X86/Inputs/globals-import-cf-baz.ll | 2 +- .../test/ThinLTO/X86/Inputs/globals-import.ll | 2 +- .../test/ThinLTO/X86/Inputs/guid_collision.ll | 15 + .../ThinLTO/X86/Inputs/import_opaque_type.ll | 2 +- .../X86/Inputs/index-const-prop-alias.ll | 2 +- .../X86/Inputs/index-const-prop-cache-foo.ll | 2 +- .../Inputs/index-const-prop-cache-test1.ll | 2 +- .../Inputs/index-const-prop-cache-test2.ll | 2 +- .../X86/Inputs/index-const-prop-comdat.ll | 2 +- .../X86/Inputs/index-const-prop-define-g.ll | 2 +- .../X86/Inputs/index-const-prop-full-lto.ll | 2 +- .../X86/Inputs/index-const-prop-gvref.ll | 2 +- .../X86/Inputs/index-const-prop-linkage.ll | 2 +- .../ThinLTO/X86/Inputs/index-const-prop.ll | 2 +- llvm/test/ThinLTO/X86/Inputs/internalize.ll | 6 + .../ThinLTO/X86/Inputs/lazyload_metadata.ll | 2 +- .../X86/Inputs/linkonce_aliasee_ref_import.ll | 2 +- .../X86/Inputs/linkonce_odr_unnamed_addr.ll | 2 +- .../X86/Inputs/linkonce_resolution_comdat.ll | 2 +- llvm/test/ThinLTO/X86/Inputs/llvm.used.ll | 2 +- .../X86/Inputs/local_name_conflict1.ll | 2 +- .../X86/Inputs/local_name_conflict2.ll | 2 +- .../X86/Inputs/local_name_conflict_var1.ll | 2 +- .../X86/Inputs/local_name_conflict_var2.ll | 2 +- llvm/test/ThinLTO/X86/Inputs/merge-triple.ll | 2 +- llvm/test/ThinLTO/X86/Inputs/module_asm.ll | 2 +- llvm/test/ThinLTO/X86/Inputs/module_asm2.ll | 2 +- llvm/test/ThinLTO/X86/Inputs/noinline.ll | 2 +- .../ThinLTO/X86/Inputs/personality-local.ll | 2 +- 
llvm/test/ThinLTO/X86/Inputs/personality.ll | 2 +- llvm/test/ThinLTO/X86/Inputs/pr35472.ll | 2 +- .../X86/Inputs/reference_non_importable.ll | 2 +- .../X86/Inputs/referenced_by_constant.ll | 2 +- .../X86/Inputs/strong_non_prevailing.ll | 2 +- .../thinlto-internalize-doublepromoted.ll | 10 + .../X86/Inputs/thinlto-internalize-used2.ll | 2 +- .../ThinLTO/X86/Inputs/weak_resolution.ll | 2 +- .../ThinLTO/X86/Inputs/writeonly-with-refs.ll | 17 + llvm/test/ThinLTO/X86/alias_internal.ll | 2 +- llvm/test/ThinLTO/X86/autoupgrade.ll | 2 +- llvm/test/ThinLTO/X86/builtin-nostrip.ll | 2 +- llvm/test/ThinLTO/X86/cache-config.ll | 2 +- llvm/test/ThinLTO/X86/cache-icall.ll | 2 +- llvm/test/ThinLTO/X86/cache-import-lists.ll | 2 +- .../ThinLTO/X86/cache-typeid-resolutions.ll | 2 +- llvm/test/ThinLTO/X86/cache.ll | 2 +- llvm/test/ThinLTO/X86/callees-metadata.ll | 2 +- llvm/test/ThinLTO/X86/cfi-devirt.ll | 2 +- llvm/test/ThinLTO/X86/cfi-distributed.ll | 2 +- .../test/ThinLTO/X86/cfi-icall-only-defuse.ll | 2 +- llvm/test/ThinLTO/X86/cfi-icall.ll | 2 +- llvm/test/ThinLTO/X86/cfi.ll | 2 +- llvm/test/ThinLTO/X86/crash_debuginfo.ll | 2 +- llvm/test/ThinLTO/X86/deadstrip.ll | 2 +- .../X86/debuginfo-compositetype-import.ll | 2 +- llvm/test/ThinLTO/X86/debuginfo-cu-import.ll | 2 +- llvm/test/ThinLTO/X86/devirt-after-icp.ll | 2 +- llvm/test/ThinLTO/X86/devirt.ll | 58 +- llvm/test/ThinLTO/X86/devirt2.ll | 278 + llvm/test/ThinLTO/X86/devirt_alias.ll | 58 + .../X86/devirt_available_externally.ll | 72 + .../X86/devirt_external_comdat_same_guid.ll | 87 + .../ThinLTO/X86/devirt_local_same_guid.ll | 74 + llvm/test/ThinLTO/X86/devirt_promote.ll | 72 + .../test/ThinLTO/X86/devirt_promote_legacy.ll | 57 + llvm/test/ThinLTO/X86/devirt_single_hybrid.ll | 55 + ...diagnostic-handler-remarks-with-hotness.ll | 2 +- .../ThinLTO/X86/diagnostic-handler-remarks.ll | 15 +- .../X86/dicompositetype-unique-alias.ll | 2 +- .../ThinLTO/X86/dicompositetype-unique.ll | 2 +- .../ThinLTO/X86/dicompositetype-unique2.ll | 2 
+- llvm/test/ThinLTO/X86/distributed_import.ll | 2 +- llvm/test/ThinLTO/X86/dot-dumper-full-lto.ll | 2 +- llvm/test/ThinLTO/X86/dot-dumper.ll | 10 +- llvm/test/ThinLTO/X86/dot-dumper2.ll | 4 +- llvm/test/ThinLTO/X86/drop-debug-info.ll | 2 +- llvm/test/ThinLTO/X86/dsolocal_dllimport.ll | 2 +- llvm/test/ThinLTO/X86/emit_imports.ll | 2 +- llvm/test/ThinLTO/X86/empty-module.ll | 2 +- .../ThinLTO/X86/empty_module_with_cache.ll | 2 +- llvm/test/ThinLTO/X86/export.ll | 2 +- llvm/test/ThinLTO/X86/funcimport-debug.ll | 2 +- llvm/test/ThinLTO/X86/funcimport-stats.ll | 21 + llvm/test/ThinLTO/X86/funcimport-tbaa.ll | 2 +- llvm/test/ThinLTO/X86/funcimport.ll | 2 +- llvm/test/ThinLTO/X86/funcimport2.ll | 2 +- .../ThinLTO/X86/funcimport_alwaysinline.ll | 26 + llvm/test/ThinLTO/X86/function_entry_count.ll | 2 +- .../ThinLTO/X86/globals-import-blockaddr.ll | 2 +- .../ThinLTO/X86/globals-import-const-fold.ll | 2 +- llvm/test/ThinLTO/X86/globals-import.ll | 4 +- llvm/test/ThinLTO/X86/guid_collision.ll | 37 + llvm/test/ThinLTO/X86/import_opaque_type.ll | 2 +- llvm/test/ThinLTO/X86/index-const-prop-O0.ll | 2 +- .../ThinLTO/X86/index-const-prop-alias.ll | 2 +- .../ThinLTO/X86/index-const-prop-cache.ll | 2 +- .../ThinLTO/X86/index-const-prop-comdat.ll | 2 +- .../test/ThinLTO/X86/index-const-prop-dead.ll | 2 +- .../ThinLTO/X86/index-const-prop-full-lto.ll | 2 +- .../ThinLTO/X86/index-const-prop-gvref.ll | 2 +- .../test/ThinLTO/X86/index-const-prop-ldst.ll | 2 +- .../ThinLTO/X86/index-const-prop-linkage.ll | 2 +- llvm/test/ThinLTO/X86/index-const-prop.ll | 2 +- llvm/test/ThinLTO/X86/index-const-prop2.ll | 10 +- llvm/test/ThinLTO/X86/internalize.ll | 37 +- llvm/test/ThinLTO/X86/lazyload_metadata.ll | 21 +- .../X86/linkonce_aliasee_ref_import.ll | 2 +- .../ThinLTO/X86/linkonce_odr_unnamed_addr.ll | 2 +- .../ThinLTO/X86/linkonce_resolution_comdat.ll | 6 +- llvm/test/ThinLTO/X86/llvm.used.ll | 2 +- llvm/test/ThinLTO/X86/load-store-caching.ll | 2 +- llvm/test/ThinLTO/X86/local_name_conflict.ll 
| 4 +- .../ThinLTO/X86/local_name_conflict_var.ll | 2 +- llvm/test/ThinLTO/X86/merge-triple.ll | 2 +- llvm/test/ThinLTO/X86/module_asm2.ll | 2 +- llvm/test/ThinLTO/X86/module_asm_glob.ll | 2 +- .../X86/module_summary_graph_traits.ll | 2 +- llvm/test/ThinLTO/X86/newpm-basic.ll | 2 +- .../X86/nodevirt-nonpromoted-typeid.ll | 66 + llvm/test/ThinLTO/X86/noinline.ll | 2 +- llvm/test/ThinLTO/X86/not-internalized.ll | 34 + llvm/test/ThinLTO/X86/personality-local.ll | 2 +- llvm/test/ThinLTO/X86/personality.ll | 2 +- llvm/test/ThinLTO/X86/pr35472.ll | 2 +- llvm/test/ThinLTO/X86/printer.ll | 12 + .../ThinLTO/X86/reference_non_importable.ll | 2 +- .../ThinLTO/X86/referenced_by_constant.ll | 2 +- llvm/test/ThinLTO/X86/save_objects.ll | 2 +- .../test/ThinLTO/X86/strong_non_prevailing.ll | 2 +- .../X86/thinlto-internalize-doublepromoted.ll | 33 + .../ThinLTO/X86/thinlto-internalize-used.ll | 2 +- llvm/test/ThinLTO/X86/tli-nobuiltin.ll | 2 +- llvm/test/ThinLTO/X86/weak_externals.ll | 6 +- llvm/test/ThinLTO/X86/weak_resolution.ll | 2 +- llvm/test/ThinLTO/X86/writeonly-with-refs.ll | 38 + llvm/test/ThinLTO/X86/writeonly.ll | 13 +- llvm/test/ThinLTO/X86/writeonly2.ll | 6 +- llvm/test/Transforms/ADCE/2016-09-06.ll | 2 +- .../blocks-with-dead-term-nondeterministic.ll | 2 +- .../Transforms/AddDiscriminators/basic.ll | 2 +- .../AddDiscriminators/call-nested.ll | 4 +- .../test/Transforms/AddDiscriminators/call.ll | 4 +- .../Transforms/AddDiscriminators/diamond.ll | 4 +- .../AddDiscriminators/first-only.ll | 2 +- .../Transforms/AddDiscriminators/invoke.ll | 6 +- .../Transforms/AddDiscriminators/multiple.ll | 2 +- .../AddDiscriminators/no-discriminators.ll | 2 +- .../Transforms/AddDiscriminators/oneline.ll | 2 +- .../AggressiveInstCombine/popcount.ll | 193 + .../AlignmentFromAssumptions/simple.ll | 55 + .../2008-02-01-ReturnAttrs.ll | 19 +- .../2008-07-02-array-indexing.ll | 27 +- .../ArgumentPromotion/2008-09-07-CGUpdate.ll | 7 +- .../2008-09-08-CGUpdateSelfEdge.ll | 17 +- 
.../ArgumentPromotion/X86/attributes.ll | 41 +- .../X86/min-legal-vector-width.ll | 161 +- .../ArgumentPromotion/X86/thiscall.ll | 43 +- .../ArgumentPromotion/aggregate-promote.ll | 26 +- .../Transforms/ArgumentPromotion/attrs.ll | 52 +- .../Transforms/ArgumentPromotion/basictest.ll | 20 +- .../Transforms/ArgumentPromotion/byval-2.ll | 36 +- .../Transforms/ArgumentPromotion/byval.ll | 60 +- .../Transforms/ArgumentPromotion/chained.ll | 21 +- .../ArgumentPromotion/control-flow.ll | 21 +- .../ArgumentPromotion/control-flow2.ll | 33 +- .../Transforms/ArgumentPromotion/crash.ll | 51 +- llvm/test/Transforms/ArgumentPromotion/dbg.ll | 41 +- .../test/Transforms/ArgumentPromotion/fp80.ll | 50 +- .../Transforms/ArgumentPromotion/inalloca.ll | 32 +- .../ArgumentPromotion/invalidation.ll | 33 +- .../Transforms/ArgumentPromotion/musttail.ll | 38 +- .../ArgumentPromotion/naked_functions.ll | 15 +- .../nonzero-address-spaces.ll | 15 +- .../Transforms/ArgumentPromotion/pr27568.ll | 23 +- .../Transforms/ArgumentPromotion/pr3085.ll | 1329 +- .../Transforms/ArgumentPromotion/pr32917.ll | 18 +- .../pr33641_remove_arg_dbgvalue.ll | 18 +- .../Transforms/ArgumentPromotion/profile.ll | 12 +- .../ArgumentPromotion/reserve-tbaa.ll | 29 +- .../test/Transforms/ArgumentPromotion/sret.ll | 23 +- .../test/Transforms/ArgumentPromotion/tail.ll | 24 +- .../Transforms/ArgumentPromotion/variadic.ll | 14 +- .../AMDGPU/expand-atomic-rmw-fadd.ll | 145 + .../AMDGPU/expand-atomic-rmw-fsub.ll | 4 +- .../2008-02-01-ReturnAttrs.ll | 30 + .../2008-07-02-array-indexing.ll | 43 + .../ArgumentPromotion/2008-09-07-CGUpdate.ll | 13 + .../2008-09-08-CGUpdateSelfEdge.ll | 26 + .../ArgumentPromotion/X86/attributes.ll | 89 + .../X86/min-legal-vector-width.ll | 328 + .../ArgumentPromotion/X86/thiscall.ll | 73 + .../ArgumentPromotion/aggregate-promote.ll | 36 + .../Attributor/ArgumentPromotion/attrs.ll | 52 + .../Attributor/ArgumentPromotion/basictest.ll | 45 + .../Attributor/ArgumentPromotion/byval-2.ll | 47 + 
.../Attributor/ArgumentPromotion/byval.ll | 69 + .../Attributor/ArgumentPromotion/chained.ll | 31 + .../ArgumentPromotion/control-flow.ll | 38 + .../ArgumentPromotion/control-flow2.ll | 38 + .../Attributor/ArgumentPromotion/crash.ll | 94 + .../Attributor/ArgumentPromotion/dbg.ll | 51 + .../Attributor/ArgumentPromotion/fp80.ll | 72 + .../Attributor/ArgumentPromotion/inalloca.ll | 91 + .../ArgumentPromotion/invalidation.ll | 50 + .../live_called_from_dead.ll | 55 + .../Attributor/ArgumentPromotion/musttail.ll | 70 + .../ArgumentPromotion/naked_functions.ll | 34 + .../nonzero-address-spaces.ll | 35 + .../Attributor/ArgumentPromotion/pr27568.ll | 44 + .../Attributor/ArgumentPromotion/pr3085.ll | 1945 +++ .../Attributor/ArgumentPromotion/pr32917.ll | 35 + .../pr33641_remove_arg_dbgvalue.ll | 45 + .../Attributor/ArgumentPromotion/profile.ll | 34 + .../ArgumentPromotion/reserve-tbaa.ll | 65 + .../Attributor/ArgumentPromotion/sret.ll | 39 + .../Attributor/ArgumentPromotion/tail.ll | 29 + .../Attributor/ArgumentPromotion/variadic.ll | 38 + .../IPConstantProp/2008-06-09-WeakProp.ll | 25 + .../IPConstantProp/2009-09-24-byval-ptr.ll | 102 + .../Attributor/IPConstantProp/PR16052.ll | 34 + .../Attributor/IPConstantProp/PR26044.ll | 88 + .../Attributor/IPConstantProp/PR43857.ll | 30 + .../IPConstantProp/arg-count-mismatch.ll | 85 + .../IPConstantProp/arg-type-mismatch.ll | 25 + .../Attributor/IPConstantProp/comdat-ipo.ll | 34 + .../IPConstantProp/dangling-block-address.ll | 43 + .../Attributor/IPConstantProp/deadarg.ll | 7 + .../IPConstantProp/fp-bc-icmp-const-fold.ll | 78 + .../Attributor/IPConstantProp/global.ll | 40 + .../IPConstantProp/multiple_callbacks.ll | 117 + .../IPConstantProp/musttail-call.ll | 79 + .../Attributor/IPConstantProp/naked-return.ll | 47 + .../IPConstantProp/openmp_parallel_for.ll | 132 + .../Attributor/IPConstantProp/pthreads.ll | 96 + .../Attributor/IPConstantProp/recursion.ll | 19 + .../IPConstantProp/remove-call-inst.ll | 45 + 
.../IPConstantProp/return-argument.ll | 96 + .../IPConstantProp/return-constant.ll | 55 + .../IPConstantProp/return-constants.ll | 69 + ...fter-each-resolving-undefs-for-function.ll | 65 + .../IPConstantProp/thread_local_acs.ll | 53 + llvm/test/Transforms/Attributor/align.ll | 412 + llvm/test/Transforms/Attributor/callbacks.ll | 66 + .../Attributor/dereferenceable-1.ll | 312 + .../Attributor/dereferenceable-2.ll | 356 + .../Transforms/Attributor/heap_to_stack.ll | 402 + .../Transforms/Attributor/internal-noalias.ll | 48 + llvm/test/Transforms/Attributor/liveness.ll | 948 ++ .../Attributor/lvi-after-jumpthreading.ll | 181 + .../Transforms/Attributor/lvi-for-ashr.ll | 46 + llvm/test/Transforms/Attributor/misc.ll | 105 + .../Transforms/Attributor/new_attributes.ll | 43 + llvm/test/Transforms/Attributor/noalias.ll | 284 + .../test/Transforms/Attributor/nocapture-1.ll | 346 + .../test/Transforms/Attributor/nocapture-2.ll | 471 + llvm/test/Transforms/Attributor/nofree.ll | 243 + llvm/test/Transforms/Attributor/nonnull.ll | 840 ++ llvm/test/Transforms/Attributor/norecurse.ll | 147 + llvm/test/Transforms/Attributor/noreturn.ll | 149 + .../Transforms/Attributor/noreturn_async.ll | 142 + .../Transforms/Attributor/noreturn_sync.ll | 138 + llvm/test/Transforms/Attributor/nosync.ll | 319 + llvm/test/Transforms/Attributor/nounwind.ll | 98 + llvm/test/Transforms/Attributor/range.ll | 504 + .../read_write_returned_arguments_scc.ll | 165 + llvm/test/Transforms/Attributor/readattrs.ll | 197 + llvm/test/Transforms/Attributor/returned.ll | 812 ++ .../Attributor/undefined_behavior.ll | 305 + .../Transforms/Attributor/value-simplify.ll | 316 + llvm/test/Transforms/Attributor/willreturn.ll | 482 + llvm/test/Transforms/BDCE/pr41925.ll | 126 + .../Transforms/BlockExtractor/invalid-line.ll | 9 + .../ARM/bitreverse-recognize.ll | 2 +- .../ARM/sink-add-mul-shufflevector.ll | 219 + .../CodeGenPrepare/ARM/sinkchain.ll | 107 + .../PowerPC/split-store-alignment.ll | 111 + 
.../X86/memset_chk-simplify-nobuiltin.ll | 7 +- .../CodeGenPrepare/X86/overflow-intrinsics.ll | 20 - .../CodeGenPrepare/X86/sink-addrmode.ll | 41 + .../X86/split-store-alignment.ll | 74 + .../CodeGenPrepare/bitreverse-hang.ll | 2 +- .../Transforms/CodeGenPrepare/dom-tree.ll | 2 +- .../CodeGenPrepare/sink-shift-and-trunc.ll | 2 +- .../Transforms/ConstProp/InsertElement.ll | 33 +- llvm/test/Transforms/ConstProp/fma.ll | 246 + .../test/Transforms/ConstProp/overflow-ops.ll | 13 +- .../AArch64/const-hoist-intrinsics.ll | 89 + .../AArch64/consthoist-unreachable.ll | 64 + .../Transforms/ConstantHoisting/X86/ehpad.ll | 4 +- .../X86/pr43903-not-all-uses-rebased.ll | 51 + .../Coroutines/coro-alloc-with-param.ll | 96 + llvm/test/Transforms/Coroutines/coro-debug.ll | 10 +- llvm/test/Transforms/Coroutines/coro-early.ll | 2 +- .../Transforms/Coroutines/coro-param-copy.ll | 69 + .../Coroutines/coro-retcon-alloca.ll | 247 + .../Coroutines/coro-retcon-once-value.ll | 114 + .../Coroutines/coro-retcon-once-value2.ll | 71 + .../Coroutines/coro-retcon-resume-values.ll | 85 + .../Coroutines/coro-retcon-resume-values2.ll | 100 + .../Coroutines/coro-retcon-value.ll | 103 + .../test/Transforms/Coroutines/coro-retcon.ll | 113 + .../Transforms/Coroutines/coro-split-dbg.ll | 8 +- .../Coroutines/coro-split-musttail.ll | 27 +- .../Coroutines/coro-split-musttail1.ll | 104 + .../Transforms/Coroutines/coro-swifterror.ll | 143 + .../CorrelatedValuePropagation/and.ll | 127 + .../CorrelatedValuePropagation/icmp.ll | 381 +- .../CorrelatedValuePropagation/mul.ll | 175 + .../CorrelatedValuePropagation/overflows.ll | 587 +- .../phi-common-val.ll | 26 + .../CorrelatedValuePropagation/range.ll | 3 +- .../CorrelatedValuePropagation/select.ll | 163 +- .../CorrelatedValuePropagation/sext.ll | 135 + .../CorrelatedValuePropagation/shl.ll | 378 + llvm/test/Transforms/DCE/calls-errno.ll | 17 +- llvm/test/Transforms/DCE/dbg-value-removal.ll | 112 + .../DeadArgElim/2010-04-30-DbgInfo.ll | 2 +- 
llvm/test/Transforms/DeadArgElim/dbginfo.ll | 4 +- .../Transforms/DeadArgElim/naked_functions.ll | 2 +- .../DeleteThrowableInst.ll | 41 + .../DeadStoreElimination/libcalls.ll | 10 +- .../DeadStoreElimination/libcalls2.ll | 14 + .../mda-with-dbg-values.ll | 2 +- .../Transforms/DeadStoreElimination/simple.ll | 4 +- .../DivRemPairs/X86/div-expanded-rem-pair.ll | 60 +- .../DivRemPairs/X86/div-rem-pairs.ll | 8 +- llvm/test/Transforms/EarlyCSE/commute.ll | 85 +- .../Transforms/EarlyCSE/debug-info-undef.ll | 2 +- .../test/Transforms/EarlyCSE/floatingpoint.ll | 12 + llvm/test/Transforms/EarlyCSE/writeonly.ll | 15 + .../EntryExitInstrumenter/mcount.ll | 4 +- .../Transforms/ExpandMemCmp/X86/memcmp.ll | 140 +- llvm/test/Transforms/Float2Int/basic.ll | 412 +- .../Transforms/FunctionAttrs/arg_returned.ll | 176 +- .../Transforms/FunctionAttrs/comdat-ipo.ll | 4 +- .../FunctionAttrs/naked_functions.ll | 2 +- .../Transforms/FunctionAttrs/nocapture.ll | 122 +- .../FunctionAttrs/nofree-attributor.ll | 75 +- llvm/test/Transforms/FunctionAttrs/nonnull.ll | 559 +- .../Transforms/FunctionAttrs/norecurse.ll | 4 +- .../test/Transforms/FunctionAttrs/nounwind.ll | 20 +- .../Transforms/FunctionAttrs/readattrs.ll | 12 +- .../test/Transforms/FunctionAttrs/readnone.ll | 8 +- .../Transforms/FunctionAttrs/writeonly.ll | 30 + .../FunctionImport/Inputs/comdat.ll | 2 +- .../FunctionImport/Inputs/funcimport_debug.ll | 4 +- .../Inputs/funcimport_resolved1.ll | 2 +- .../Inputs/funcimport_resolved2.ll | 2 +- .../FunctionImport/Inputs/funcimport_var2.ll | 2 +- .../Inputs/hotness_based_import.ll | 2 +- .../FunctionImport/Inputs/import_stats.ll | 2 +- .../FunctionImport/Inputs/not-prevailing.ll | 2 +- llvm/test/Transforms/FunctionImport/comdat.ll | 2 +- .../FunctionImport/funcimport_debug.ll | 6 +- .../FunctionImport/funcimport_resolved.ll | 2 +- .../FunctionImport/funcimport_var.ll | 2 +- .../FunctionImport/hotness_based_import.ll | 2 +- .../Transforms/FunctionImport/import_stats.ll | 2 +- 
.../FunctionImport/not-prevailing.ll | 2 +- .../Transforms/GCOVProfiling/global-ctor.ll.x | 2 +- .../test/Transforms/GCOVProfiling/linezero.ll | 4 +- .../GCOVProfiling/return-block.ll.x | 4 +- .../2011-06-01-NonLocalMemdepMiscompile.ll | 10 +- llvm/test/Transforms/GVN/PRE/rle.ll | 125 + llvm/test/Transforms/GVN/cond_br2.ll | 6 +- llvm/test/Transforms/GVN/edge.ll | 69 + llvm/test/Transforms/GVN/equality-assume.ll | 168 + llvm/test/Transforms/GVN/pr42605.ll | 87 + llvm/test/Transforms/GVN/preserve-analysis.ll | 56 + .../GVNHoist/hoist-unsafe-pr31729.ll | 2 +- llvm/test/Transforms/GVNHoist/pr30499.ll | 4 +- .../GlobalDCE/virtual-functions-base-call.ll | 78 + .../virtual-functions-base-pointer-call.ll | 118 + .../virtual-functions-derived-call.ll | 78 + .../virtual-functions-derived-pointer-call.ll | 120 + .../virtual-functions-visibility-post-lto.ll | 95 + .../virtual-functions-visibility-pre-lto.ll | 94 + .../Transforms/GlobalDCE/virtual-functions.ll | 55 + llvm/test/Transforms/GlobalDCE/vtable-rtti.ll | 47 + llvm/test/Transforms/GlobalOpt/atomic.ll | 8 +- .../Transforms/GlobalOpt/large-int-crash.ll | 23 + .../GlobalOpt/long-compilation-global-sra.ll | 61 + .../Transforms/GlobalOpt/naked_functions.ll | 2 +- .../basic_widenable_condition_guards.ll | 143 +- .../Transforms/GuardWidening/mixed_guards.ll | 3 +- .../HardwareLoops/ARM/calls-codegen.ll | 5 +- .../Transforms/HardwareLoops/ARM/calls.ll | 13 +- .../Transforms/HardwareLoops/ARM/counter.ll | 2 +- .../Transforms/HardwareLoops/ARM/do-rem.ll | 2 +- .../HardwareLoops/ARM/fp-emulation.ll | 4 +- .../Transforms/HardwareLoops/ARM/simple-do.ll | 5 +- .../Transforms/HardwareLoops/ARM/structure.ll | 25 +- .../assumption-cache-invalidation.ll | 50 + llvm/test/Transforms/HotColdSplit/noreturn.ll | 20 + .../Transforms/HotColdSplit/retain-section.ll | 28 + .../IPConstantProp/2008-06-09-WeakProp.ll | 4 +- .../IPConstantProp/2009-09-24-byval-ptr.ll | 6 +- .../test/Transforms/IPConstantProp/PR16052.ll | 19 +- 
.../test/Transforms/IPConstantProp/PR26044.ll | 58 +- .../test/Transforms/IPConstantProp/PR43857.ll | 29 + .../IPConstantProp/arg-count-mismatch.ll | 5 +- .../IPConstantProp/multiple_callbacks.ll | 1 + .../Transforms/IPConstantProp/naked-return.ll | 2 +- .../Transforms/IPConstantProp/recursion.ll | 5 +- .../IPConstantProp/return-argument.ll | 7 +- .../IPConstantProp/return-constant.ll | 7 +- .../IPConstantProp/return-constants.ll | 7 +- ...fter-each-resolving-undefs-for-function.ll | 17 +- .../IPConstantProp/user-with-multiple-uses.ll | 3 - .../IRCE/ranges_of_different_types.ll | 8 +- .../test/Transforms/IRCE/rc-negative-bound.ll | 56 +- .../IndVarSimplify/eliminate-exit-no-dl.ll | 45 + .../IndVarSimplify/eliminate-exit.ll | 34 + .../IndVarSimplify/eliminate-trunc.ll | 2 +- .../IndVarSimplify/exit_value_tests.ll | 1 - .../IndVarSimplify/floating-point-iv.ll | 231 + .../IndVarSimplify/indvar-debug-value.ll | 92 + .../IndVarSimplify/indvar-debug-value2.ll | 90 + .../IndVarSimplify/lftr-dead-ivs.ll | 2 +- .../Transforms/IndVarSimplify/lftr-pr20680.ll | 6 +- .../Transforms/IndVarSimplify/lftr-reuse.ll | 2 +- llvm/test/Transforms/IndVarSimplify/lftr.ll | 31 +- .../loop-invariant-conditions.ll | 2 +- .../IndVarSimplify/loop-predication.ll | 858 ++ .../IndVarSimplify/loop_evaluate_1.ll | 41 +- .../IndVarSimplify/no-iv-rewrite.ll | 2 +- .../test/Transforms/IndVarSimplify/pr24356.ll | 2 +- .../test/Transforms/IndVarSimplify/pr38674.ll | 11 +- .../test/Transforms/IndVarSimplify/pr39673.ll | 2 +- .../IndVarSimplify/replace-loop-exit-folds.ll | 2 +- .../Transforms/IndVarSimplify/rlev-add-me.ll | 228 + .../IndVarSimplify/sink-from-preheader.ll | 2 +- .../Transforms/IndVarSimplify/widen-nsw.ll | 2 +- .../AMDGPU/address-space-id-funcs.ll | 55 + .../Transforms/InferFunctionAttrs/annotate.ll | 16 +- .../InferFunctionAttrs/dereferenceable.ll | 79 + llvm/test/Transforms/Inline/X86/switch.ll | 160 + .../Inline/alloca-dbgdeclare-merge.ll | 4 +- .../Inline/debug-info-duplicate-calls.ll 
| 6 +- .../Inline/inline-indirect-chain.ll | 55 + .../Inline/inline-skip-use-empty-alloca.ll | 71 + llvm/test/Transforms/Inline/inline-vla.ll | 4 +- llvm/test/Transforms/Inline/inline_invoke.ll | 2 +- .../test/Transforms/Inline/inline_minisize.ll | 2 +- .../Inline/no-inline-line-tables.ll | 99 + llvm/test/Transforms/Inline/noalias-calls.ll | 2 +- .../optimization-remarks-hotness-threshold.ll | 2 +- .../optimization-remarks-passed-yaml.ll | 2 +- .../Transforms/Inline/optimization-remarks.ll | 6 +- ...004-11-27-SetCCForCastLargerAndConstant.ll | 233 +- .../InstCombine/2008-12-17-SRemNegConstVec.ll | 7 +- .../2009-06-16-SRemDemandedBits.ll | 15 +- .../InstCombine/2010-01-28-NegativeSRem.ll | 15 +- .../AMDGPU/amdgcn-demanded-vector-elts.ll | 803 +- .../InstCombine/AMDGPU/amdgcn-intrinsics.ll | 10 +- .../test/Transforms/InstCombine/AMDGPU/tan.ll | 21 + .../Transforms/InstCombine/ARM/mve-v2i2v.ll | 330 + .../test/Transforms/InstCombine/ARM/strcmp.ll | 63 +- llvm/test/Transforms/InstCombine/PR37526.ll | 7 +- .../InstCombine/X86/2009-03-23-i80-fp80.ll | 20 + .../Transforms/InstCombine/X86/x86-avx2.ll | 21 +- .../Transforms/InstCombine/X86/x86-avx512.ll | 154 +- .../Transforms/InstCombine/X86/x86-bmi-tbm.ll | 132 + .../Transforms/InstCombine/X86/x86-f16c.ll | 4 +- .../Transforms/InstCombine/X86/x86-fma.ll | 4 +- .../Transforms/InstCombine/X86/x86-pack.ll | 9 +- .../Transforms/InstCombine/X86/x86-pshufb.ll | 95 +- .../Transforms/InstCombine/X86/x86-sse.ll | 80 +- .../Transforms/InstCombine/X86/x86-sse4a.ll | 20 +- .../InstCombine/X86/x86-vector-shifts.ll | 570 +- .../Transforms/InstCombine/X86/x86-vpermil.ll | 67 +- .../Transforms/InstCombine/X86/x86-xop.ll | 32 +- llvm/test/Transforms/InstCombine/abs-1.ll | 156 +- llvm/test/Transforms/InstCombine/add4.ll | 33 +- .../Transforms/InstCombine/addrspacecast.ll | 2 +- .../InstCombine/adjust-for-minmax.ll | 164 +- .../test/Transforms/InstCombine/align-addr.ll | 64 +- .../Transforms/InstCombine/and-or-icmps.ll | 145 +- 
llvm/test/Transforms/InstCombine/and.ll | 3 +- llvm/test/Transforms/InstCombine/and2.ll | 38 +- .../Transforms/InstCombine/apint-shift.ll | 7 +- llvm/test/Transforms/InstCombine/apint-sub.ll | 38 +- llvm/test/Transforms/InstCombine/assume.ll | 155 +- .../InstCombine/assume_inevitable.ll | 4 +- llvm/test/Transforms/InstCombine/bcopy.ll | 25 + .../InstCombine/bitcast-function.ll | 206 + .../InstCombine/bitcast-phi-uselistorder.ll | 33 + .../InstCombine/bitcast-vec-canon.ll | 31 + .../Transforms/InstCombine/bitreverse-hang.ll | 2 +- llvm/test/Transforms/InstCombine/bswap.ll | 115 +- .../builtin-dynamic-object-size.ll | 8 +- .../builtin-object-size-custom-dl.ll | 32 + .../InstCombine/builtin-object-size-ptr.ll | 61 +- .../test/Transforms/InstCombine/call-guard.ll | 3 +- ...etween-negative-and-positive-thresholds.ll | 479 + ...ern-between-zero-and-positive-threshold.ll | 399 + ...th-select-of-constant-threshold-pattern.ll | 134 + ...nt-low-bit-mask-and-icmp-eq-to-icmp-ule.ll | 2 +- ...nt-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll | 12 +- ...t-low-bit-mask-and-icmp-sge-to-icmp-sle.ll | 2 +- ...t-low-bit-mask-and-icmp-sgt-to-icmp-sgt.ll | 14 +- ...t-low-bit-mask-and-icmp-sle-to-icmp-sle.ll | 2 +- ...t-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll | 12 +- ...t-low-bit-mask-and-icmp-uge-to-icmp-ule.ll | 2 +- ...t-low-bit-mask-and-icmp-ugt-to-icmp-ugt.ll | 14 +- ...t-low-bit-mask-and-icmp-ule-to-icmp-ule.ll | 2 +- ...t-low-bit-mask-and-icmp-ult-to-icmp-ugt.ll | 12 +- .../Transforms/InstCombine/cast-mul-select.ll | 2 + llvm/test/Transforms/InstCombine/cast.ll | 1006 +- .../Transforms/InstCombine/clamp-to-minmax.ll | 157 +- .../InstCombine/compare-unescaped.ll | 108 +- ...e-length-signext-after-high-bit-extract.ll | 1139 ++ .../InstCombine/constant-fold-libfunc.ll | 2 +- llvm/test/Transforms/InstCombine/copysign.ll | 94 +- llvm/test/Transforms/InstCombine/cos-1.ll | 20 +- .../Transforms/InstCombine/debuginfo-dce.ll | 2 +- .../Transforms/InstCombine/deref-alloc-fns.ll | 229 + 
.../Transforms/InstCombine/disable-builtin.ll | 21 + .../Transforms/InstCombine/div-shift-crash.ll | 2 +- llvm/test/Transforms/InstCombine/div.ll | 9 +- .../InstCombine/do-not-clone-dbg-declare.ll | 144 + .../InstCombine/element-atomic-memintrins.ll | 66 +- .../InstCombine/expensive-combines.ll | 28 + .../Transforms/InstCombine/fabs-copysign.ll | 119 + .../InstCombine/fadd-fsub-factor.ll | 162 + llvm/test/Transforms/InstCombine/fadd.ll | 362 + llvm/test/Transforms/InstCombine/fast-math.ll | 2 +- llvm/test/Transforms/InstCombine/fdiv.ll | 71 + llvm/test/Transforms/InstCombine/fma.ll | 351 +- llvm/test/Transforms/InstCombine/fmul.ll | 180 +- .../Transforms/InstCombine/fortify-folding.ll | 131 +- llvm/test/Transforms/InstCombine/fpextend.ll | 302 +- llvm/test/Transforms/InstCombine/fptrunc.ll | 138 + llvm/test/Transforms/InstCombine/freeze.ll | 20 + llvm/test/Transforms/InstCombine/fsub.ll | 372 + .../Transforms/InstCombine/gc.relocate.ll | 11 + llvm/test/Transforms/InstCombine/gep-alias.ll | 15 + .../Transforms/InstCombine/gep-custom-dl.ll | 33 +- .../InstCombine/gep-inbounds-null.ll | 236 + .../test/Transforms/InstCombine/gep-vector.ll | 80 +- .../Transforms/InstCombine/getelementptr.ll | 865 +- .../high-bit-signmask-with-trunc.ll | 148 + .../InstCombine/high-bit-signmask.ll | 126 + ...n-out-of-bias-calculation-with-constant.ll | 98 + .../hoist-negation-out-of-bias-calculation.ll | 158 + llvm/test/Transforms/InstCombine/icmp-add.ll | 164 +- .../Transforms/InstCombine/icmp-custom-dl.ll | 10 +- .../InstCombine/icmp-div-constant.ll | 136 +- .../Transforms/InstCombine/icmp-shr-lt-gt.ll | 177 +- llvm/test/Transforms/InstCombine/icmp-sub.ll | 59 + llvm/test/Transforms/InstCombine/icmp-vec.ll | 19 +- llvm/test/Transforms/InstCombine/icmp.ll | 213 +- .../InstCombine/insert-extract-shuffle.ll | 161 + llvm/test/Transforms/InstCombine/intptr8.ll | 77 + .../Transforms/InstCombine/known-non-zero.ll | 46 +- .../InstCombine/lifetime-sanitizer.ll | 64 + 
.../InstCombine/limit-max-iterations.ll | 44 + .../InstCombine/load-bitcast-vec.ll | 46 + .../Transforms/InstCombine/load-bitcast64.ll | 63 +- llvm/test/Transforms/InstCombine/load-cmp.ll | 50 +- .../InstCombine/load-insert-store.ll | 98 + llvm/test/Transforms/InstCombine/load.ll | 48 + llvm/test/Transforms/InstCombine/log-pow.ll | 143 +- .../Transforms/InstCombine/logical-select.ll | 4 +- .../InstCombine/lower-dbg-declare.ll | 239 +- .../InstCombine/malloc-free-delete.ll | 157 +- .../InstCombine/mem-deref-bytes-addrspaces.ll | 22 + .../Transforms/InstCombine/mem-deref-bytes.ll | 163 + .../InstCombine/mem-par-metadata-memcpy.ll | 2 +- llvm/test/Transforms/InstCombine/memccpy.ll | 214 + llvm/test/Transforms/InstCombine/memchr.ll | 46 +- .../InstCombine/memcmp-constant-fold.ll | 13 +- llvm/test/Transforms/InstCombine/memcpy-1.ll | 12 +- .../InstCombine/memcpy-from-global.ll | 213 +- .../Transforms/InstCombine/memcpy-to-load.ll | 6 +- llvm/test/Transforms/InstCombine/memcpy.ll | 2 +- .../Transforms/InstCombine/memcpy_chk-1.ll | 34 +- .../Transforms/InstCombine/memmove_chk-1.ll | 21 +- llvm/test/Transforms/InstCombine/mempcpy.ll | 56 + llvm/test/Transforms/InstCombine/memrchr.ll | 55 + llvm/test/Transforms/InstCombine/memset-1.ll | 103 +- llvm/test/Transforms/InstCombine/memset.ll | 2 +- .../Transforms/InstCombine/memset_chk-1.ll | 73 +- .../Transforms/InstCombine/minmax-fold.ll | 196 +- llvm/test/Transforms/InstCombine/minmax-fp.ll | 90 +- .../InstCombine/minmax-of-minmax.ll | 326 + llvm/test/Transforms/InstCombine/mul.ll | 90 + llvm/test/Transforms/InstCombine/not.ll | 153 + .../test/Transforms/InstCombine/objsize-64.ll | 29 +- llvm/test/Transforms/InstCombine/objsize.ll | 181 +- .../Transforms/InstCombine/overflow_to_sat.ll | 710 + ...nput-masking-after-truncation-variant-a.ll | 273 + ...nput-masking-after-truncation-variant-b.ll | 310 + ...nput-masking-after-truncation-variant-c.ll | 219 + ...nput-masking-after-truncation-variant-d.ll | 247 + 
...nput-masking-after-truncation-variant-e.ll | 219 + ...dant-left-shift-input-masking-variant-a.ll | 157 + ...dant-left-shift-input-masking-variant-b.ll | 157 + ...dant-left-shift-input-masking-variant-c.ll | 117 + ...dant-left-shift-input-masking-variant-d.ll | 137 + ...dant-left-shift-input-masking-variant-e.ll | 117 + .../phi-equal-incoming-pointers.ll | 616 + .../phi-known-bits-operand-order.ll | 99 + llvm/test/Transforms/InstCombine/pow-exp.ll | 1 + llvm/test/Transforms/InstCombine/pow-sqrt.ll | 140 +- .../test/Transforms/InstCombine/pow_fp_int.ll | 64 + llvm/test/Transforms/InstCombine/pr26992.ll | 31 +- llvm/test/Transforms/InstCombine/pr39177.ll | 2 +- llvm/test/Transforms/InstCombine/pr43081.ll | 15 + ...edStrictnessPredicateAndConstant-assert.ll | 36 + llvm/test/Transforms/InstCombine/pr43893.ll | 55 + llvm/test/Transforms/InstCombine/pr44242.ll | 190 + llvm/test/Transforms/InstCombine/pr44245.ll | 192 + llvm/test/Transforms/InstCombine/pr44541.ll | 25 + llvm/test/Transforms/InstCombine/pr44552.ll | 59 + llvm/test/Transforms/InstCombine/pr44835.ll | 29 + llvm/test/Transforms/InstCombine/printf-1.ll | 97 +- llvm/test/Transforms/InstCombine/printf-2.ll | 21 +- llvm/test/Transforms/InstCombine/puts-1.ll | 11 +- llvm/test/Transforms/InstCombine/realloc.ll | 4 +- ...nput-masking-after-truncation-variant-a.ll | 199 + ...nput-masking-after-truncation-variant-b.ll | 238 + ...nput-masking-after-truncation-variant-c.ll | 169 + ...nput-masking-after-truncation-variant-d.ll | 189 + ...nput-masking-after-truncation-variant-e.ll | 169 + ...nput-masking-after-truncation-variant-f.ll | 198 + ...dant-left-shift-input-masking-variant-a.ll | 47 +- ...dant-left-shift-input-masking-variant-b.ll | 47 +- ...dant-left-shift-input-masking-variant-c.ll | 58 +- ...dant-left-shift-input-masking-variant-d.ll | 81 +- ...dant-left-shift-input-masking-variant-e.ll | 70 +- ...dant-left-shift-input-masking-variant-f.ll | 61 +- llvm/test/Transforms/InstCombine/rem.ll | 94 +- 
...f-negative-is-non-zero-and-no-underflow.ll | 247 + ...ve-or-zero-is-non-zero-and-no-underflow.ll | 161 + ...ult-of-usub-is-non-zero-and-no-overflow.ll | 430 + .../reuse-constant-from-select-in-icmp.ll | 336 + llvm/test/Transforms/InstCombine/sadd_sat.ll | 503 + .../InstCombine/saturating-add-sub.ll | 86 + .../InstCombine/select-bitext-bitwise-ops.ll | 36 +- .../Transforms/InstCombine/select-crash.ll | 4 +- .../InstCombine/select-ctlz-to-cttz.ll | 219 + .../InstCombine/select-extractelement.ll | 111 +- .../InstCombine/select-imm-canon.ll | 70 + .../InstCombine/select-obo-peo-ops.ll | 466 +- llvm/test/Transforms/InstCombine/select.ll | 218 +- .../InstCombine/select_arithmetic.ll | 45 + ...ciation-in-bittest-with-truncation-lshr.ll | 468 + ...ociation-in-bittest-with-truncation-shl.ll | 463 + .../shift-amount-reassociation-in-bittest.ll | 28 + ...ount-reassociation-with-truncation-ashr.ll | 200 + ...ount-reassociation-with-truncation-lshr.ll | 200 + ...mount-reassociation-with-truncation-shl.ll | 212 + .../InstCombine/shift-amount-reassociation.ll | 134 + .../InstCombine/shift-by-signext.ll | 173 + .../Transforms/InstCombine/shift-logic.ll | 188 + llvm/test/Transforms/InstCombine/shift.ll | 391 +- .../Transforms/InstCombine/shuffle_select.ll | 3 +- .../InstCombine/shufflevector-div-rem.ll | 122 + ...-test-via-right-shifting-all-other-bits.ll | 362 + .../Transforms/InstCombine/sink-alloca.ll | 2 +- llvm/test/Transforms/InstCombine/snprintf.ll | 10 +- llvm/test/Transforms/InstCombine/sprintf-1.ll | 85 +- llvm/test/Transforms/InstCombine/sqrt.ll | 12 + .../InstCombine/srem-via-sdiv-mul-sub.ll | 117 + .../InstCombine/stacksave-debuginfo.ll | 9 +- .../Transforms/InstCombine/stdio-custom-dl.ll | 22 + llvm/test/Transforms/InstCombine/stpcpy-1.ll | 28 +- .../Transforms/InstCombine/stpcpy_chk-1.ll | 40 +- llvm/test/Transforms/InstCombine/strchr-1.ll | 87 +- llvm/test/Transforms/InstCombine/strcmp-1.ll | 75 +- .../Transforms/InstCombine/strcmp-memcmp.ll | 72 +- 
llvm/test/Transforms/InstCombine/strcpy-1.ll | 25 +- .../Transforms/InstCombine/strcpy_chk-1.ll | 40 +- llvm/test/Transforms/InstCombine/strcspn-1.ll | 17 +- ...low-check-to-comparison-of-sub-operands.ll | 111 + llvm/test/Transforms/InstCombine/strlen-1.ll | 34 +- llvm/test/Transforms/InstCombine/strncat-2.ll | 74 +- llvm/test/Transforms/InstCombine/strncat-3.ll | 6 +- llvm/test/Transforms/InstCombine/strncmp-1.ll | 87 +- llvm/test/Transforms/InstCombine/strncmp-2.ll | 6 +- llvm/test/Transforms/InstCombine/strncpy-1.ll | 90 +- llvm/test/Transforms/InstCombine/strncpy-2.ll | 5 +- .../Transforms/InstCombine/strncpy_chk-1.ll | 26 +- llvm/test/Transforms/InstCombine/strndup.ll | 67 + llvm/test/Transforms/InstCombine/strpbrk-1.ll | 20 +- llvm/test/Transforms/InstCombine/strrchr-1.ll | 55 +- llvm/test/Transforms/InstCombine/strstr-1.ll | 41 +- .../InstCombine/sub-and-or-neg-xor.ll | 115 + .../sub-ashr-and-to-icmp-select.ll | 213 + .../InstCombine/sub-ashr-or-to-icmp-select.ll | 239 + llvm/test/Transforms/InstCombine/sub-gep.ll | 101 + .../test/Transforms/InstCombine/sub-minmax.ll | 52 +- .../InstCombine/sub-of-negatible.ll | 377 + .../Transforms/InstCombine/sub-or-and-xor.ll | 104 + .../InstCombine/sub-xor-or-neg-and.ll | 115 + llvm/test/Transforms/InstCombine/sub.ll | 18 +- .../subtract-from-one-hand-of-select.ll | 78 + .../subtract-of-one-hand-of-select.ll | 78 + .../test/Transforms/InstCombine/trunc-load.ll | 50 +- llvm/test/Transforms/InstCombine/trunc.ll | 18 + .../InstCombine/unavailable-debug.ll | 2 +- .../InstCombine/unordered-fcmp-select.ll | 157 +- .../test/Transforms/InstCombine/unpack-fca.ll | 18 +- .../unrecognized_three-way-comparison.ll | 222 + ...gned-add-lack-of-overflow-check-via-add.ll | 236 + ...gned-add-lack-of-overflow-check-via-xor.ll | 189 + .../unsigned-add-lack-of-overflow-check.ll | 199 + .../unsigned-add-overflow-check-via-add.ll | 232 + .../unsigned-add-overflow-check-via-xor.ll | 189 + .../unsigned-add-overflow-check.ll | 199 + 
...mul-lack-of-overflow-check-via-mul-udiv.ll | 173 + ...k-of-overflow-check-via-udiv-of-allones.ll | 109 + ...nsigned-mul-overflow-check-via-mul-udiv.ll | 167 + ...-mul-overflow-check-via-udiv-of-allones.ll | 105 + .../unsigned-sub-lack-of-overflow-check.ll | 155 + .../unsigned-sub-overflow-check.ll | 155 + .../InstCombine/unsigned_saturated_sub.ll | 293 + .../Transforms/InstCombine/unused-nonnull.ll | 45 + .../InstCombine/urem-via-udiv-mul-sub.ll | 117 + ...signext-of-variable-high-bit-extraction.ll | 575 + .../InstCombine/vec_demanded_elts.ll | 35 +- .../Transforms/InstCombine/vec_shuffle.ll | 204 +- .../InstCombine/vec_udiv_to_shift.ll | 16 + .../Transforms/InstCombine/vector-urem.ll | 2 +- .../InstCombine/widenable-conditions.ll | 156 + .../Transforms/InstCombine/with_overflow.ll | 21 +- .../xor-of-icmps-with-extra-uses.ll | 163 + .../InstCombine/zext-bool-add-sub.ll | 20 +- .../Transforms/InstCombine/zext-or-icmp.ll | 2 +- llvm/test/Transforms/InstCombine/zext.ll | 268 +- .../Transforms/InstMerge/st_sink_split_bb.ll | 94 + .../InstSimplify/assume-non-zero.ll | 233 + llvm/test/Transforms/InstSimplify/assume.ll | 8 +- llvm/test/Transforms/InstSimplify/call.ll | 235 + .../div-by-0-guard-before-smul_ov-not.ll | 102 + .../div-by-0-guard-before-smul_ov.ll | 90 + .../div-by-0-guard-before-umul_ov-not.ll | 102 + .../div-by-0-guard-before-umul_ov.ll | 90 + .../Transforms/InstSimplify/fcmp-select.ll | 168 + llvm/test/Transforms/InstSimplify/freeze.ll | 20 + .../Transforms/InstSimplify/icmp-abs-nabs.ll | 17 + .../Transforms/InstSimplify/insertelement.ll | 23 + .../InstSimplify/known-never-nan.ll | 39 +- .../Transforms/InstSimplify/known-non-zero.ll | 57 +- ...ck-in-uadd_with_overflow-of-nonnull-ptr.ll | 265 + .../InstSimplify/remove-dead-call.ll | 28 + ...f-negative-is-non-zero-and-no-underflow.ll | 125 + ...-by-nonzero-is-non-zero-and-no-overflow.ll | 86 + ...ult-of-usub-is-non-zero-and-no-overflow.ll | 339 + llvm/test/Transforms/InstSimplify/select.ll | 169 +- 
.../Transforms/InstSimplify/shufflevector.ll | 27 + .../InstSimplify/unsigned-range-checks.ll | 130 + .../ARM/interleaved-accesses.ll | 1552 ++- .../Internalize/vcall-visibility.ll | 64 + .../bb-unreachable-from-entry.ll | 9 +- .../Transforms/JumpThreading/ddt-crash3.ll | 2 +- .../Transforms/JumpThreading/header-succ.ll | 130 +- .../Transforms/JumpThreading/induction.ll | 17 +- .../Transforms/JumpThreading/thread-loads.ll | 2 +- .../JumpThreading/unreachable-loops.ll | 63 + .../JumpThreading/update-edge-weight.ll | 1 + llvm/test/Transforms/LCSSA/pr44058.ll | 37 + llvm/test/Transforms/LICM/AMDGPU/bitcast.ll | 21 + llvm/test/Transforms/LICM/argmemonly-call.ll | 6 +- llvm/test/Transforms/LICM/atomics.ll | 11 +- llvm/test/Transforms/LICM/callbr-crash.ll | 18 + llvm/test/Transforms/LICM/explicit_guards.ll | 49 +- llvm/test/Transforms/LICM/guards.ll | 33 +- llvm/test/Transforms/LICM/hoist-deref-load.ll | 2 +- llvm/test/Transforms/LICM/hoist-mustexec.ll | 4 +- llvm/test/Transforms/LICM/hoist-phi.ll | 4 +- llvm/test/Transforms/LICM/hoisting.ll | 26 +- llvm/test/Transforms/LICM/pr38513.ll | 27 + llvm/test/Transforms/LICM/pr42969.ll | 70 + .../Transforms/LICM/pragma-licm-disable.ll | 33 + llvm/test/Transforms/LICM/promote-order.ll | 9 +- llvm/test/Transforms/LICM/read-only-calls.ll | 5 +- llvm/test/Transforms/LICM/store-hoisting.ll | 3 +- llvm/test/Transforms/LICM/volatile-alias.ll | 2 +- .../AMDGPU/vect-ptr-ptr-size-mismatch.ll | 96 + .../test/Transforms/LoopFusion/cannot_fuse.ll | 307 +- .../LoopFusion/diagnostics_analysis.ll | 130 + .../LoopFusion/diagnostics_missed.ll | 269 + llvm/test/Transforms/LoopFusion/four_loops.ll | 146 +- llvm/test/Transforms/LoopFusion/guarded.ll | 65 + llvm/test/Transforms/LoopFusion/loop_nest.ll | 101 +- llvm/test/Transforms/LoopFusion/simple.ll | 365 +- .../test/Transforms/LoopInstSimplify/basic.ll | 2 +- .../LoopInterchange/interchangeable.ll | 58 +- .../LoopInterchange/perserve-lcssa.ll | 33 +- .../LoopInterchange/phi-ordering.ll | 10 +- 
.../pr43176-move-to-new-latch.ll | 140 + ...r43473-invalid-lcssa-phis-in-inner-exit.ll | 108 + ...97-lcssa-for-multiple-outer-loop-blocks.ll | 150 + .../reductions-across-inner-and-outer-loop.ll | 2 + .../update-condbranch-duplicate-successors.ll | 145 + .../basic_widenable_branch_guards.ll | 66 + .../LoopPredication/predicate-exits.ll | 1110 ++ .../Transforms/LoopReroll/reroll_with_dbg.ll | 2 +- llvm/test/Transforms/LoopRotate/basic.ll | 2 +- .../LoopRotate/dbg-value-duplicates-2.ll | 77 + llvm/test/Transforms/LoopRotate/pr35210.ll | 2 +- llvm/test/Transforms/LoopRotate/switch.ll | 166 + .../test/Transforms/LoopSimplify/basictest.ll | 240 +- ...fferent-addrspace-addressing-mode-loops.ll | 2 +- .../LoopStrengthReduce/X86/pr17473.ll | 4 +- .../Transforms/LoopStrengthReduce/pr18165.ll | 4 +- .../two-combinations-bug.ll | 2 +- .../LoopUnroll/AArch64/unroll-optsize.ll | 4 +- .../LoopUnroll/AMDGPU/unroll-for-private.ll | 2 +- .../LoopUnroll/ARM/dont-unroll-loopdec.ll | 40 + .../Transforms/LoopUnroll/ARM/mve-nounroll.ll | 176 + .../LoopUnroll/disable-full-unroll-by-opt.ll | 35 + .../LoopUnroll/optsize-loop-size.ll | 58 + .../LoopUnroll/peel-loop-conditions-pgo-1.ll | 43 + .../LoopUnroll/peel-loop-conditions-pgo-2.ll | 43 + .../LoopUnroll/peel-loop-conditions.ll | 598 +- .../Transforms/LoopUnroll/peel-loop-inner.ll | 50 + .../LoopUnroll/peel-loop-pgo-deopt-idom-2.ll | 46 + .../LoopUnroll/peel-loop-pgo-deopt.ll | 19 +- .../Transforms/LoopUnroll/peel-loop-pgo.ll | 8 +- .../LoopUnroll/runtime-epilog-debuginfo.ll | 2 +- .../LoopUnroll/runtime-loop-multiple-exits.ll | 6 +- .../LoopUnroll/runtime-small-upperbound.ll | 70 + llvm/test/Transforms/LoopUnroll/scevunroll.ll | 10 +- .../LoopUnroll/unroll-preserve-scev-lcssa.ll | 71 + .../LoopUnrollAndJam/dependencies.ll | 1 + .../Transforms/LoopUnrollAndJam/disable.ll | 1 + .../LoopUnrollAndJam/pragma-explicit.ll | 1 + .../LoopUnrollAndJam/unroll-and-jam.ll | 1 + .../AArch64/arbitrary-induction-step.ll | 18 +- 
.../LoopVectorize/AArch64/pr33053.ll | 2 +- .../LoopVectorize/ARM/arm-ieee-vectorize.ll | 9 +- .../LoopVectorize/ARM/mve-interleaved-cost.ll | 1111 ++ .../LoopVectorize/ARM/mve-maskedldst.ll | 176 + .../LoopVectorize/ARM/mve-shiftcost.ll | 87 + .../Transforms/LoopVectorize/ARM/mve-vldn.ll | 87 + .../ARM/prefer-tail-loop-folding.ll | 655 + .../ARM/tail-folding-counting-down.ll | 47 + .../LoopVectorize/ARM/tail-loop-folding.ll | 120 + .../LoopVectorize/PowerPC/reg-usage.ll | 281 + .../LoopVectorize/PowerPC/vectorize-bswap.ll | 97 + .../predicated-first-order-recurrence.ll | 103 + .../X86/consecutive-ptr-uniforms.ll | 83 + .../LoopVectorize/X86/constant-fold.ll | 2 +- .../LoopVectorize/X86/cost-model-assert.ll | 127 + .../illegal-parallel-loop-uniform-write.ll | 2 +- .../LoopVectorize/X86/int128_no_gather.ll | 4 +- .../LoopVectorize/X86/interleave_short_tc.ll | 59 + .../interleaved-accesses-waw-dependency.ll | 110 + .../LoopVectorize/X86/interleaving.ll | 12 +- .../LoopVectorize/X86/load-deref-pred.ll | 2128 +++ .../X86/no_fpmath_with_hotness.ll | 2 +- .../Transforms/LoopVectorize/X86/pr42674.ll | 61 + .../LoopVectorize/X86/reg-usage-debug.ll | 12 +- .../Transforms/LoopVectorize/X86/reg-usage.ll | 34 +- .../LoopVectorize/X86/scatter_crash.ll | 2 +- .../X86/tail_folding_and_assume_safety.ll | 166 + .../LoopVectorize/X86/tail_loop_folding.ll | 152 + .../X86/vect.omp.force.small-tc.ll | 22 +- .../X86/vectorization-remarks-profitable.ll | 2 +- .../Transforms/LoopVectorize/dbg.value.ll | 2 +- .../test/Transforms/LoopVectorize/debugloc.ll | 2 +- .../diag-missing-instr-debug-loc.ll | 2 +- .../LoopVectorize/diag-with-hotness-info-2.ll | 2 +- .../LoopVectorize/diag-with-hotness-info.ll | 2 +- .../first-order-recurrence-complex.ll | 275 + ...t-order-recurrence-multiply-recurrences.ll | 45 + .../LoopVectorize/first-order-recurrence.ll | 73 + .../Transforms/LoopVectorize/hoist-loads.ll | 4 +- .../LoopVectorize/if-conversion-nest.ll | 54 +- .../LoopVectorize/if-pred-stores.ll | 4 
+- .../LoopVectorize/incorrect-dom-info.ll | 2 +- .../interleaved-accesses-uniform-load.ll | 49 + .../LoopVectorize/interleaved-accesses.ll | 10 +- .../LoopVectorize/metadata-width.ll | 2 +- .../LoopVectorize/multiple-address-spaces.ll | 2 +- .../LoopVectorize/no_int_induction.ll | 4 +- .../LoopVectorize/nofloat-report.ll | 27 + llvm/test/Transforms/LoopVectorize/nuw.ll | 58 + llvm/test/Transforms/LoopVectorize/optsize.ll | 37 + .../LoopVectorize/pr44488-predication.ll | 79 + .../LoopVectorize/reduction-small-size.ll | 2 +- .../Transforms/LoopVectorize/reduction.ll | 2 +- .../runtime-check-address-space.ll | 2 +- .../runtime-check-readonly-address-space.ll | 2 +- .../Transforms/LoopVectorize/runtime-check.ll | 64 +- .../LoopVectorize/scev-exitlim-crash.ll | 4 +- .../tail-folding-counting-down.ll | 42 + .../LoopVectorize/vectorize-once.ll | 2 +- .../constant-intrinsics.ll | 114 + .../crash-on-large-allocas.ll | 16 + .../objectsize_basic.ll | 83 + .../Transforms/LowerExpectIntrinsic/basic.ll | 31 +- .../bigger-expressions-double.ll | 513 + .../multiply-double-contraction-fmf.ll | 69 + .../multiply-double-contraction.ll | 69 + .../LowerMatrixIntrinsics/multiply-double.ll | 254 + .../multiply-float-contraction-fmf.ll | 69 + .../multiply-float-contraction.ll | 69 + .../LowerMatrixIntrinsics/multiply-float.ll | 254 + .../LowerMatrixIntrinsics/multiply-i32.ll | 254 + .../propagate-backward.ll | 96 + .../propagate-backwards-unsupported.ll | 135 + .../propagate-forward.ll | 116 + .../propagate-mixed-users.ll | 53 + .../propagate-multiple-iterations.ll | 84 + .../strided-load-double.ll | 68 + .../strided-load-float.ll | 68 + .../LowerMatrixIntrinsics/strided-load-i32.ll | 68 + .../strided-store-double.ll | 68 + .../strided-store-float.ll | 68 + .../strided-store-i32.ll | 68 + .../LowerMatrixIntrinsics/transpose-double.ll | 114 + .../LowerMatrixIntrinsics/transpose-float.ll | 114 + .../LowerMatrixIntrinsics/transpose-i32.ll | 114 + llvm/test/Transforms/LowerTypeTests/align.ll 
| 16 + .../LowerTypeTests/export-rename-local.ll | 15 + .../Transforms/LowerTypeTests/import-icall.ll | 18 +- .../MemCpyOpt/aggregate-type-crash.ll | 30 + llvm/test/Transforms/MemCpyOpt/lifetime.ll | 2 +- .../memcpy-to-memset-with-lifetimes.ll | 37 +- llvm/test/Transforms/MemCpyOpt/memcpy.ll | 2 +- .../Transforms/MemCpyOpt/store-to-memset.ll | 77 + .../MergeFunc/byval-attr-congruent-type.ll | 30 + llvm/test/Transforms/MergeFunc/comdat.ll | 2 +- .../MergeFunc/merge-block-address.ll | 1 + .../Transforms/MergeFunc/phi-check-blocks.ll | 8 +- llvm/test/Transforms/MergeFunc/weak-small.ll | 8 +- .../MergeICmps/X86/int64-and-ptr.ll | 2 +- .../test/Transforms/MergeICmps/X86/pr41917.ll | 4 +- .../Transforms/NewGVN/basic-cyclic-opt.ll | 2 +- llvm/test/Transforms/NewGVN/cond_br2.ll | 6 +- llvm/test/Transforms/NewGVN/equivalent-phi.ll | 2 +- .../test/Transforms/NewGVN/memory-handling.ll | 6 +- .../Transforms/NewGVN/phi-edge-handling.ll | 2 +- llvm/test/Transforms/NewGVN/pr31483.ll | 4 +- llvm/test/Transforms/NewGVN/pr31501.ll | 2 +- llvm/test/Transforms/NewGVN/pr33187.ll | 2 +- llvm/test/Transforms/NewGVN/pr33305.ll | 4 +- llvm/test/Transforms/NewGVN/pr34430.ll | 2 +- llvm/test/Transforms/NewGVN/pr34452.ll | 2 +- llvm/test/Transforms/NewGVN/pr43441.ll | 42 + llvm/test/Transforms/ObjCARC/basic.ll | 2 +- llvm/test/Transforms/ObjCARC/code-motion.ll | 39 + ...e-that-exception-unwind-path-is-visited.ll | 2 +- .../inlined-autorelease-return-value.ll | 292 + llvm/test/Transforms/ObjCARC/nested.ll | 2 +- llvm/test/Transforms/ObjCARC/post-inlining.ll | 64 + .../Transforms/ObjCARC/unsafe-claim-rv.ll | 3 +- .../Inputs/thinlto_cspgo_bar_gen.ll | 2 +- .../Inputs/thinlto_cspgo_bar_use.ll | 2 +- .../Inputs/thinlto_indirect_call_promotion.ll | 2 +- .../Inputs/thinlto_samplepgo_icp.ll | 2 +- .../Inputs/thinlto_samplepgo_icp2a.ll | 2 +- .../Inputs/thinlto_samplepgo_icp2b.ll | 2 +- .../Inputs/thinlto_samplepgo_icp3.ll | 2 +- llvm/test/Transforms/PGOProfile/X86/macho.ll | 2 +- 
llvm/test/Transforms/PGOProfile/branch1.ll | 2 +- llvm/test/Transforms/PGOProfile/branch2.ll | 2 +- llvm/test/Transforms/PGOProfile/chr.ll | 205 +- .../Transforms/PGOProfile/comdat_internal.ll | 7 +- .../Transforms/PGOProfile/criticaledge.ll | 2 +- .../PGOProfile/cspgo_profile_summary.ll | 2 +- .../PGOProfile/diag_FE_profile.ll.x | 2 +- .../Transforms/PGOProfile/diag_mismatch.ll | 2 +- .../PGOProfile/diag_no_funcprofdata.ll | 2 +- .../PGOProfile/diag_no_profile.ll.x | 2 +- .../PGOProfile/diag_no_value_sites.ll | 2 +- .../PGOProfile/do-not-instrument.ll | 2 +- llvm/test/Transforms/PGOProfile/func_entry.ll | 29 +- .../PGOProfile/icp_covariant_call_return.ll | 2 +- .../PGOProfile/icp_covariant_invoke_return.ll | 2 +- llvm/test/Transforms/PGOProfile/icp_invoke.ll | 2 +- .../Transforms/PGOProfile/icp_invoke_nouse.ll | 2 +- .../Transforms/PGOProfile/icp_mismatch_msg.ll | 2 +- llvm/test/Transforms/PGOProfile/icp_vararg.ll | 2 +- .../PGOProfile/indirect_call_annotation.ll | 2 +- .../PGOProfile/indirect_call_profile.ll | 2 +- .../PGOProfile/indirect_call_promotion.ll | 2 +- .../PGOProfile/indirect_call_promotion_vla.ll | 2 +- llvm/test/Transforms/PGOProfile/indirectbr.ll | 2 +- llvm/test/Transforms/PGOProfile/landingpad.ll | 2 +- .../PGOProfile/large_count_remarks.ll | 2 +- llvm/test/Transforms/PGOProfile/loop1.ll | 2 +- llvm/test/Transforms/PGOProfile/loop2.ll | 2 +- llvm/test/Transforms/PGOProfile/memcpy.ll | 2 +- .../PGOProfile/memop_size_annotation.ll | 2 +- .../Transforms/PGOProfile/memop_size_opt.ll | 2 +- .../PGOProfile/memop_size_opt_zero.ll | 2 +- .../PGOProfile/misexpect-branch-correct.ll | 94 + .../PGOProfile/misexpect-branch-stripped.ll | 115 + .../misexpect-branch-unpredictable.ll | 89 + .../Transforms/PGOProfile/misexpect-branch.ll | 130 + .../PGOProfile/misexpect-switch-default.ll | 196 + .../Transforms/PGOProfile/misexpect-switch.ll | 293 + .../PGOProfile/multiple_hash_profile.ll | 2 +- .../Transforms/PGOProfile/noreturncall.ll | 2 +- 
llvm/test/Transforms/PGOProfile/preinline.ll | 2 +- llvm/test/Transforms/PGOProfile/remap.ll | 2 +- llvm/test/Transforms/PGOProfile/select1.ll | 2 +- llvm/test/Transforms/PGOProfile/select2.ll | 2 +- .../PGOProfile/select_hash_conflict.ll | 2 +- llvm/test/Transforms/PGOProfile/single_bb.ll | 2 +- .../PGOProfile/statics_counter_naming.ll | 2 +- llvm/test/Transforms/PGOProfile/switch.ll | 2 +- .../PGOProfile/thinlto_cspgo_gen.ll | 2 +- .../PGOProfile/thinlto_cspgo_use.ll | 2 +- .../thinlto_indirect_call_promotion.ll | 2 +- .../PGOProfile/thinlto_samplepgo_icp.ll | 2 +- .../PGOProfile/thinlto_samplepgo_icp2.ll | 2 +- .../PGOProfile/thinlto_samplepgo_icp3.ll | 2 +- .../thinlto_samplepgo_icp_droppeddead.ll | 2 +- .../Transforms/PGOProfile/unreachable_bb.ll | 2 +- .../PhaseOrdering/lifetime-sanitizer.ll | 71 + .../PhaseOrdering/min-max-abs-cse.ll | 114 + .../PhaseOrdering/scev-custom-dl.ll | 70 + .../PhaseOrdering/simplifycfg-options.ll | 8 +- .../PhaseOrdering/two-shifts-by-sext.ll | 125 + .../unsigned-multiply-overflow-check.ll | 161 + .../PreISelIntrinsicLowering/objc-arc.ll | 10 +- .../Reassociate/2002-05-15-MissedTree.ll | 16 +- .../Reassociate/2002-05-15-SubReassociate.ll | 9 +- .../Reassociate/2019-08-22-FNegAssert.ll | 24 + .../Reassociate/canonicalize-neg-const.ll | 385 +- llvm/test/Transforms/Reassociate/commute.ll | 15 +- .../Reassociate/fast-ReassociateVector.ll | 19 +- .../Reassociate/fast-SubReassociate.ll | 39 +- .../Transforms/Reassociate/fast-basictest.ll | 21 +- .../Transforms/Reassociate/fast-fp-commute.ll | 11 +- .../Transforms/Reassociate/fast-multistep.ll | 13 +- llvm/test/Transforms/Reassociate/looptest.ll | 74 +- .../Reassociate/mixed-fast-nonfast-fp.ll | 17 +- llvm/test/Transforms/Reassociate/mulfactor.ll | 39 +- llvm/test/Transforms/Reassociate/multistep.ll | 13 +- llvm/test/Transforms/Reassociate/no-op.ll | 24 +- .../Transforms/Reassociate/optional-flags.ll | 25 +- .../Transforms/Reassociate/propagate-flags.ll | 3 +- 
.../Reassociate/reassoc-intermediate-fnegs.ll | 10 +- .../Reassociate/reassociate-landingpad.ll | 2 +- .../reassociate_salvages_debug_info.ll | 50 + .../Transforms/Reassociate/shift-factor.ll | 3 +- ...f_intrinsics_when_deleting_instructions.ll | 95 + .../Transforms/Reassociate/vaarg_movable.ll | 21 +- .../test/Transforms/Reassociate/wrap-flags.ll | 43 +- .../Transforms/Reassociate/xor_reassoc.ll | 198 +- llvm/test/Transforms/SCCP/apint-basictest3.ll | 2 +- llvm/test/Transforms/SCCP/indirectbr.ll | 34 +- .../Transforms/SCCP/ipsccp-predinfo-order.ll | 76 + .../SCCP/struct-arg-resolve-undefs.ll | 49 + llvm/test/Transforms/SCCP/ubsan_overflow.ll | 13 + .../SLPVectorizer/AArch64/ext-trunc.ll | 30 +- .../SLPVectorizer/AArch64/gather-root.ll | 102 +- .../SLPVectorizer/AArch64/getelementptr.ll | 57 +- .../SLPVectorizer/AArch64/horizontal.ll | 16 - .../SLPVectorizer/AArch64/spillcost-di.ll | 4 +- .../SLPVectorizer/AArch64/transpose.ll | 99 +- .../address-space-ptr-sze-gep-index-assert.ll | 13 + .../NVPTX/non-vectorizable-intrinsic.ll | 2 +- .../Transforms/SLPVectorizer/X86/PR31847.ll | 153 + .../Transforms/SLPVectorizer/X86/PR34635.ll | 175 +- .../Transforms/SLPVectorizer/X86/PR35628_1.ll | 13 +- .../Transforms/SLPVectorizer/X86/PR35628_2.ll | 5 - .../Transforms/SLPVectorizer/X86/PR35777.ll | 46 +- .../Transforms/SLPVectorizer/X86/PR39774.ll | 72 +- .../Transforms/SLPVectorizer/X86/PR40310.ll | 16 - .../Transforms/SLPVectorizer/X86/align.ll | 2 +- .../SLPVectorizer/X86/alternate-cast.ll | 98 +- .../SLPVectorizer/X86/alternate-int.ll | 41 +- .../SLPVectorizer/X86/arith-add-ssat.ll | 85 +- .../SLPVectorizer/X86/arith-add-usat.ll | 127 +- .../Transforms/SLPVectorizer/X86/arith-add.ll | 91 +- .../Transforms/SLPVectorizer/X86/arith-fix.ll | 250 +- .../Transforms/SLPVectorizer/X86/arith-fp.ll | 48 +- .../Transforms/SLPVectorizer/X86/arith-mul.ll | 91 +- .../SLPVectorizer/X86/arith-sub-ssat.ll | 85 +- .../SLPVectorizer/X86/arith-sub-usat.ll | 127 +- 
.../Transforms/SLPVectorizer/X86/arith-sub.ll | 91 +- .../SLPVectorizer/X86/bad-reduction.ll | 358 + .../SLPVectorizer/X86/bitreverse.ll | 28 +- .../SLPVectorizer/X86/blending-shuffle.ll | 47 +- .../test/Transforms/SLPVectorizer/X86/call.ll | 2 +- .../test/Transforms/SLPVectorizer/X86/cast.ll | 43 +- .../SLPVectorizer/X86/consecutive-access.ll | 34 +- .../SLPVectorizer/X86/crash_bullet3.ll | 2 +- .../SLPVectorizer/X86/crash_flop7.ll | 2 +- .../Transforms/SLPVectorizer/X86/crash_gep.ll | 23 + .../SLPVectorizer/X86/crash_smallpt.ll | 2 +- .../test/Transforms/SLPVectorizer/X86/ctlz.ll | 346 +- .../Transforms/SLPVectorizer/X86/ctpop.ll | 90 +- .../test/Transforms/SLPVectorizer/X86/cttz.ll | 44 +- .../SLPVectorizer/X86/debug_info.ll | 2 +- .../SLPVectorizer/X86/different-vec-widths.ll | 104 + .../Transforms/SLPVectorizer/X86/fptosi.ll | 37 +- .../Transforms/SLPVectorizer/X86/fptoui.ll | 43 +- .../test/Transforms/SLPVectorizer/X86/hadd.ll | 57 +- .../SLPVectorizer/X86/horizontal-list.ll | 354 +- .../SLPVectorizer/X86/horizontal-minmax.ll | 800 +- .../SLPVectorizer/X86/horizontal.ll | 148 - .../test/Transforms/SLPVectorizer/X86/hsub.ll | 57 +- .../X86/insert-element-build-vector.ll | 626 +- .../SLPVectorizer/X86/jumbled_store_crash.ll | 104 + .../SLPVectorizer/X86/load-merge.ll | 158 + .../SLPVectorizer/X86/long_chains.ll | 8 +- .../Transforms/SLPVectorizer/X86/lookahead.ll | 406 +- .../SLPVectorizer/X86/no_alternate_divrem.ll | 129 + .../Transforms/SLPVectorizer/X86/pr16899.ll | 2 +- .../Transforms/SLPVectorizer/X86/pr19657.ll | 104 +- .../Transforms/SLPVectorizer/X86/pr35497.ll | 13 +- .../Transforms/SLPVectorizer/X86/pr42022.ll | 278 + .../SLPVectorizer/X86/reassociated-loads.ll | 31 - .../Transforms/SLPVectorizer/X86/reduction.ll | 84 + .../SLPVectorizer/X86/reduction2.ll | 117 +- .../SLPVectorizer/X86/reduction_loads.ll | 24 - .../SLPVectorizer/X86/reduction_unrolled.ll | 35 +- .../SLPVectorizer/X86/remark_horcost.ll | 6 +- 
.../SLPVectorizer/X86/reorder_repeated_ops.ll | 122 +- .../X86/reuse-extracts-in-wider-vect.ll | 67 + .../test/Transforms/SLPVectorizer/X86/sext.ll | 1173 +- .../SLPVectorizer/X86/shift-ashr.ll | 77 +- .../SLPVectorizer/X86/shift-lshr.ll | 77 +- .../Transforms/SLPVectorizer/X86/shift-shl.ll | 77 +- .../Transforms/SLPVectorizer/X86/sitofp.ll | 115 +- .../test/Transforms/SLPVectorizer/X86/sqrt.ll | 7 +- .../SLPVectorizer/X86/store-jumbled.ll | 7 +- .../SLPVectorizer/X86/stores_vectorize.ll | 45 +- .../Transforms/SLPVectorizer/X86/uitofp.ll | 115 +- .../SLPVectorizer/X86/undef_vect.ll | 10 - .../SLPVectorizer/X86/used-reduced-op.ll | 529 + .../SLPVectorizer/X86/vector_gep.ll | 2 +- .../X86/vectorize-reorder-reuse.ll | 42 - .../test/Transforms/SLPVectorizer/X86/zext.ll | 1098 +- llvm/test/Transforms/SROA/addrspacecast.ll | 11 +- llvm/test/Transforms/SROA/alignment.ll | 15 + .../Transforms/SROA/mem-par-metadata-sroa.ll | 2 +- llvm/test/Transforms/SROA/tbaa-struct.ll | 32 + .../SROA/vector-promotion-different-size.ll | 24 + llvm/test/Transforms/SafeStack/ARM/debug.ll | 6 +- .../Transforms/SafeStack/X86/debug-loc.ll | 4 +- .../Transforms/SafeStack/X86/debug-loc2.ll | 13 +- .../Inputs/profile-symbol-list.ll | 134 + llvm/test/Transforms/SampleProfile/branch.ll | 6 +- .../compressed-profile-symbol-list.ll | 5 + .../SampleProfile/entry_counts_cold.ll | 4 +- .../entry_counts_missing_dbginfo.ll | 4 +- .../Transforms/SampleProfile/gcc-simple.ll | 4 +- .../Transforms/SampleProfile/inline-act.ll | 2 +- .../SampleProfile/inline-callee-update.ll | 73 + .../Transforms/SampleProfile/inline-cold.ll | 102 + .../SampleProfile/inline-coverage.ll | 2 +- .../SampleProfile/inline-mergeprof.ll | 97 + .../Transforms/SampleProfile/inline-stats.ll | 104 + .../SampleProfile/inline-topdown.ll | 123 + llvm/test/Transforms/SampleProfile/offset.ll | 2 +- .../SampleProfile/profile-format-compress.ll | 123 + .../SampleProfile/profile-format.ll | 123 + .../SampleProfile/profile-sample-accurate.ll | 157 
+ .../Transforms/SampleProfile/propagate.ll | 6 +- llvm/test/Transforms/SampleProfile/remap.ll | 6 +- llvm/test/Transforms/SampleProfile/remarks.ll | 12 +- .../uncompressed-profile-symbol-list.ll | 4 + .../X86/expand-masked-compressstore.ll | 72 +- .../X86/expand-masked-expandload.ll | 90 +- .../X86/expand-masked-gather.ll | 11 +- .../X86/expand-masked-load.ll | 81 +- .../X86/expand-masked-scatter.ll | 64 + .../X86/expand-masked-store.ll | 23 +- llvm/test/Transforms/Scalarizer/dbginfo.ll | 2 +- llvm/test/Transforms/Scalarizer/intrinsics.ll | 12 + .../Scalarizer/phi-unreachable-pred.ll | 98 + .../SimpleLoopUnswitch/basictest-profmd.ll | 2 +- .../SimpleLoopUnswitch/basictest.ll | 2 +- .../SimpleLoopUnswitch/delete-dead-blocks.ll | 1 + .../exponential-nontrivial-unswitch-nested.ll | 20 + ...exponential-nontrivial-unswitch-nested2.ll | 22 + .../exponential-nontrivial-unswitch.ll | 20 + .../exponential-nontrivial-unswitch2.ll | 20 + .../exponential-switch-unswitch.ll | 20 + .../Transforms/SimpleLoopUnswitch/guards.ll | 2 +- .../Transforms/SimpleLoopUnswitch/msan.ll | 2 +- .../nontrivial-unswitch-cost.ll | 1 + .../nontrivial-unswitch-redundant-switch.ll | 1 + .../SimpleLoopUnswitch/nontrivial-unswitch.ll | 1 + .../preserve-scev-exiting-multiple-loops.ll | 63 + .../trivial-unswitch-iteration.ll | 2 +- .../trivial-unswitch-profmd.ll | 2 +- .../SimpleLoopUnswitch/trivial-unswitch.ll | 2 +- .../SimpleLoopUnswitch/update-scev.ll | 2 +- .../Hexagon/switch-to-lookup-table.ll | 2 +- llvm/test/Transforms/SimplifyCFG/HoistCode.ll | 39 +- .../Transforms/SimplifyCFG/PhiEliminate3.ll | 59 +- .../Transforms/SimplifyCFG/SpeculativeExec.ll | 135 +- .../SimplifyCFG/X86/merge-cond-stores-cost.ll | 43 + .../SimplifyCFG/X86/speculate-cttz-ctlz.ll | 304 +- .../SimplifyCFG/X86/switch_to_lookup_table.ll | 6 +- .../dce-cond-after-folding-terminator.ll | 5 +- .../SimplifyCFG/hoist-dbgvalue-inlined.ll | 1 - .../Transforms/SimplifyCFG/invalidate-dom.ll | 90 + .../SimplifyCFG/merge-cleanuppads.ll | 
4 +- .../SimplifyCFG/merge-cond-stores-2.ll | 92 +- .../SimplifyCFG/merge-cond-stores.ll | 28 +- llvm/test/Transforms/SimplifyCFG/safe-abs.ll | 30 + .../SimplifyCFG/safe-low-bit-extract.ll | 31 + .../signbit-like-value-extension.ll | 35 + .../SimplifyCFG/sink-common-code.ll | 27 + .../Transforms/SimplifyCFG/speculate-math.ll | 112 +- .../SimplifyCFG/switch-range-to-icmp.ll | 38 + .../SimplifyCFG/switch_create-custom-dl.ll | 16 +- .../Transforms/SimplifyCFG/switch_msan.ll | 102 + .../unsigned-multiplication-will-overflow.ll | 36 + .../SimplifyCFG/volatile-phioper.ll | 4 +- .../Transforms/SimplifyCFG/wc-widen-block.ll | 451 + .../AMDGPU/pr23975.ll | 2 +- .../ThinLTOBitcodeWriter/associated.ll | 14 + .../cfi-functions-canonical-jump-tables.ll | 23 + .../Transforms/TypePromotion/ARM/calls.ll | 342 + .../Transforms/TypePromotion/ARM/casts.ll | 1072 ++ .../TypePromotion/ARM/clear-structures.ll | 124 + .../Transforms/TypePromotion/ARM/icmps.ll | 349 + .../Transforms/TypePromotion/ARM/large-int.ll | 66 + .../Transforms/TypePromotion/ARM/phis-ret.ll | 344 + .../Transforms/TypePromotion/ARM/pointers.ll | 240 + .../TypePromotion/ARM/signed-icmps.ll | 103 + .../Transforms/TypePromotion/ARM/signed.ll | 123 + .../Transforms/TypePromotion/ARM/switch.ll | 291 + .../Transforms/TypePromotion/ARM/wrapping.ll | 356 + .../Transforms/Util/PredicateInfo/condprop.ll | 6 +- .../Util/PredicateInfo/testandor.ll | 26 +- llvm/test/Transforms/Util/add-TLI-mappings.ll | 61 + llvm/test/Transforms/Util/dbg-call-bitcast.ll | 48 + llvm/test/Transforms/Util/dbg-user-of-aext.ll | 2 +- llvm/test/Transforms/Util/flattencfg.ll | 60 + .../Util/libcalls-fast-math-inf-loop.ll | 4 +- .../Util/simplify-dbg-declare-load.ll | 1 + .../WholeProgramDevirt/branch-funnel.ll | 2 +- .../WholeProgramDevirt/export-single-impl.ll | 6 +- .../virtual-const-prop-begin.ll | 14 +- .../virtual-const-prop-end.ll | 16 +- llvm/test/Verifier/blockbyref.ll | 4 +- llvm/test/Verifier/callbr.ll | 50 + llvm/test/Verifier/comdat2.ll.x | 
4 +- .../Verifier/dereferenceable-md-inttoptr.ll | 6 + llvm/test/Verifier/dereferenceable-md.ll.x | 48 +- .../diexpression-dwarf-entry-value.ll | 8 + .../diexpression-entry-value-llvm-ir.ll | 31 + .../Verifier/diexpression-entry-value.ll.x | 11 +- .../diexpression-valid-entry-value.ll | 2 +- llvm/test/Verifier/fp-intrinsics.ll | 28 +- llvm/test/Verifier/intrinsic-bad-arg-type.ll | 10 + llvm/test/Verifier/intrinsic-immarg.ll.x | 22 +- .../invalid-frame-pointer-attr-empty.ll | 9 + .../invalid-frame-pointer-attr-no-value.ll | 9 + .../Verifier/invalid-frame-pointer-attr.ll | 9 + .../invalid-patchable-function-entry.ll | 21 + .../update_llc_test_checks/Inputs/basic.ll | 32 + .../Inputs/argument_name_reuse.ll | 7 + .../update_test_checks/Inputs/basic.ll | 49 + .../update_test_checks/Inputs/scrub_attrs.ll | 8 + .../Inputs/sometimes_deleted_function.ll | 12 + llvm/test/tools/dsymutil/Inputs/frame-dw2.ll | 4 +- llvm/test/tools/dsymutil/Inputs/frame-dw4.ll | 4 +- .../gold/X86/linkonce_odr_unnamed_addr.ll | 2 + llvm/test/tools/gold/X86/slp-vectorize-pm.ll | 79 + llvm/test/tools/gold/X86/split-dwarf.ll | 2 +- llvm/test/tools/gold/X86/strip_names.ll | 2 +- .../X86/v1.12/thinlto_emit_linked_objects.ll | 2 +- .../test/tools/llvm-dwarfdump/AArch64/arch.ll | 1 - .../test/tools/llvm-dwarfdump/X86/locstats.ll | 211 + .../tools/llvm-dwarfdump/X86/statistics.ll | 2 +- .../X86/stats-dbg-callsite-info.ll | 76 + .../X86/valid-call-site-GNU-extensions.ll | 103 + llvm/test/tools/llvm-lib/Inputs/arm64.ll | 2 +- llvm/test/tools/llvm-lib/Inputs/i386.ll | 2 +- llvm/test/tools/llvm-lib/Inputs/x86_64.ll | 2 +- llvm/test/tools/llvm-locstats/locstats.ll | 175 + .../tools/llvm-locstats/no_scope_bytes.ll | 39 + llvm/test/tools/llvm-lto2/X86/pipeline.ll | 2 +- .../tools/llvm-lto2/X86/slp-vectorize-pm.ll | 51 + .../tools/llvm-lto2/X86/stats-file-option.ll | 2 +- .../tools/llvm-objdump/AMDGPU/source-lines.ll | 2 +- .../Hexagon/source-interleave-hexagon.ll | 2 +- 
...rce-interleave-same-line-different-file.ll | 2 +- .../X86/Inputs/source-interleave.ll | 2 +- .../X86/disassemble-archive-with-source.ll | 2 +- .../llvm-objdump/X86/macho-data-in-code.ll | 45 + .../tools/llvm-readobj/ELF/Inputs/trivial.ll | 20 + 5149 files changed, 561647 insertions(+), 147823 deletions(-) create mode 100644 llvm/test/Analysis/BasicAA/assume-index-positive.ll create mode 100644 llvm/test/Analysis/BasicAA/dereferenceable.ll create mode 100644 llvm/test/Analysis/BasicAA/ptrmask.ll create mode 100644 llvm/test/Analysis/BranchProbabilityInfo/fcmp.ll create mode 100644 llvm/test/Analysis/ConstantFolding/binop-identity-undef.ll create mode 100644 llvm/test/Analysis/ConstantFolding/copysign.ll create mode 100644 llvm/test/Analysis/ConstantFolding/gep-alias.ll create mode 100644 llvm/test/Analysis/ConstantFolding/insertelement.ll create mode 100644 llvm/test/Analysis/ConstantFolding/math-1.ll create mode 100644 llvm/test/Analysis/ConstantFolding/math-2.ll create mode 100644 llvm/test/Analysis/ConstantFolding/rint.ll create mode 100644 llvm/test/Analysis/ConstantFolding/round.ll create mode 100644 llvm/test/Analysis/ConstantFolding/shufflevector.ll create mode 100644 llvm/test/Analysis/ConstantFolding/trunc.ll create mode 100644 llvm/test/Analysis/CostModel/AArch64/aggregates.ll create mode 100644 llvm/test/Analysis/CostModel/AMDGPU/fma.ll create mode 100644 llvm/test/Analysis/CostModel/ARM/arith.ll create mode 100644 llvm/test/Analysis/CostModel/ARM/fparith.ll create mode 100644 llvm/test/Analysis/CostModel/ARM/freeshift.ll create mode 100644 llvm/test/Analysis/CostModel/ARM/load_store.ll create mode 100644 llvm/test/Analysis/CostModel/PowerPC/future-cost-model.ll create mode 100644 llvm/test/Analysis/CostModel/X86/aggregates.ll create mode 100644 llvm/test/Analysis/DDG/basic-a.ll create mode 100644 llvm/test/Analysis/DDG/basic-b.ll create mode 100644 llvm/test/Analysis/DDG/basic-loopnest.ll create mode 100644 llvm/test/Analysis/DDG/root-node.ll create 
mode 100644 llvm/test/Analysis/DependenceAnalysis/Dump.ll create mode 100644 llvm/test/Analysis/DivergenceAnalysis/AMDGPU/b42473-r1-crash.ll create mode 100644 llvm/test/Analysis/GlobalsModRef/intrinsic_addressnottaken1.ll create mode 100644 llvm/test/Analysis/GlobalsModRef/intrinsic_addressnottaken2.ll create mode 100644 llvm/test/Analysis/GlobalsModRef/intrinsic_addresstaken.ll create mode 100644 llvm/test/Analysis/LoopCacheAnalysis/PowerPC/compute-cost.ll create mode 100644 llvm/test/Analysis/LoopCacheAnalysis/PowerPC/loads-store.ll create mode 100644 llvm/test/Analysis/LoopCacheAnalysis/PowerPC/matmul.ll create mode 100644 llvm/test/Analysis/LoopCacheAnalysis/PowerPC/matvecmul.ll create mode 100644 llvm/test/Analysis/LoopCacheAnalysis/PowerPC/single-store.ll create mode 100644 llvm/test/Analysis/LoopCacheAnalysis/PowerPC/stencil.ll create mode 100644 llvm/test/Analysis/MemorySSA/debugvalue.ll create mode 100644 llvm/test/Analysis/MemorySSA/debugvalue2.ll create mode 100644 llvm/test/Analysis/MemorySSA/loop-rotate-disablebasicaa.ll create mode 100644 llvm/test/Analysis/MemorySSA/loop-rotate-simplified-clone.ll create mode 100644 llvm/test/Analysis/MemorySSA/loop-unswitch.ll create mode 100644 llvm/test/Analysis/MemorySSA/pr42940.ll create mode 100644 llvm/test/Analysis/MemorySSA/pr43044.ll create mode 100644 llvm/test/Analysis/MemorySSA/pr43317.ll create mode 100644 llvm/test/Analysis/MemorySSA/pr43320.ll create mode 100644 llvm/test/Analysis/MemorySSA/pr43426.ll create mode 100644 llvm/test/Analysis/MemorySSA/pr43427.ll create mode 100644 llvm/test/Analysis/MemorySSA/pr43438.ll create mode 100644 llvm/test/Analysis/MemorySSA/pr43493.ll create mode 100644 llvm/test/Analysis/MemorySSA/pr43540.ll create mode 100644 llvm/test/Analysis/MemorySSA/pr43541.ll create mode 100644 llvm/test/Analysis/MemorySSA/pr43569.ll create mode 100644 llvm/test/Analysis/MemorySSA/pr43641.ll create mode 100644 llvm/test/Analysis/MemorySSA/pr44027.ll create mode 100644 
llvm/test/Analysis/MemorySSA/pr44029.ll create mode 100644 llvm/test/Analysis/MemorySSA/renamephis.ll create mode 100644 llvm/test/Analysis/MemorySSA/unreachable.ll create mode 100644 llvm/test/Analysis/MustExecute/must_be_executed_context.ll create mode 100644 llvm/test/Analysis/ScalarEvolution/multiple-max-iterations.ll create mode 100644 llvm/test/Analysis/ScalarEvolution/range_nw_flag.ll create mode 100644 llvm/test/Analysis/ScalarEvolution/trip-count-andor.ll create mode 100644 llvm/test/Analysis/ScalarEvolution/trip-count15.ll create mode 100644 llvm/test/Analysis/ScalarEvolution/umin-umax-folds.ll create mode 100644 llvm/test/Analysis/ScalarEvolution/widenable-condition.ll create mode 100644 llvm/test/Assembler/2003-11-11-ImplicitRename.ll create mode 100644 llvm/test/Assembler/datalayout-invalid-function-ptr-alignment.ll create mode 100644 llvm/test/Assembler/datalayout-invalid-stack-natural-alignment.ll create mode 100644 llvm/test/Assembler/export-symbol-anonymous-class.ll create mode 100644 llvm/test/Assembler/invalid-arg-num-1.ll create mode 100644 llvm/test/Assembler/invalid-arg-num-2.ll create mode 100644 llvm/test/Assembler/invalid-arg-num-3.ll create mode 100644 llvm/test/Assembler/multi-mod-disassemble.ll create mode 100644 llvm/test/Assembler/multi-summary-disassemble.ll create mode 100644 llvm/test/Bindings/llvm-c/freeze.ll create mode 100644 llvm/test/Bitcode/aarch64-addp-upgrade.ll create mode 100644 llvm/test/Bitcode/invalid-functionptr-align.ll create mode 100644 llvm/test/Bitcode/invalid-type-for-null-constant.ll create mode 100644 llvm/test/Bitcode/thinlto-alias3.ll create mode 100644 llvm/test/Bitcode/upgrade-arc-runtime-calls-bitcast.ll create mode 100644 llvm/test/Bitcode/upgrade-arc-runtime-calls.ll create mode 100644 llvm/test/Bitcode/upgrade-datalayout.ll create mode 100644 llvm/test/Bitcode/upgrade-datalayout2.ll create mode 100644 llvm/test/Bitcode/upgrade-datalayout3.ll create mode 100644 llvm/test/Bitcode/upgrade-frame-pointer.ll 
create mode 100644 llvm/test/BugPoint/attr-crash.ll create mode 100644 llvm/test/BugPoint/retain-crashing-metadata.ll create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-gep.ll create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-i128-on-stack.ll create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/call-translator-musttail.ll create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call-weak.ll create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/combiner-load-store-indexing.ll create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/const-0.ll create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/integration-shuffle-vector.ll create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-extends.ll create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-split-vector-arg.ll create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-tbaa.ll create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/legalize-sext-128.ll create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/memcpy_chk_no_tail.ll create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/no-neon-no-fp.ll create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/swiftself.ll create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/tail-call-no-save-fp-lr.ll create mode 100644 llvm/test/CodeGen/AArch64/aarch64-sve-asm-negative.ll create mode 100644 llvm/test/CodeGen/AArch64/aarch64-sve-asm.ll create mode 100644 llvm/test/CodeGen/AArch64/align-down.ll create mode 100644 llvm/test/CodeGen/AArch64/arm64-code-model-large-darwin.ll create mode 100644 llvm/test/CodeGen/AArch64/arm64-memset-to-bzero-pgso.ll create mode 100644 llvm/test/CodeGen/AArch64/arm64-preserve-most.ll create mode 100644 llvm/test/CodeGen/AArch64/arm64-tls-initial-exec.ll 
create mode 100644 llvm/test/CodeGen/AArch64/arm64-tls-local-exec.ll create mode 100644 llvm/test/CodeGen/AArch64/arm64_32-addrs.ll create mode 100644 llvm/test/CodeGen/AArch64/arm64_32-atomics.ll create mode 100644 llvm/test/CodeGen/AArch64/arm64_32-fastisel.ll create mode 100644 llvm/test/CodeGen/AArch64/arm64_32-frame-pointers.ll create mode 100644 llvm/test/CodeGen/AArch64/arm64_32-gep-sink.ll create mode 100644 llvm/test/CodeGen/AArch64/arm64_32-memcpy.ll create mode 100644 llvm/test/CodeGen/AArch64/arm64_32-neon.ll create mode 100644 llvm/test/CodeGen/AArch64/arm64_32-null.ll create mode 100644 llvm/test/CodeGen/AArch64/arm64_32-pointer-extend.ll create mode 100644 llvm/test/CodeGen/AArch64/arm64_32-stack-pointers.ll create mode 100644 llvm/test/CodeGen/AArch64/arm64_32-tls.ll create mode 100644 llvm/test/CodeGen/AArch64/arm64_32-va.ll create mode 100644 llvm/test/CodeGen/AArch64/arm64_32.ll create mode 100644 llvm/test/CodeGen/AArch64/bti-branch-relaxation.ll create mode 100644 llvm/test/CodeGen/AArch64/cfguard-checks.ll create mode 100644 llvm/test/CodeGen/AArch64/cfguard-module-flag.ll create mode 100644 llvm/test/CodeGen/AArch64/cls.ll create mode 100644 llvm/test/CodeGen/AArch64/csr-split.ll create mode 100644 llvm/test/CodeGen/AArch64/dbg-value-tag-offset.ll create mode 100644 llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-signed.ll create mode 100644 llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-unsigned.ll create mode 100644 llvm/test/CodeGen/AArch64/fast-isel-branch-uncond-debug.ll create mode 100644 llvm/test/CodeGen/AArch64/fp-intrinsics.ll create mode 100644 llvm/test/CodeGen/AArch64/fp16-fmla.ll create mode 100644 llvm/test/CodeGen/AArch64/fpconv-vector-op-scalarize-strict.ll create mode 100644 llvm/test/CodeGen/AArch64/global-merge-hidden-minsize.ll create mode 100644 llvm/test/CodeGen/AArch64/i128_volatile_load_store.ll create mode 100644 llvm/test/CodeGen/AArch64/jump-table-32.ll create mode 100644 
llvm/test/CodeGen/AArch64/large-stack.ll create mode 100644 llvm/test/CodeGen/AArch64/lower-ptrmask.ll create mode 100644 llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-cfi.ll create mode 100644 llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-diff-scope-same-key.ll create mode 100644 llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-non-leaf.ll create mode 100644 llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-same-scope-diff-key.ll create mode 100644 llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-same-scope-same-key-a.ll create mode 100644 llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-same-scope-same-key-b.ll create mode 100644 llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-sp-mod.ll create mode 100644 llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-subtarget.ll create mode 100644 llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-thunk.ll create mode 100644 llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-v8-3.ll create mode 100644 llvm/test/CodeGen/AArch64/macro-fusion.ll create mode 100644 llvm/test/CodeGen/AArch64/neon-vcadd.ll create mode 100644 llvm/test/CodeGen/AArch64/no_cfi.ll create mode 100644 llvm/test/CodeGen/AArch64/note-gnu-property-pac-bti-0.ll create mode 100644 llvm/test/CodeGen/AArch64/note-gnu-property-pac-bti-1.ll create mode 100644 llvm/test/CodeGen/AArch64/note-gnu-property-pac-bti-2.ll create mode 100644 llvm/test/CodeGen/AArch64/note-gnu-property-pac-bti-3.ll create mode 100644 llvm/test/CodeGen/AArch64/note-gnu-property-pac-bti-4.ll create mode 100644 llvm/test/CodeGen/AArch64/note-gnu-property-pac-bti-5.ll create mode 100644 llvm/test/CodeGen/AArch64/note-gnu-property-pac-bti-6.ll create mode 100644 llvm/test/CodeGen/AArch64/note-gnu-property-pac-bti-7.ll create mode 100644 llvm/test/CodeGen/AArch64/note-gnu-property-pac-bti-8.ll create mode 100644 llvm/test/CodeGen/AArch64/overeager_mla_fusing.ll create mode 100644 
llvm/test/CodeGen/AArch64/patchable-function-entry-bti.ll create mode 100644 llvm/test/CodeGen/AArch64/patchable-function-entry.ll create mode 100644 llvm/test/CodeGen/AArch64/powi-windows.ll create mode 100644 llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll create mode 100644 llvm/test/CodeGen/AArch64/sadd_sat_plus.ll create mode 100644 llvm/test/CodeGen/AArch64/select_const.ll create mode 100644 llvm/test/CodeGen/AArch64/shift-by-signext.ll create mode 100644 llvm/test/CodeGen/AArch64/shift-logic.ll create mode 100644 llvm/test/CodeGen/AArch64/space.ll create mode 100644 llvm/test/CodeGen/AArch64/srem-lkk.ll create mode 100644 llvm/test/CodeGen/AArch64/srem-seteq-optsize.ll create mode 100644 llvm/test/CodeGen/AArch64/srem-seteq-vec-nonsplat.ll create mode 100644 llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll create mode 100644 llvm/test/CodeGen/AArch64/srem-seteq.ll create mode 100644 llvm/test/CodeGen/AArch64/srem-vector-lkk.ll create mode 100644 llvm/test/CodeGen/AArch64/ssub_sat_plus.ll create mode 100644 llvm/test/CodeGen/AArch64/stack-tagging-ex-1.ll create mode 100644 llvm/test/CodeGen/AArch64/stack-tagging-ex-2.ll create mode 100644 llvm/test/CodeGen/AArch64/stack-tagging-initializer-merge.ll create mode 100644 llvm/test/CodeGen/AArch64/stack-tagging-unchecked-ld-st.ll create mode 100644 llvm/test/CodeGen/AArch64/stack-tagging-untag-placement.ll create mode 100644 llvm/test/CodeGen/AArch64/stackmap.ll create mode 100644 llvm/test/CodeGen/AArch64/strict-fp-int-promote.ll create mode 100644 llvm/test/CodeGen/AArch64/sub-of-bias.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-alloca-stackid.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-calling-convention.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-fp.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-gather-scatter-dag-combine.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-int-arith-imm.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-int-arith-pred.ll create mode 
100644 llvm/test/CodeGen/AArch64/sve-int-arith.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-int-div-pred.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-int-imm.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-int-log-imm.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-int-log-pred.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-int-log.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-int-mad-pred.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-int-mul-pred.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-int-reduce-pred.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-intrinsics-conversion.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-intrinsics-counting-bits.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-intrinsics-fp-compares.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-intrinsics-fp-converts.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-intrinsics-fp-reduce.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-32bit-scaled-offsets.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-32bit-unscaled-offsets.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-64bit-scaled-offset.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-64bit-unscaled-offset.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-intrinsics-int-compares-with-imm.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-intrinsics-int-compares.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-intrinsics-logical.ll create mode 100644 
llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-intrinsics-pred-creation.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-intrinsics-pred-operations.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-intrinsics-pred-testing.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-intrinsics-reversal.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-32bit-scaled-offsets.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-32bit-unscaled-offsets.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-64bit-scaled-offset.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-64bit-unscaled-offset.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-intrinsics-sqdec.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-intrinsics-sqinc.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-intrinsics-uqdec.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-intrinsics-uqinc.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-masked-ldst-sext.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-masked-ldst-trunc.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-masked-ldst-zext.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-neg-int-arith-imm-2.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-neg-int-arith-imm.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-pred-log.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-select.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-setcc.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-vector-splat.ll 
create mode 100644 llvm/test/CodeGen/AArch64/sve2-intrinsics-binary-narrowing-add-sub.ll create mode 100644 llvm/test/CodeGen/AArch64/sve2-intrinsics-binary-narrowing-shr.ll create mode 100644 llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-converts.ll create mode 100644 llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-int-binary-logarithm.ll create mode 100644 llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-widening-mul-acc.ll create mode 100644 llvm/test/CodeGen/AArch64/sve2-intrinsics-non-widening-pairwise-arith.ll create mode 100644 llvm/test/CodeGen/AArch64/sve2-intrinsics-unary-narrowing.ll create mode 100644 llvm/test/CodeGen/AArch64/sve2-intrinsics-while.ll create mode 100644 llvm/test/CodeGen/AArch64/tagged-globals.ll create mode 100644 llvm/test/CodeGen/AArch64/tailcall-bitcast-memcpy.ll create mode 100644 llvm/test/CodeGen/AArch64/tme.ll create mode 100644 llvm/test/CodeGen/AArch64/uadd_sat_plus.ll create mode 100644 llvm/test/CodeGen/AArch64/urem-lkk.ll create mode 100644 llvm/test/CodeGen/AArch64/urem-seteq-nonzero.ll create mode 100644 llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll create mode 100644 llvm/test/CodeGen/AArch64/urem-seteq-vec-tautological.ll create mode 100644 llvm/test/CodeGen/AArch64/urem-vector-lkk.ll create mode 100644 llvm/test/CodeGen/AArch64/use-cr-result-of-dom-icmp-st.ll create mode 100644 llvm/test/CodeGen/AArch64/usub_sat_plus.ll create mode 100644 llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll create mode 100644 llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll create mode 100644 llvm/test/CodeGen/AArch64/vecreduce-fmul-legalization-strict.ll create mode 100644 llvm/test/CodeGen/AArch64/vselect-constants.ll create mode 100644 llvm/test/CodeGen/AArch64/win64-no-uwtable.ll create mode 100644 llvm/test/CodeGen/AArch64/windows-extern-weak.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/bool-legalization.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll create 
mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-sendmsg.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-non-entry-func.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-value.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/lds-size.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/lds-zero-initializer.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.add.gfx10.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.add.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.swap.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.init.exec.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.init.exec.wave32.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.sleep.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.vote.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/read_register.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/readcyclecounter.ll create mode 100644 
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/ret.ll create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-mul24-knownbits.ll create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-unroll-threshold.ll create mode 100644 llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll create mode 100644 llvm/test/CodeGen/AMDGPU/call-constant.ll create mode 100644 llvm/test/CodeGen/AMDGPU/cc-sgpr-limit.ll create mode 100644 llvm/test/CodeGen/AMDGPU/cc-sgpr-over-limit.ll create mode 100644 llvm/test/CodeGen/AMDGPU/computeNumSignBits-mul.ll create mode 100644 llvm/test/CodeGen/AMDGPU/cse-phi-incoming-val.ll create mode 100644 llvm/test/CodeGen/AMDGPU/dead-machine-elim-after-dead-lane.ll create mode 100644 llvm/test/CodeGen/AMDGPU/divergence-at-use.ll create mode 100644 llvm/test/CodeGen/AMDGPU/dpp_combine.ll create mode 100644 llvm/test/CodeGen/AMDGPU/extract-subvector.ll create mode 100644 llvm/test/CodeGen/AMDGPU/fneg-fold-legalize-dag-increase-insts.ll create mode 100644 llvm/test/CodeGen/AMDGPU/fpow.ll create mode 100644 llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll create mode 100644 llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-absent-v3.ll create mode 100644 llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-absent.ll create mode 100644 llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3.ll create mode 100644 llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present.ll create mode 100644 llvm/test/CodeGen/AMDGPU/kill-infinite-loop.ll create mode 100644 llvm/test/CodeGen/AMDGPU/lcssa-optnone.ll create mode 100644 
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.wave32.ll create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.softwqm.ll create mode 100644 llvm/test/CodeGen/AMDGPU/mfma-loop.ll create mode 100644 llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll create mode 100644 llvm/test/CodeGen/AMDGPU/occupancy-levels.ll create mode 100644 llvm/test/CodeGen/AMDGPU/offset-split-flat.ll create mode 100644 llvm/test/CodeGen/AMDGPU/offset-split-global.ll create mode 100644 llvm/test/CodeGen/AMDGPU/opencl-printf-no-hostcall.ll create mode 100644 llvm/test/CodeGen/AMDGPU/opencl-printf.ll create mode 100644 llvm/test/CodeGen/AMDGPU/smrd_vmem_war.ll create mode 100644 llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll create mode 100644 llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll create mode 100644 llvm/test/CodeGen/AMDGPU/sub-zext-cc-zext-cc.ll create mode 100644 llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll create mode 100644 llvm/test/CodeGen/ARM/ParallelDSP/blocks.ll create mode 100644 llvm/test/CodeGen/ARM/ParallelDSP/complex_dot_prod.ll create mode 100644 llvm/test/CodeGen/ARM/ParallelDSP/exchange.ll create mode 100644 llvm/test/CodeGen/ARM/ParallelDSP/overlapping.ll create mode 100644 llvm/test/CodeGen/ARM/ParallelDSP/pr42729.ll create mode 100644 llvm/test/CodeGen/ARM/ParallelDSP/pr43073.ll create mode 100644 llvm/test/CodeGen/ARM/ParallelDSP/sext-acc.ll create mode 100644 llvm/test/CodeGen/ARM/cfguard-checks.ll create mode 100644 llvm/test/CodeGen/ARM/cfguard-module-flag.ll create mode 100644 llvm/test/CodeGen/ARM/cls.ll create mode 100644 llvm/test/CodeGen/ARM/cmov_fp16.ll create mode 100644 llvm/test/CodeGen/ARM/csr-split.ll create mode 100644 llvm/test/CodeGen/ARM/debuginfo-split-carryexpr.ll create mode 100644 llvm/test/CodeGen/ARM/dwarf-frame.ll create mode 100644 
llvm/test/CodeGen/ARM/fp-intrinsics.ll create mode 100644 llvm/test/CodeGen/ARM/fp16-fusedMAC.ll create mode 100644 llvm/test/CodeGen/ARM/fragmented-args-multiple-regs.ll create mode 100644 llvm/test/CodeGen/ARM/gnu_mcount_nc.ll create mode 100644 llvm/test/CodeGen/ARM/intrinsics-cmse.ll create mode 100644 llvm/test/CodeGen/ARM/ipra-exact-definition.ll create mode 100644 llvm/test/CodeGen/ARM/ipra-no-csr.ll create mode 100644 llvm/test/CodeGen/ARM/ipra-r0-returned.ll create mode 100644 llvm/test/CodeGen/ARM/ipra.ll create mode 100644 llvm/test/CodeGen/ARM/legalize-bitcast.ll create mode 100644 llvm/test/CodeGen/ARM/neon-vcadd.ll create mode 100644 llvm/test/CodeGen/ARM/neon-vqaddsub-upgrade.ll create mode 100644 llvm/test/CodeGen/ARM/postrasched.ll create mode 100644 llvm/test/CodeGen/ARM/qdadd.ll create mode 100644 llvm/test/CodeGen/ARM/sadd_sat.ll create mode 100644 llvm/test/CodeGen/ARM/sadd_sat_plus.ll create mode 100644 llvm/test/CodeGen/ARM/signext-inreg.ll create mode 100644 llvm/test/CodeGen/ARM/softfp-constant-comparison.ll create mode 100644 llvm/test/CodeGen/ARM/ssub_sat.ll create mode 100644 llvm/test/CodeGen/ARM/ssub_sat_plus.ll create mode 100644 llvm/test/CodeGen/ARM/uadd_sat.ll create mode 100644 llvm/test/CodeGen/ARM/uadd_sat_plus.ll create mode 100644 llvm/test/CodeGen/ARM/usub_sat.ll create mode 100644 llvm/test/CodeGen/ARM/usub_sat_plus.ll create mode 100644 llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-soft-float.ll create mode 100644 llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-strict.ll create mode 100644 llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-strict.ll create mode 100644 llvm/test/CodeGen/BPF/32-bit-subreg-peephole-phi-1.ll create mode 100644 llvm/test/CodeGen/BPF/32-bit-subreg-peephole-phi-2.ll create mode 100644 llvm/test/CodeGen/BPF/32-bit-subreg-peephole-phi-3.ll create mode 100644 llvm/test/CodeGen/BPF/BTF/extern-builtin.ll create mode 100644 llvm/test/CodeGen/BPF/BTF/extern-func-arg.ll create mode 100644 
llvm/test/CodeGen/BPF/BTF/extern-var-func-weak-section.ll create mode 100644 llvm/test/CodeGen/BPF/BTF/extern-var-func-weak.ll create mode 100644 llvm/test/CodeGen/BPF/BTF/extern-var-func.ll create mode 100644 llvm/test/CodeGen/BPF/BTF/extern-var-section.ll create mode 100644 llvm/test/CodeGen/BPF/BTF/extern-var-struct-weak.ll create mode 100644 llvm/test/CodeGen/BPF/BTF/extern-var-struct.ll create mode 100644 llvm/test/CodeGen/BPF/BTF/extern-var-weak-section.ll create mode 100644 llvm/test/CodeGen/BPF/BTF/static-func.ll create mode 100644 llvm/test/CodeGen/BPF/BTF/weak-global-2.ll create mode 100644 llvm/test/CodeGen/BPF/BTF/weak-global.ll create mode 100644 llvm/test/CodeGen/BPF/CORE/field-reloc-alu32.ll create mode 100644 llvm/test/CodeGen/BPF/CORE/field-reloc-bitfield-1.ll create mode 100644 llvm/test/CodeGen/BPF/CORE/field-reloc-bitfield-2.ll create mode 100644 llvm/test/CodeGen/BPF/CORE/intrinsic-fieldinfo-byte-size-1.ll create mode 100644 llvm/test/CodeGen/BPF/CORE/intrinsic-fieldinfo-byte-size-2.ll create mode 100644 llvm/test/CodeGen/BPF/CORE/intrinsic-fieldinfo-byte-size-3.ll create mode 100644 llvm/test/CodeGen/BPF/CORE/intrinsic-fieldinfo-byte-size-4.ll create mode 100644 llvm/test/CodeGen/BPF/CORE/intrinsic-fieldinfo-existence-1.ll create mode 100644 llvm/test/CodeGen/BPF/CORE/intrinsic-fieldinfo-existence-2.ll create mode 100644 llvm/test/CodeGen/BPF/CORE/intrinsic-fieldinfo-existence-3.ll create mode 100644 llvm/test/CodeGen/BPF/CORE/intrinsic-fieldinfo-lshift-1.ll create mode 100644 llvm/test/CodeGen/BPF/CORE/intrinsic-fieldinfo-lshift-2.ll create mode 100644 llvm/test/CodeGen/BPF/CORE/intrinsic-fieldinfo-rshift-1.ll create mode 100644 llvm/test/CodeGen/BPF/CORE/intrinsic-fieldinfo-rshift-2.ll create mode 100644 llvm/test/CodeGen/BPF/CORE/intrinsic-fieldinfo-rshift-3.ll create mode 100644 llvm/test/CodeGen/BPF/CORE/intrinsic-fieldinfo-signedness-1.ll create mode 100644 llvm/test/CodeGen/BPF/CORE/intrinsic-fieldinfo-signedness-2.ll create mode 100644 
llvm/test/CodeGen/BPF/CORE/intrinsic-fieldinfo-signedness-3.ll create mode 100644 llvm/test/CodeGen/BPF/CORE/intrinsic-transforms.ll create mode 100644 llvm/test/CodeGen/BPF/CORE/no-elf-ama-symbol.ll create mode 100644 llvm/test/CodeGen/BPF/CORE/no-narrow-load.ll create mode 100644 llvm/test/CodeGen/BPF/CORE/offset-reloc-cast-array-1.ll create mode 100644 llvm/test/CodeGen/BPF/CORE/offset-reloc-cast-array-2.ll create mode 100644 llvm/test/CodeGen/BPF/CORE/offset-reloc-cast-struct-1.ll create mode 100644 llvm/test/CodeGen/BPF/CORE/offset-reloc-cast-struct-2.ll create mode 100644 llvm/test/CodeGen/BPF/CORE/offset-reloc-cast-struct-3.ll create mode 100644 llvm/test/CodeGen/BPF/CORE/offset-reloc-cast-union-1.ll create mode 100644 llvm/test/CodeGen/BPF/CORE/offset-reloc-cast-union-2.ll create mode 100644 llvm/test/CodeGen/BPF/CORE/offset-reloc-end-load.ll create mode 100644 llvm/test/CodeGen/BPF/CORE/offset-reloc-end-ret.ll create mode 100644 llvm/test/CodeGen/BPF/CORE/offset-reloc-fieldinfo-1.ll create mode 100644 llvm/test/CodeGen/BPF/CORE/offset-reloc-fieldinfo-2.ll create mode 100644 llvm/test/CodeGen/BPF/CORE/offset-reloc-global-1.ll create mode 100644 llvm/test/CodeGen/BPF/CORE/offset-reloc-global-2.ll create mode 100644 llvm/test/CodeGen/BPF/CORE/offset-reloc-global-3.ll create mode 100644 llvm/test/CodeGen/BPF/CORE/offset-reloc-ignore.ll create mode 100644 llvm/test/CodeGen/BPF/CORE/offset-reloc-middle-chain.ll create mode 100644 llvm/test/CodeGen/BPF/CORE/offset-reloc-multi-array-1.ll create mode 100644 llvm/test/CodeGen/BPF/CORE/offset-reloc-multi-array-2.ll create mode 100644 llvm/test/CodeGen/BPF/CORE/offset-reloc-pointer-1.ll create mode 100644 llvm/test/CodeGen/BPF/CORE/offset-reloc-pointer-2.ll create mode 100644 llvm/test/CodeGen/BPF/callx.ll create mode 100644 llvm/test/CodeGen/BPF/optnone-1.ll create mode 100644 llvm/test/CodeGen/BPF/remove_truncate_6.ll create mode 100644 llvm/test/CodeGen/Generic/DbgValueAggregate.ll create mode 100644 
llvm/test/CodeGen/Hexagon/64bit_tstbit.ll create mode 100644 llvm/test/CodeGen/Hexagon/autohvx/isel-setcc-v256i1.ll create mode 100644 llvm/test/CodeGen/Hexagon/autohvx/minmax-128b.ll create mode 100644 llvm/test/CodeGen/Hexagon/autohvx/minmax-64b.ll create mode 100644 llvm/test/CodeGen/Hexagon/isel-bitcast-v8i1-i8.ll create mode 100644 llvm/test/CodeGen/Hexagon/isel-bitcast-v8i8-v4i16.ll create mode 100644 llvm/test/CodeGen/Hexagon/isel-minmax-v64bit.ll create mode 100644 llvm/test/CodeGen/Hexagon/isel-vselect-v4i8.ll create mode 100644 llvm/test/CodeGen/Hexagon/muxii-bug.ll create mode 100644 llvm/test/CodeGen/Hexagon/packetizer-resources.ll create mode 100644 llvm/test/CodeGen/Hexagon/reg-by-name.ll create mode 100644 llvm/test/CodeGen/Hexagon/signext-inreg.ll create mode 100644 llvm/test/CodeGen/Hexagon/swp-crash-iter.ll create mode 100644 llvm/test/CodeGen/Hexagon/swp-epilog-phi12.ll create mode 100644 llvm/test/CodeGen/MIR/Mips/setRegClassOrRegBank.ll create mode 100644 llvm/test/CodeGen/MSP430/selectcc.ll create mode 100644 llvm/test/CodeGen/MSP430/shift-amount-threshold-b.ll create mode 100644 llvm/test/CodeGen/MSP430/shift-amount-threshold.ll create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/irtranslator/aggregate_struct_return.ll create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/irtranslator/sret_pointer.ll create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/irtranslator/var_arg.ll create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add_vec.ll create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add_vec_builtin.ll create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/aggregate_struct_return.ll create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bitreverse.ll create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/brindirect.ll create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bswap.ll create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/dyn_stackalloc.ll create mode 100644 
llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fabs_vec.ll create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fabs_vec_builtin.ll create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fence.ll create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/floating_point_vec_arithmetic_operations.ll create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/floating_point_vec_arithmetic_operations_builtin.ll create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fsqrt_vec.ll create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fsqrt_vec_builtin.ll create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/implicit_def.ll create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/inttoptr_and_ptrtoint.ll create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/jump_table_and_brjt.ll create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/load_store_fold.ll create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/load_store_vec.ll create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul_vec.ll create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul_vec_builtin.ll create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/rem_and_div_vec.ll create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/rem_and_div_vec_builtin.ll create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sret_pointer.ll create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sub_vec.ll create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sub_vec_builtin.ll create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/trap.ll create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/trunc.ll create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/var_arg.ll create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/zext_and_sext.ll create mode 100644 llvm/test/CodeGen/Mips/atomic-min-max-64.ll create mode 100644 llvm/test/CodeGen/Mips/atomic-min-max.ll create mode 100644 
llvm/test/CodeGen/Mips/constraint-empty.ll create mode 100644 llvm/test/CodeGen/Mips/copy-fp64.ll create mode 100644 llvm/test/CodeGen/Mips/cpus-no-mips64.ll create mode 100644 llvm/test/CodeGen/Mips/cpus.ll create mode 100644 llvm/test/CodeGen/Mips/mcount.ll create mode 100644 llvm/test/CodeGen/Mips/msa/nori.b.ll create mode 100644 llvm/test/CodeGen/Mips/msa/remat-ldi.ll create mode 100644 llvm/test/CodeGen/Mips/no-frame-pointer-elim.ll create mode 100644 llvm/test/CodeGen/Mips/pr42736.ll create mode 100644 llvm/test/CodeGen/NVPTX/bug41651.ll create mode 100644 llvm/test/CodeGen/NVPTX/shfl-p.ll create mode 100644 llvm/test/CodeGen/NVPTX/shfl-sync-p.ll create mode 100644 llvm/test/CodeGen/PowerPC/absol-jump-table-enabled.ll create mode 100644 llvm/test/CodeGen/PowerPC/aix-byval-param.ll create mode 100644 llvm/test/CodeGen/PowerPC/aix-cc-abi.ll create mode 100644 llvm/test/CodeGen/PowerPC/aix-cc-altivec.ll create mode 100644 llvm/test/CodeGen/PowerPC/aix-external-sym-sdnode-lowering.ll create mode 100644 llvm/test/CodeGen/PowerPC/aix-func-dsc-gen.ll create mode 100644 llvm/test/CodeGen/PowerPC/aix-indirect-call.ll create mode 100644 llvm/test/CodeGen/PowerPC/aix-lower-block-address.ll create mode 100644 llvm/test/CodeGen/PowerPC/aix-lower-constant-pool-index.ll create mode 100644 llvm/test/CodeGen/PowerPC/aix-lower-jump-table.ll create mode 100644 llvm/test/CodeGen/PowerPC/aix-lr.ll create mode 100644 llvm/test/CodeGen/PowerPC/aix-nest-param.ll create mode 100644 llvm/test/CodeGen/PowerPC/aix-readonly-with-relocation.ll create mode 100644 llvm/test/CodeGen/PowerPC/aix-reference-func-addr-const.ll create mode 100644 llvm/test/CodeGen/PowerPC/aix-return55.ll create mode 100644 llvm/test/CodeGen/PowerPC/aix-space.ll create mode 100644 llvm/test/CodeGen/PowerPC/aix-sret-param.ll create mode 100644 llvm/test/CodeGen/PowerPC/aix-stackargs.ll create mode 100644 llvm/test/CodeGen/PowerPC/aix-trampoline.ll create mode 100644 llvm/test/CodeGen/PowerPC/aix-undef-func-call.ll 
create mode 100644 llvm/test/CodeGen/PowerPC/aix-user-defined-memcpy.ll create mode 100644 llvm/test/CodeGen/PowerPC/aix-weak-undef-func-call.ll create mode 100644 llvm/test/CodeGen/PowerPC/aix-xcoff-data-only-notoc.ll create mode 100644 llvm/test/CodeGen/PowerPC/aix-xcoff-data.ll create mode 100644 llvm/test/CodeGen/PowerPC/aix-xcoff-lcomm.ll create mode 100644 llvm/test/CodeGen/PowerPC/aix-xcoff-mergeable-const.ll create mode 100644 llvm/test/CodeGen/PowerPC/aix-xcoff-mergeable-str.ll create mode 100644 llvm/test/CodeGen/PowerPC/aix-xcoff-rodata.ll create mode 100644 llvm/test/CodeGen/PowerPC/aix-xcoff-textdisassembly.ll create mode 100644 llvm/test/CodeGen/PowerPC/aix-xcoff-toc.ll create mode 100644 llvm/test/CodeGen/PowerPC/and-mask.ll create mode 100644 llvm/test/CodeGen/PowerPC/build-vector-allones.ll create mode 100644 llvm/test/CodeGen/PowerPC/check-cpu.ll create mode 100644 llvm/test/CodeGen/PowerPC/csr-split.ll create mode 100644 llvm/test/CodeGen/PowerPC/dform-adjust.ll create mode 100644 llvm/test/CodeGen/PowerPC/elf-common.ll create mode 100644 llvm/test/CodeGen/PowerPC/fdiv.ll create mode 100644 llvm/test/CodeGen/PowerPC/float-vector-gather.ll create mode 100644 llvm/test/CodeGen/PowerPC/fma-combine.ll create mode 100644 llvm/test/CodeGen/PowerPC/fold-rlwinm-1.ll create mode 100644 llvm/test/CodeGen/PowerPC/fp-intrinsics-fptosi-legal.ll create mode 100644 llvm/test/CodeGen/PowerPC/ifunc.ll create mode 100644 llvm/test/CodeGen/PowerPC/inline-asm-vsx-clobbers.ll create mode 100644 llvm/test/CodeGen/PowerPC/inlineasm-extendedmne.ll create mode 100644 llvm/test/CodeGen/PowerPC/instr-properties.ll create mode 100644 llvm/test/CodeGen/PowerPC/load-and-splat.ll create mode 100644 llvm/test/CodeGen/PowerPC/load-shuffle-and-shuffle-store.ll create mode 100644 llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll create mode 100644 llvm/test/CodeGen/PowerPC/lower-globaladdr32-aix-asm.ll create mode 100644 llvm/test/CodeGen/PowerPC/lower-globaladdr32-aix.ll 
create mode 100644 llvm/test/CodeGen/PowerPC/lower-globaladdr64-aix-asm.ll create mode 100644 llvm/test/CodeGen/PowerPC/lower-globaladdr64-aix.ll create mode 100644 llvm/test/CodeGen/PowerPC/lower-massv-attr.ll create mode 100644 llvm/test/CodeGen/PowerPC/lower-massv.ll create mode 100644 llvm/test/CodeGen/PowerPC/lsr-insns-cost.ll create mode 100644 llvm/test/CodeGen/PowerPC/mi-peephole-splat.ll create mode 100644 llvm/test/CodeGen/PowerPC/no-duplicate.ll create mode 100644 llvm/test/CodeGen/PowerPC/popcnt-zext.ll create mode 100644 llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll create mode 100644 llvm/test/CodeGen/PowerPC/pr25080.ll create mode 100644 llvm/test/CodeGen/PowerPC/pr41088.ll create mode 100644 llvm/test/CodeGen/PowerPC/pr43527.ll create mode 100644 llvm/test/CodeGen/PowerPC/pr44183.ll create mode 100644 llvm/test/CodeGen/PowerPC/pr44239.ll create mode 100644 llvm/test/CodeGen/PowerPC/reduce_scalarization02.ll create mode 100644 llvm/test/CodeGen/PowerPC/remove-redundant-load-imm.ll create mode 100644 llvm/test/CodeGen/PowerPC/scalar-min-max.ll create mode 100644 llvm/test/CodeGen/PowerPC/scalar-rounding-ops.ll create mode 100644 llvm/test/CodeGen/PowerPC/sext-vector-inreg.ll create mode 100644 llvm/test/CodeGen/PowerPC/shrink-wrap.ll create mode 100644 llvm/test/CodeGen/PowerPC/sms-cpy-1.ll create mode 100644 llvm/test/CodeGen/PowerPC/sms-phi-1.ll create mode 100644 llvm/test/CodeGen/PowerPC/sms-phi-2.ll create mode 100644 llvm/test/CodeGen/PowerPC/sms-phi-3.ll create mode 100644 llvm/test/CodeGen/PowerPC/sms-phi-5.ll create mode 100644 llvm/test/CodeGen/PowerPC/smulfixsat.ll create mode 100644 llvm/test/CodeGen/PowerPC/spill_p9_setb.ll create mode 100644 llvm/test/CodeGen/PowerPC/srem-lkk.ll create mode 100644 llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll create mode 100644 llvm/test/CodeGen/PowerPC/test_func_desc.ll create mode 100644 llvm/test/CodeGen/PowerPC/umulfixsat.ll create mode 100644 
llvm/test/CodeGen/PowerPC/unaligned-floats.ll create mode 100644 llvm/test/CodeGen/PowerPC/urem-lkk.ll create mode 100644 llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll create mode 100644 llvm/test/CodeGen/PowerPC/vavg.ll create mode 100644 llvm/test/CodeGen/PowerPC/vec-bswap.ll create mode 100644 llvm/test/CodeGen/PowerPC/vector-extend-sign.ll create mode 100644 llvm/test/CodeGen/PowerPC/vector-rotates.ll create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/calllowering-ret.ll create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/irtranslator-calllowering.ll create mode 100644 llvm/test/CodeGen/RISCV/copysign-casts.ll create mode 100644 llvm/test/CodeGen/RISCV/disjoint.ll create mode 100644 llvm/test/CodeGen/RISCV/fastcc-float.ll create mode 100644 llvm/test/CodeGen/RISCV/fastcc-int.ll create mode 100644 llvm/test/CodeGen/RISCV/fp16-promote.ll create mode 100644 llvm/test/CodeGen/RISCV/get-register-invalid.ll create mode 100644 llvm/test/CodeGen/RISCV/get-register-noreserve.ll create mode 100644 llvm/test/CodeGen/RISCV/get-register-reserve.ll create mode 100644 llvm/test/CodeGen/RISCV/inline-asm-abi-names.ll create mode 100644 llvm/test/CodeGen/RISCV/inline-asm-clobbers.ll create mode 100644 llvm/test/CodeGen/RISCV/inline-asm-d-abi-names.ll create mode 100644 llvm/test/CodeGen/RISCV/inline-asm-f-abi-names.ll create mode 100644 llvm/test/CodeGen/RISCV/interrupt-attr-callee.ll create mode 100644 llvm/test/CodeGen/RISCV/intrinsics/trap.ll create mode 100644 llvm/test/CodeGen/RISCV/mir-target-flags.ll create mode 100644 llvm/test/CodeGen/RISCV/module-target-abi.ll create mode 100644 llvm/test/CodeGen/RISCV/module-target-abi2.ll create mode 100644 llvm/test/CodeGen/RISCV/reserved-reg-errors.ll create mode 100644 llvm/test/CodeGen/RISCV/reserved-regs.ll create mode 100644 llvm/test/CodeGen/RISCV/rv64-large-stack.ll create mode 100644 llvm/test/CodeGen/RISCV/rv64i-complex-float.ll create mode 100644 llvm/test/CodeGen/RISCV/rv64i-single-softfloat.ll create mode 100644 
llvm/test/CodeGen/RISCV/rv64i-w-insts-legalization.ll create mode 100644 llvm/test/CodeGen/RISCV/rv64m-w-insts-legalization.ll create mode 100644 llvm/test/CodeGen/RISCV/shrinkwrap.ll create mode 100644 llvm/test/CodeGen/RISCV/split-sp-adjust.ll create mode 100644 llvm/test/CodeGen/RISCV/srem-lkk.ll create mode 100644 llvm/test/CodeGen/RISCV/srem-vector-lkk.ll create mode 100644 llvm/test/CodeGen/RISCV/stack-realignment-with-variable-sized-objects.ll create mode 100644 llvm/test/CodeGen/RISCV/subtarget-features-std-ext.ll create mode 100644 llvm/test/CodeGen/RISCV/urem-lkk.ll create mode 100644 llvm/test/CodeGen/RISCV/urem-vector-lkk.ll create mode 100644 llvm/test/CodeGen/SPARC/64atomics.ll create mode 100644 llvm/test/CodeGen/SystemZ/fentry-insertion.ll create mode 100644 llvm/test/CodeGen/SystemZ/fp-mul-13.ll create mode 100644 llvm/test/CodeGen/SystemZ/fp-strict-cmp-01.ll create mode 100644 llvm/test/CodeGen/SystemZ/fp-strict-cmp-02.ll create mode 100644 llvm/test/CodeGen/SystemZ/fp-strict-cmp-03.ll create mode 100644 llvm/test/CodeGen/SystemZ/fp-strict-cmp-04.ll create mode 100644 llvm/test/CodeGen/SystemZ/fp-strict-cmp-05.ll create mode 100644 llvm/test/CodeGen/SystemZ/fp-strict-cmp-06.ll create mode 100644 llvm/test/CodeGen/SystemZ/fp-strict-cmps-01.ll create mode 100644 llvm/test/CodeGen/SystemZ/fp-strict-cmps-02.ll create mode 100644 llvm/test/CodeGen/SystemZ/fp-strict-cmps-03.ll create mode 100644 llvm/test/CodeGen/SystemZ/fp-strict-cmps-04.ll create mode 100644 llvm/test/CodeGen/SystemZ/fp-strict-cmps-05.ll create mode 100644 llvm/test/CodeGen/SystemZ/fp-strict-cmps-06.ll create mode 100644 llvm/test/CodeGen/SystemZ/fp-strict-conv-05.ll create mode 100644 llvm/test/CodeGen/SystemZ/fp-strict-conv-06.ll create mode 100644 llvm/test/CodeGen/SystemZ/fp-strict-conv-07.ll create mode 100644 llvm/test/CodeGen/SystemZ/fp-strict-conv-08.ll create mode 100644 llvm/test/CodeGen/SystemZ/fp-strict-conv-09.ll create mode 100644 
llvm/test/CodeGen/SystemZ/fp-strict-conv-10.ll create mode 100644 llvm/test/CodeGen/SystemZ/fp-strict-conv-11.ll create mode 100644 llvm/test/CodeGen/SystemZ/fp-strict-conv-12.ll create mode 100644 llvm/test/CodeGen/SystemZ/fp-strict-conv-13.ll create mode 100644 llvm/test/CodeGen/SystemZ/fp-strict-conv-14.ll create mode 100644 llvm/test/CodeGen/SystemZ/fp-strict-conv-16.ll create mode 100644 llvm/test/CodeGen/SystemZ/fp-strict-mul-12.ll create mode 100644 llvm/test/CodeGen/SystemZ/fp-strict-mul-13.ll create mode 100644 llvm/test/CodeGen/SystemZ/frame-22.ll create mode 100644 llvm/test/CodeGen/SystemZ/ghc-cc-01.ll create mode 100644 llvm/test/CodeGen/SystemZ/ghc-cc-02.ll create mode 100644 llvm/test/CodeGen/SystemZ/ghc-cc-03.ll create mode 100644 llvm/test/CodeGen/SystemZ/ghc-cc-04.ll create mode 100644 llvm/test/CodeGen/SystemZ/ghc-cc-05.ll create mode 100644 llvm/test/CodeGen/SystemZ/ghc-cc-06.ll create mode 100644 llvm/test/CodeGen/SystemZ/ghc-cc-07.ll create mode 100644 llvm/test/CodeGen/SystemZ/int-cmp-56.ll create mode 100644 llvm/test/CodeGen/SystemZ/int-cmp-57.ll create mode 100644 llvm/test/CodeGen/SystemZ/la-05.ll create mode 100644 llvm/test/CodeGen/SystemZ/mnop-mcount-01.ll create mode 100644 llvm/test/CodeGen/SystemZ/mnop-mcount-02.ll create mode 100644 llvm/test/CodeGen/SystemZ/mrecord-mcount-01.ll create mode 100644 llvm/test/CodeGen/SystemZ/mrecord-mcount-02.ll create mode 100644 llvm/test/CodeGen/SystemZ/vec-strict-cmp-01.ll create mode 100644 llvm/test/CodeGen/SystemZ/vec-strict-cmp-02.ll create mode 100644 llvm/test/CodeGen/SystemZ/vec-strict-cmp-03.ll create mode 100644 llvm/test/CodeGen/SystemZ/vec-strict-cmps-01.ll create mode 100644 llvm/test/CodeGen/SystemZ/vec-strict-cmps-02.ll create mode 100644 llvm/test/CodeGen/SystemZ/vec-strict-cmps-03.ll create mode 100644 llvm/test/CodeGen/SystemZ/vec-strict-conv-01.ll create mode 100644 llvm/test/CodeGen/SystemZ/vec-strict-conv-02.ll create mode 100644 llvm/test/CodeGen/SystemZ/vec-strict-conv-03.ll 
create mode 100644 llvm/test/CodeGen/Thumb/cmp-and-fold.ll create mode 100644 llvm/test/CodeGen/Thumb/scheduler-clone-cpsr-def.ll create mode 100644 llvm/test/CodeGen/Thumb2/LowOverheadLoops/basic-tail-pred.ll create mode 100644 llvm/test/CodeGen/Thumb2/LowOverheadLoops/branch-targets.ll create mode 100644 llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll create mode 100644 llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll create mode 100644 llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll create mode 100644 llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll create mode 100644 llvm/test/CodeGen/Thumb2/LowOverheadLoops/nested.ll create mode 100644 llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-le-simple.ll create mode 100644 llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-narrow.ll create mode 100644 llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-pattern-fail.ll create mode 100644 llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-widen.ll create mode 100644 llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-reduce.ll create mode 100644 llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll create mode 100644 llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-reduce-mve-tail.ll create mode 100644 llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-unroll.ll create mode 100644 llvm/test/CodeGen/Thumb2/bug-subw.ll create mode 100644 llvm/test/CodeGen/Thumb2/csel.ll create mode 100644 llvm/test/CodeGen/Thumb2/lsll0.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-be.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-bitreverse.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-bswap.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-ctlz.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-ctpop.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-cttz.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-extractelt.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-fmas.ll create mode 100644 
llvm/test/CodeGen/Thumb2/mve-gather-ind16-scaled.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-gather-ind16-unscaled.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-gather-ind32-scaled.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-gather-ind32-unscaled.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-gather-ind8-unscaled.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-gather-ptrs.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-gather-scatter-opt.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/load-store.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/predicates.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/scalar-shifts.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/scatter-gather.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vabavq.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vabdq.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vadc-multiple.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vadc.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vaddq.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vandq.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vbicq.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vcaddq.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vcmlaq.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vcmulq.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vcvt.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vector-shift-imm-dyadic.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vector-shift-imm.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vector-shift-var.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/veorq.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vhaddq.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vhsubq.ll create mode 100644 
llvm/test/CodeGen/Thumb2/mve-intrinsics/vld24.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vldr.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxnmq.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxq.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vminnmq.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vminq.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vminvq.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vmldav.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vmlldav.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vmulhq.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vmullbq.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vmulltq.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vmulq.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vornq.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vorrq.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vqaddq.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vqdmulhq.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vqrdmulhq.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vqsubq.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vrhaddq.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vrmulhq.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vsubq.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-masked-ldst-offset.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-masked-ldst-postinc.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-masked-ldst-preinc.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-masked-load.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-masked-store.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-multivec-spill.ll create mode 100644 
llvm/test/CodeGen/Thumb2/mve-phireg.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-pred-and.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-pred-bitcast.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-pred-build-const.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-pred-build-var.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-pred-ext.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-pred-loadstore.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-pred-not.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-pred-or.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-pred-shuffle.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-pred-spill.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-pred-threshold.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-pred-xor.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-saturating-arith.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-shifts-scalar.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-shuffleext.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-shufflemov.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-stack.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-vaddqr.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-vaddv.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-vcmp.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-vcmpf.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-vcmpr.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-vcmpz.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-vctp.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-vecreduce-fminmax.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-vector-spill.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-vfma.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-vhaddsub.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-vld2.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-vld3.ll create mode 
100644 llvm/test/CodeGen/Thumb2/mve-vld4.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-vldst4.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-vmaxv.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-vmla.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-vmovn.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-vmulqr.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-vpsel.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-vpt-from-intrinsics.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-vst2.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-vst3.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-vst4.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-vsubqr.ll create mode 100644 llvm/test/CodeGen/Thumb2/t2peephole-t2ADDrr-to-t2ADDri.ll create mode 100644 llvm/test/CodeGen/Thumb2/vqabs.ll create mode 100644 llvm/test/CodeGen/Thumb2/vqneg.ll create mode 100644 llvm/test/CodeGen/VE/simple_prologue_epilogue.ll create mode 100644 llvm/test/CodeGen/VE/target_support.ll create mode 100644 llvm/test/CodeGen/WebAssembly/export-name.ll create mode 100644 llvm/test/CodeGen/WebAssembly/lower-em-exceptions-resume-only.ll create mode 100644 llvm/test/CodeGen/WebAssembly/lower-em-sjlj-alias.ll create mode 100644 llvm/test/CodeGen/WebAssembly/lower-em-sjlj-sret.ll create mode 100644 llvm/test/CodeGen/WebAssembly/simd-load-splat.ll create mode 100644 llvm/test/CodeGen/WinCFGuard/cfguard-cast.ll create mode 100644 llvm/test/CodeGen/X86/GlobalISel/ptr-add.ll create mode 100644 llvm/test/CodeGen/X86/align-branch-boundary-default.ll create mode 100644 llvm/test/CodeGen/X86/align-branch-boundary-noautopadding.ll create mode 100644 llvm/test/CodeGen/X86/align-branch-boundary-suppressions.ll create mode 100644 llvm/test/CodeGen/X86/align-down-const.ll create mode 100644 llvm/test/CodeGen/X86/align-down.ll create mode 100644 llvm/test/CodeGen/X86/atomic-non-integer-fp128.ll create mode 100644 llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll create mode 100644 
llvm/test/CodeGen/X86/avx512-cmp-mask.ll create mode 100644 llvm/test/CodeGen/X86/avx512vbmi2-funnel-shifts.ll create mode 100644 llvm/test/CodeGen/X86/avx512vbmi2vl-funnel-shifts.ll create mode 100644 llvm/test/CodeGen/X86/avx512vnni.ll create mode 100644 llvm/test/CodeGen/X86/cfguard-checks.ll create mode 100644 llvm/test/CodeGen/X86/cfguard-module-flag.ll create mode 100644 llvm/test/CodeGen/X86/cfguard-x86-64-vectorcall.ll create mode 100644 llvm/test/CodeGen/X86/cfguard-x86-vectorcall.ll create mode 100644 llvm/test/CodeGen/X86/codegen-prepare-collapse.ll create mode 100644 llvm/test/CodeGen/X86/coff-fp-section-name.ll create mode 100644 llvm/test/CodeGen/X86/combineIncDecVector-crash.ll create mode 100644 llvm/test/CodeGen/X86/conditional-tailcall-pgso.ll create mode 100644 llvm/test/CodeGen/X86/csr-split.ll create mode 100644 llvm/test/CodeGen/X86/dbg-value-func-arg.ll create mode 100644 llvm/test/CodeGen/X86/debug-loclists-lto.ll create mode 100644 llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll create mode 100644 llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll create mode 100644 llvm/test/CodeGen/X86/dtor-priority-coff.ll create mode 100644 llvm/test/CodeGen/X86/fma-fneg-combine-2.ll create mode 100644 llvm/test/CodeGen/X86/fmf-reduction.ll create mode 100644 llvm/test/CodeGen/X86/fp-intrinsics-flags-x86_64.ll create mode 100644 llvm/test/CodeGen/X86/fp-intrinsics-flags.ll create mode 100644 llvm/test/CodeGen/X86/fp-intrinsics-fma.ll create mode 100644 llvm/test/CodeGen/X86/fp-strict-libcalls-msvc32.ll create mode 100644 llvm/test/CodeGen/X86/fp-strict-scalar-cmp.ll create mode 100644 llvm/test/CodeGen/X86/fp-strict-scalar-fptoint.ll create mode 100644 llvm/test/CodeGen/X86/fp-strict-scalar-inttofp.ll create mode 100644 llvm/test/CodeGen/X86/fp-strict-scalar-round.ll create mode 100644 llvm/test/CodeGen/X86/fp-strict-scalar.ll create mode 100644 llvm/test/CodeGen/X86/fp128-cast-strict.ll create mode 100644 
llvm/test/CodeGen/X86/fp128-libcalls-strict.ll create mode 100644 llvm/test/CodeGen/X86/fp80-strict-scalar-cmp.ll create mode 100644 llvm/test/CodeGen/X86/fp80-strict-scalar.ll create mode 100644 llvm/test/CodeGen/X86/haddsub-broadcast.ll create mode 100644 llvm/test/CodeGen/X86/isel-blendi-gettargetconstant.ll create mode 100644 llvm/test/CodeGen/X86/lower-ptrmask.ll create mode 100644 llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll create mode 100644 llvm/test/CodeGen/X86/memcmp-pgso.ll create mode 100644 llvm/test/CodeGen/X86/mixed-ptr-sizes.ll create mode 100644 llvm/test/CodeGen/X86/ms-inline-asm-PR44272.ll create mode 100644 llvm/test/CodeGen/X86/mulfix_combine.ll create mode 100644 llvm/test/CodeGen/X86/musttail-inalloca.ll create mode 100644 llvm/test/CodeGen/X86/musttail-tailcc.ll create mode 100644 llvm/test/CodeGen/X86/mxcsr-reg-usage.ll create mode 100644 llvm/test/CodeGen/X86/no-sse-win64.ll create mode 100644 llvm/test/CodeGen/X86/no-sse-x86.ll create mode 100644 llvm/test/CodeGen/X86/nomovtopush.ll create mode 100644 llvm/test/CodeGen/X86/noreturn-call-linux.ll create mode 100644 llvm/test/CodeGen/X86/noreturn-call-win64.ll create mode 100644 llvm/test/CodeGen/X86/not-of-dec.ll create mode 100644 llvm/test/CodeGen/X86/offset-operator.ll create mode 100644 llvm/test/CodeGen/X86/patchable-function-entry-ibt.ll create mode 100644 llvm/test/CodeGen/X86/patchable-function-entry.ll create mode 100644 llvm/test/CodeGen/X86/powi-windows.ll create mode 100644 llvm/test/CodeGen/X86/pr42727.ll create mode 100644 llvm/test/CodeGen/X86/pr42870.ll create mode 100644 llvm/test/CodeGen/X86/pr42905.ll create mode 100644 llvm/test/CodeGen/X86/pr42909.ll create mode 100644 llvm/test/CodeGen/X86/pr42998.ll create mode 100644 llvm/test/CodeGen/X86/pr43157.ll create mode 100644 llvm/test/CodeGen/X86/pr43507.ll create mode 100644 llvm/test/CodeGen/X86/pr43509.ll create mode 100644 llvm/test/CodeGen/X86/pr43529.ll create mode 100644 llvm/test/CodeGen/X86/pr43575.ll create 
mode 100644 llvm/test/CodeGen/X86/pr43820.ll create mode 100644 llvm/test/CodeGen/X86/pr43866.ll create mode 100644 llvm/test/CodeGen/X86/pr43952.ll create mode 100644 llvm/test/CodeGen/X86/pr44140.ll create mode 100644 llvm/test/CodeGen/X86/pr44396.ll create mode 100644 llvm/test/CodeGen/X86/pr44412.ll create mode 100644 llvm/test/CodeGen/X86/pr44812.ll create mode 100644 llvm/test/CodeGen/X86/psadbw.ll create mode 100644 llvm/test/CodeGen/X86/relptr-rodata.ll create mode 100644 llvm/test/CodeGen/X86/sadd_sat_plus.ll create mode 100644 llvm/test/CodeGen/X86/scalar-fp-to-i32.ll create mode 100644 llvm/test/CodeGen/X86/sdiv_fix.ll create mode 100644 llvm/test/CodeGen/X86/select-sra.ll create mode 100644 llvm/test/CodeGen/X86/select-testb-volatile-load.ll create mode 100644 llvm/test/CodeGen/X86/shift-by-signext.ll create mode 100644 llvm/test/CodeGen/X86/shift-logic.ll create mode 100644 llvm/test/CodeGen/X86/shrink-compare-pgso.ll create mode 100644 llvm/test/CodeGen/X86/srem-lkk.ll create mode 100644 llvm/test/CodeGen/X86/srem-seteq-optsize.ll create mode 100644 llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll create mode 100644 llvm/test/CodeGen/X86/srem-seteq-vec-splat.ll create mode 100644 llvm/test/CodeGen/X86/srem-seteq.ll create mode 100644 llvm/test/CodeGen/X86/srem-vector-lkk.ll create mode 100644 llvm/test/CodeGen/X86/ssub_sat_plus.ll create mode 100644 llvm/test/CodeGen/X86/stack-folding-int-avx512vnni.ll create mode 100644 llvm/test/CodeGen/X86/stack-protector-2.ll create mode 100644 llvm/test/CodeGen/X86/stack-protector-strong-macho-win32-xor.ll create mode 100644 llvm/test/CodeGen/X86/statepoint-no-realign-stack.ll create mode 100644 llvm/test/CodeGen/X86/sub-of-bias.ll create mode 100644 llvm/test/CodeGen/X86/tail-call-deref.ll create mode 100644 llvm/test/CodeGen/X86/tailcall-assume.ll create mode 100644 llvm/test/CodeGen/X86/tailcall-tailcc.ll create mode 100644 llvm/test/CodeGen/X86/tailcc-calleesave.ll create mode 100644 
llvm/test/CodeGen/X86/tailcc-disable-tail-calls.ll create mode 100644 llvm/test/CodeGen/X86/tailcc-fastcc.ll create mode 100644 llvm/test/CodeGen/X86/tailcc-fastisel.ll create mode 100644 llvm/test/CodeGen/X86/tailcc-largecode.ll create mode 100644 llvm/test/CodeGen/X86/tailcc-stackalign.ll create mode 100644 llvm/test/CodeGen/X86/tailcc-structret.ll create mode 100644 llvm/test/CodeGen/X86/tailccbyval.ll create mode 100644 llvm/test/CodeGen/X86/tailccbyval64.ll create mode 100644 llvm/test/CodeGen/X86/tailccfp.ll create mode 100644 llvm/test/CodeGen/X86/tailccfp2.ll create mode 100644 llvm/test/CodeGen/X86/tailccpic1.ll create mode 100644 llvm/test/CodeGen/X86/tailccpic2.ll create mode 100644 llvm/test/CodeGen/X86/tailccstack64.ll create mode 100644 llvm/test/CodeGen/X86/taildup-heapallocsite.ll create mode 100644 llvm/test/CodeGen/X86/typeid-alias.ll create mode 100644 llvm/test/CodeGen/X86/uadd_sat_plus.ll create mode 100644 llvm/test/CodeGen/X86/udiv_fix.ll create mode 100644 llvm/test/CodeGen/X86/umul_fix_sat.ll create mode 100644 llvm/test/CodeGen/X86/urem-lkk.ll create mode 100644 llvm/test/CodeGen/X86/urem-seteq-nonzero.ll create mode 100644 llvm/test/CodeGen/X86/urem-seteq-vec-nonzero.ll create mode 100644 llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll create mode 100644 llvm/test/CodeGen/X86/urem-vector-lkk.ll create mode 100644 llvm/test/CodeGen/X86/use-cr-result-of-dom-icmp-st.ll create mode 100644 llvm/test/CodeGen/X86/usub_sat_plus.ll create mode 100644 llvm/test/CodeGen/X86/vec-strict-128.ll create mode 100644 llvm/test/CodeGen/X86/vec-strict-256.ll create mode 100644 llvm/test/CodeGen/X86/vec-strict-512.ll create mode 100644 llvm/test/CodeGen/X86/vec-strict-cmp-128.ll create mode 100644 llvm/test/CodeGen/X86/vec-strict-cmp-256.ll create mode 100644 llvm/test/CodeGen/X86/vec-strict-cmp-512.ll create mode 100644 llvm/test/CodeGen/X86/vec-strict-cmp-sub128.ll create mode 100644 llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll create mode 100644 
llvm/test/CodeGen/X86/vec-strict-fptoint-256.ll create mode 100644 llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll create mode 100644 llvm/test/CodeGen/X86/vec-strict-inttofp-128.ll create mode 100644 llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll create mode 100644 llvm/test/CodeGen/X86/vec-strict-inttofp-512.ll create mode 100644 llvm/test/CodeGen/X86/vec-strict-round-128.ll create mode 100644 llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics-flags.ll create mode 100644 llvm/test/CodeGen/X86/vector-lzcnt-sub128.ll create mode 100644 llvm/test/CodeGen/X86/vector-mulfix-legalize.ll create mode 100644 llvm/test/CodeGen/X86/vshli-simplify-demanded-bits.ll create mode 100644 llvm/test/CodeGen/X86/win64-eh-empty-block.ll create mode 100644 llvm/test/CodeGen/X86/win64-funclet-savexmm.ll create mode 100644 llvm/test/CodeGen/X86/win64-stackprobe-overflow.ll create mode 100644 llvm/test/DebugInfo/AArch64/call-site-info-output.ll create mode 100644 llvm/test/DebugInfo/ARM/call-site-info-output.ll create mode 100644 llvm/test/DebugInfo/ARM/entry-value-multi-byte-expr.ll create mode 100644 llvm/test/DebugInfo/COFF/line-zero.ll create mode 100644 llvm/test/DebugInfo/Generic/export-symbol-anonymous-class.ll create mode 100644 llvm/test/DebugInfo/RISCV/relax-debug-frame.ll create mode 100644 llvm/test/DebugInfo/Sparc/entry-value-complex-reg-expr.ll create mode 100644 llvm/test/DebugInfo/WebAssembly/dbg-value-dwarfdump.ll create mode 100644 llvm/test/DebugInfo/WebAssembly/dbg-value-ti.ll create mode 100644 llvm/test/DebugInfo/X86/DW_AT_deleted.ll create mode 100644 llvm/test/DebugInfo/X86/codegenprep-addrsink.ll create mode 100644 llvm/test/DebugInfo/X86/dbg-value-dropped-instcombine.ll create mode 100644 llvm/test/DebugInfo/X86/dbgcall-site-64-bit-imms.ll create mode 100644 llvm/test/DebugInfo/X86/dbgcall-site-zero-valued-imms.ll create mode 100644 llvm/test/DebugInfo/X86/debug-info-template-align.ll create mode 100644 llvm/test/DebugInfo/X86/debug-macinfo-split-dwarf.ll 
create mode 100644 llvm/test/DebugInfo/X86/gmlt-empty-base-address.ll create mode 100644 llvm/test/DebugInfo/X86/live-debug-values-expr-conflict.ll create mode 100644 llvm/test/DebugInfo/X86/live-debug-values-remove-range.ll create mode 100644 llvm/test/DebugInfo/X86/objc_direct.ll create mode 100644 llvm/test/DebugInfo/X86/sdag-transfer-dbgvalue.ll create mode 100644 llvm/test/DebugInfo/X86/sroa-after-inlining.ll create mode 100644 llvm/test/DebugInfo/salvage-cast-debug-info.ll create mode 100644 llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg-blockaddress.ll create mode 100644 llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg1.ll create mode 100644 llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg2-dead-block-order.ll create mode 100644 llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg3-phis.ll create mode 100644 llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg4-multiple-duplicate-cfg-updates.ll create mode 100644 llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg5-del-phis-for-dead-block.ll create mode 100644 llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg6-dead-self-loop.ll create mode 100644 llvm/test/ExecutionEngine/OrcLazy/emulated-tls.ll create mode 100644 llvm/test/ExecutionEngine/OrcLazy/printargv.ll create mode 100644 llvm/test/ExecutionEngine/OrcLazy/static-library-support.ll create mode 100644 llvm/test/Feature/load_extension.ll create mode 100644 llvm/test/Instrumentation/AddressSanitizer/debug-info-alloca.ll create mode 100644 llvm/test/Instrumentation/AddressSanitizer/global_addrspace.ll create mode 100644 llvm/test/Instrumentation/AddressSanitizer/version-mismatch-check.ll create mode 100644 llvm/test/Instrumentation/HWAddressSanitizer/alloca-compat.ll create mode 100644 llvm/test/Instrumentation/HWAddressSanitizer/basic-compat.ll create mode 100644 llvm/test/Instrumentation/HWAddressSanitizer/dbg-value-tag-offset.ll create mode 100644 
llvm/test/Instrumentation/HWAddressSanitizer/globals.ll create mode 100644 llvm/test/Instrumentation/HWAddressSanitizer/personality.ll create mode 100644 llvm/test/Instrumentation/MemorySanitizer/attributes.ll create mode 100644 llvm/test/Instrumentation/MemorySanitizer/clmul.ll create mode 100644 llvm/test/Instrumentation/MemorySanitizer/msan_llvm_launder_invariant.ll create mode 100644 llvm/test/Instrumentation/MemorySanitizer/msan_llvm_strip_invariant.ll create mode 100644 llvm/test/LTO/ARM/lto-linking-metadata.ll create mode 100644 llvm/test/LTO/Resolution/X86/Inputs/ifunc2.ll create mode 100644 llvm/test/LTO/Resolution/X86/ifunc2.ll create mode 100644 llvm/test/LTO/Resolution/X86/not-prevailing-weak-aliasee.ll create mode 100644 llvm/test/LTO/X86/Inputs/start-lib1.ll create mode 100644 llvm/test/LTO/X86/Inputs/start-lib2.ll create mode 100644 llvm/test/LTO/X86/Inputs/type-mapping-bug3.ll create mode 100644 llvm/test/LTO/X86/embed-bitcode.ll create mode 100644 llvm/test/LTO/X86/type-mapping-bug3.ll create mode 100644 llvm/test/Linker/Inputs/module-max-warn.ll create mode 100644 llvm/test/Linker/addrspace.ll create mode 100644 llvm/test/Linker/module-max-warn.ll create mode 100644 llvm/test/MC/ELF/section-relro.ll create mode 100644 llvm/test/MC/WebAssembly/data-symbol-in-text-section.ll create mode 100644 llvm/test/Other/module-pass-printer.ll create mode 100644 llvm/test/Other/new-pm-pgo-O0.ll create mode 100644 llvm/test/Other/new-pm-pr42726-cgscc.ll create mode 100644 llvm/test/Other/print-slotindexes.ll create mode 100644 llvm/test/Other/scalable-vectors-core-ir.ll create mode 100644 llvm/test/Other/unroll-sroa.ll create mode 100644 llvm/test/Reduce/remove-args.ll create mode 100644 llvm/test/Reduce/remove-bbs.ll create mode 100644 llvm/test/Reduce/remove-funcs.ll create mode 100644 llvm/test/Reduce/remove-global-vars.ll create mode 100644 llvm/test/Reduce/remove-instructions.ll create mode 100644 llvm/test/Reduce/remove-metadata.ll create mode 100644 
llvm/test/ThinLTO/X86/Inputs/devirt2.ll create mode 100644 llvm/test/ThinLTO/X86/Inputs/devirt_alias.ll create mode 100644 llvm/test/ThinLTO/X86/Inputs/devirt_available_externally.ll create mode 100644 llvm/test/ThinLTO/X86/Inputs/devirt_external_comdat_same_guid.ll create mode 100644 llvm/test/ThinLTO/X86/Inputs/devirt_local_same_guid.ll create mode 100644 llvm/test/ThinLTO/X86/Inputs/devirt_promote.ll create mode 100644 llvm/test/ThinLTO/X86/Inputs/devirt_single_hybrid_bar.ll create mode 100644 llvm/test/ThinLTO/X86/Inputs/devirt_single_hybrid_foo.ll create mode 100644 llvm/test/ThinLTO/X86/Inputs/funcimport_alwaysinline.ll create mode 100644 llvm/test/ThinLTO/X86/Inputs/guid_collision.ll create mode 100644 llvm/test/ThinLTO/X86/Inputs/internalize.ll create mode 100644 llvm/test/ThinLTO/X86/Inputs/thinlto-internalize-doublepromoted.ll create mode 100644 llvm/test/ThinLTO/X86/Inputs/writeonly-with-refs.ll create mode 100644 llvm/test/ThinLTO/X86/devirt2.ll create mode 100644 llvm/test/ThinLTO/X86/devirt_alias.ll create mode 100644 llvm/test/ThinLTO/X86/devirt_available_externally.ll create mode 100644 llvm/test/ThinLTO/X86/devirt_external_comdat_same_guid.ll create mode 100644 llvm/test/ThinLTO/X86/devirt_local_same_guid.ll create mode 100644 llvm/test/ThinLTO/X86/devirt_promote.ll create mode 100644 llvm/test/ThinLTO/X86/devirt_promote_legacy.ll create mode 100644 llvm/test/ThinLTO/X86/devirt_single_hybrid.ll create mode 100644 llvm/test/ThinLTO/X86/funcimport-stats.ll create mode 100644 llvm/test/ThinLTO/X86/funcimport_alwaysinline.ll create mode 100644 llvm/test/ThinLTO/X86/guid_collision.ll create mode 100644 llvm/test/ThinLTO/X86/nodevirt-nonpromoted-typeid.ll create mode 100644 llvm/test/ThinLTO/X86/not-internalized.ll create mode 100644 llvm/test/ThinLTO/X86/printer.ll create mode 100644 llvm/test/ThinLTO/X86/thinlto-internalize-doublepromoted.ll create mode 100644 llvm/test/ThinLTO/X86/writeonly-with-refs.ll create mode 100644 
llvm/test/Transforms/AggressiveInstCombine/popcount.ll create mode 100644 llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll create mode 100644 llvm/test/Transforms/Attributor/ArgumentPromotion/2008-07-02-array-indexing.ll create mode 100644 llvm/test/Transforms/Attributor/ArgumentPromotion/2008-09-07-CGUpdate.ll create mode 100644 llvm/test/Transforms/Attributor/ArgumentPromotion/2008-09-08-CGUpdateSelfEdge.ll create mode 100644 llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll create mode 100644 llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll create mode 100644 llvm/test/Transforms/Attributor/ArgumentPromotion/X86/thiscall.ll create mode 100644 llvm/test/Transforms/Attributor/ArgumentPromotion/aggregate-promote.ll create mode 100644 llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll create mode 100644 llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll create mode 100644 llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll create mode 100644 llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll create mode 100644 llvm/test/Transforms/Attributor/ArgumentPromotion/chained.ll create mode 100644 llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow.ll create mode 100644 llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll create mode 100644 llvm/test/Transforms/Attributor/ArgumentPromotion/crash.ll create mode 100644 llvm/test/Transforms/Attributor/ArgumentPromotion/dbg.ll create mode 100644 llvm/test/Transforms/Attributor/ArgumentPromotion/fp80.ll create mode 100644 llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll create mode 100644 llvm/test/Transforms/Attributor/ArgumentPromotion/invalidation.ll create mode 100644 llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll create mode 100644 llvm/test/Transforms/Attributor/ArgumentPromotion/musttail.ll create mode 100644 
llvm/test/Transforms/Attributor/ArgumentPromotion/naked_functions.ll create mode 100644 llvm/test/Transforms/Attributor/ArgumentPromotion/nonzero-address-spaces.ll create mode 100644 llvm/test/Transforms/Attributor/ArgumentPromotion/pr27568.ll create mode 100644 llvm/test/Transforms/Attributor/ArgumentPromotion/pr3085.ll create mode 100644 llvm/test/Transforms/Attributor/ArgumentPromotion/pr32917.ll create mode 100644 llvm/test/Transforms/Attributor/ArgumentPromotion/pr33641_remove_arg_dbgvalue.ll create mode 100644 llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll create mode 100644 llvm/test/Transforms/Attributor/ArgumentPromotion/reserve-tbaa.ll create mode 100644 llvm/test/Transforms/Attributor/ArgumentPromotion/sret.ll create mode 100644 llvm/test/Transforms/Attributor/ArgumentPromotion/tail.ll create mode 100644 llvm/test/Transforms/Attributor/ArgumentPromotion/variadic.ll create mode 100644 llvm/test/Transforms/Attributor/IPConstantProp/2008-06-09-WeakProp.ll create mode 100644 llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll create mode 100644 llvm/test/Transforms/Attributor/IPConstantProp/PR16052.ll create mode 100644 llvm/test/Transforms/Attributor/IPConstantProp/PR26044.ll create mode 100644 llvm/test/Transforms/Attributor/IPConstantProp/PR43857.ll create mode 100644 llvm/test/Transforms/Attributor/IPConstantProp/arg-count-mismatch.ll create mode 100644 llvm/test/Transforms/Attributor/IPConstantProp/arg-type-mismatch.ll create mode 100644 llvm/test/Transforms/Attributor/IPConstantProp/comdat-ipo.ll create mode 100644 llvm/test/Transforms/Attributor/IPConstantProp/dangling-block-address.ll create mode 100644 llvm/test/Transforms/Attributor/IPConstantProp/deadarg.ll create mode 100644 llvm/test/Transforms/Attributor/IPConstantProp/fp-bc-icmp-const-fold.ll create mode 100644 llvm/test/Transforms/Attributor/IPConstantProp/global.ll create mode 100644 llvm/test/Transforms/Attributor/IPConstantProp/multiple_callbacks.ll create 
mode 100644 llvm/test/Transforms/Attributor/IPConstantProp/musttail-call.ll create mode 100644 llvm/test/Transforms/Attributor/IPConstantProp/naked-return.ll create mode 100644 llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll create mode 100644 llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll create mode 100644 llvm/test/Transforms/Attributor/IPConstantProp/recursion.ll create mode 100644 llvm/test/Transforms/Attributor/IPConstantProp/remove-call-inst.ll create mode 100644 llvm/test/Transforms/Attributor/IPConstantProp/return-argument.ll create mode 100644 llvm/test/Transforms/Attributor/IPConstantProp/return-constant.ll create mode 100644 llvm/test/Transforms/Attributor/IPConstantProp/return-constants.ll create mode 100644 llvm/test/Transforms/Attributor/IPConstantProp/solve-after-each-resolving-undefs-for-function.ll create mode 100644 llvm/test/Transforms/Attributor/IPConstantProp/thread_local_acs.ll create mode 100644 llvm/test/Transforms/Attributor/align.ll create mode 100644 llvm/test/Transforms/Attributor/callbacks.ll create mode 100644 llvm/test/Transforms/Attributor/dereferenceable-1.ll create mode 100644 llvm/test/Transforms/Attributor/dereferenceable-2.ll create mode 100644 llvm/test/Transforms/Attributor/heap_to_stack.ll create mode 100644 llvm/test/Transforms/Attributor/internal-noalias.ll create mode 100644 llvm/test/Transforms/Attributor/liveness.ll create mode 100644 llvm/test/Transforms/Attributor/lvi-after-jumpthreading.ll create mode 100644 llvm/test/Transforms/Attributor/lvi-for-ashr.ll create mode 100644 llvm/test/Transforms/Attributor/misc.ll create mode 100644 llvm/test/Transforms/Attributor/new_attributes.ll create mode 100644 llvm/test/Transforms/Attributor/noalias.ll create mode 100644 llvm/test/Transforms/Attributor/nocapture-1.ll create mode 100644 llvm/test/Transforms/Attributor/nocapture-2.ll create mode 100644 llvm/test/Transforms/Attributor/nofree.ll create mode 100644 
llvm/test/Transforms/Attributor/nonnull.ll create mode 100644 llvm/test/Transforms/Attributor/norecurse.ll create mode 100644 llvm/test/Transforms/Attributor/noreturn.ll create mode 100644 llvm/test/Transforms/Attributor/noreturn_async.ll create mode 100644 llvm/test/Transforms/Attributor/noreturn_sync.ll create mode 100644 llvm/test/Transforms/Attributor/nosync.ll create mode 100644 llvm/test/Transforms/Attributor/nounwind.ll create mode 100644 llvm/test/Transforms/Attributor/range.ll create mode 100644 llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll create mode 100644 llvm/test/Transforms/Attributor/readattrs.ll create mode 100644 llvm/test/Transforms/Attributor/returned.ll create mode 100644 llvm/test/Transforms/Attributor/undefined_behavior.ll create mode 100644 llvm/test/Transforms/Attributor/value-simplify.ll create mode 100644 llvm/test/Transforms/Attributor/willreturn.ll create mode 100644 llvm/test/Transforms/BDCE/pr41925.ll create mode 100644 llvm/test/Transforms/BlockExtractor/invalid-line.ll create mode 100644 llvm/test/Transforms/CodeGenPrepare/ARM/sink-add-mul-shufflevector.ll create mode 100644 llvm/test/Transforms/CodeGenPrepare/ARM/sinkchain.ll create mode 100644 llvm/test/Transforms/CodeGenPrepare/PowerPC/split-store-alignment.ll create mode 100644 llvm/test/Transforms/CodeGenPrepare/X86/split-store-alignment.ll create mode 100644 llvm/test/Transforms/ConstProp/fma.ll create mode 100644 llvm/test/Transforms/ConstantHoisting/AArch64/const-hoist-intrinsics.ll create mode 100755 llvm/test/Transforms/ConstantHoisting/AArch64/consthoist-unreachable.ll create mode 100644 llvm/test/Transforms/ConstantHoisting/X86/pr43903-not-all-uses-rebased.ll create mode 100644 llvm/test/Transforms/Coroutines/coro-alloc-with-param.ll create mode 100644 llvm/test/Transforms/Coroutines/coro-param-copy.ll create mode 100644 llvm/test/Transforms/Coroutines/coro-retcon-alloca.ll create mode 100644 
llvm/test/Transforms/Coroutines/coro-retcon-once-value.ll create mode 100644 llvm/test/Transforms/Coroutines/coro-retcon-once-value2.ll create mode 100644 llvm/test/Transforms/Coroutines/coro-retcon-resume-values.ll create mode 100644 llvm/test/Transforms/Coroutines/coro-retcon-resume-values2.ll create mode 100644 llvm/test/Transforms/Coroutines/coro-retcon-value.ll create mode 100644 llvm/test/Transforms/Coroutines/coro-retcon.ll create mode 100644 llvm/test/Transforms/Coroutines/coro-split-musttail1.ll create mode 100644 llvm/test/Transforms/Coroutines/coro-swifterror.ll create mode 100644 llvm/test/Transforms/CorrelatedValuePropagation/and.ll create mode 100644 llvm/test/Transforms/CorrelatedValuePropagation/mul.ll create mode 100644 llvm/test/Transforms/CorrelatedValuePropagation/sext.ll create mode 100644 llvm/test/Transforms/CorrelatedValuePropagation/shl.ll create mode 100644 llvm/test/Transforms/DCE/dbg-value-removal.ll create mode 100644 llvm/test/Transforms/DeadStoreElimination/DeleteThrowableInst.ll create mode 100644 llvm/test/Transforms/DeadStoreElimination/libcalls2.ll create mode 100644 llvm/test/Transforms/EarlyCSE/writeonly.ll create mode 100644 llvm/test/Transforms/FunctionAttrs/writeonly.ll create mode 100644 llvm/test/Transforms/GVN/equality-assume.ll create mode 100644 llvm/test/Transforms/GVN/pr42605.ll create mode 100644 llvm/test/Transforms/GVN/preserve-analysis.ll create mode 100644 llvm/test/Transforms/GlobalDCE/virtual-functions-base-call.ll create mode 100644 llvm/test/Transforms/GlobalDCE/virtual-functions-base-pointer-call.ll create mode 100644 llvm/test/Transforms/GlobalDCE/virtual-functions-derived-call.ll create mode 100644 llvm/test/Transforms/GlobalDCE/virtual-functions-derived-pointer-call.ll create mode 100644 llvm/test/Transforms/GlobalDCE/virtual-functions-visibility-post-lto.ll create mode 100644 llvm/test/Transforms/GlobalDCE/virtual-functions-visibility-pre-lto.ll create mode 100644 
llvm/test/Transforms/GlobalDCE/virtual-functions.ll create mode 100644 llvm/test/Transforms/GlobalDCE/vtable-rtti.ll create mode 100644 llvm/test/Transforms/GlobalOpt/large-int-crash.ll create mode 100644 llvm/test/Transforms/GlobalOpt/long-compilation-global-sra.ll create mode 100644 llvm/test/Transforms/HotColdSplit/assumption-cache-invalidation.ll create mode 100644 llvm/test/Transforms/HotColdSplit/retain-section.ll create mode 100644 llvm/test/Transforms/IPConstantProp/PR43857.ll create mode 100644 llvm/test/Transforms/IndVarSimplify/eliminate-exit-no-dl.ll create mode 100644 llvm/test/Transforms/IndVarSimplify/indvar-debug-value.ll create mode 100644 llvm/test/Transforms/IndVarSimplify/indvar-debug-value2.ll create mode 100644 llvm/test/Transforms/IndVarSimplify/loop-predication.ll create mode 100644 llvm/test/Transforms/IndVarSimplify/rlev-add-me.ll create mode 100644 llvm/test/Transforms/InferAddressSpaces/AMDGPU/address-space-id-funcs.ll create mode 100644 llvm/test/Transforms/Inline/X86/switch.ll create mode 100644 llvm/test/Transforms/Inline/inline-indirect-chain.ll create mode 100644 llvm/test/Transforms/Inline/inline-skip-use-empty-alloca.ll create mode 100644 llvm/test/Transforms/Inline/no-inline-line-tables.ll create mode 100644 llvm/test/Transforms/InstCombine/AMDGPU/tan.ll create mode 100644 llvm/test/Transforms/InstCombine/ARM/mve-v2i2v.ll create mode 100644 llvm/test/Transforms/InstCombine/X86/2009-03-23-i80-fp80.ll create mode 100644 llvm/test/Transforms/InstCombine/bcopy.ll create mode 100644 llvm/test/Transforms/InstCombine/bitcast-function.ll create mode 100644 llvm/test/Transforms/InstCombine/bitcast-phi-uselistorder.ll create mode 100644 llvm/test/Transforms/InstCombine/builtin-object-size-custom-dl.ll create mode 100644 llvm/test/Transforms/InstCombine/canonicalize-clamp-like-pattern-between-negative-and-positive-thresholds.ll create mode 100644 
llvm/test/Transforms/InstCombine/canonicalize-clamp-like-pattern-between-zero-and-positive-threshold.ll create mode 100644 llvm/test/Transforms/InstCombine/canonicalize-clamp-with-select-of-constant-threshold-pattern.ll create mode 100644 llvm/test/Transforms/InstCombine/conditional-variable-length-signext-after-high-bit-extract.ll create mode 100644 llvm/test/Transforms/InstCombine/deref-alloc-fns.ll create mode 100644 llvm/test/Transforms/InstCombine/disable-builtin.ll create mode 100644 llvm/test/Transforms/InstCombine/do-not-clone-dbg-declare.ll create mode 100644 llvm/test/Transforms/InstCombine/expensive-combines.ll create mode 100644 llvm/test/Transforms/InstCombine/fabs-copysign.ll create mode 100644 llvm/test/Transforms/InstCombine/fptrunc.ll create mode 100644 llvm/test/Transforms/InstCombine/freeze.ll create mode 100644 llvm/test/Transforms/InstCombine/gep-alias.ll create mode 100644 llvm/test/Transforms/InstCombine/gep-inbounds-null.ll create mode 100644 llvm/test/Transforms/InstCombine/high-bit-signmask-with-trunc.ll create mode 100644 llvm/test/Transforms/InstCombine/high-bit-signmask.ll create mode 100644 llvm/test/Transforms/InstCombine/hoist-negation-out-of-bias-calculation-with-constant.ll create mode 100644 llvm/test/Transforms/InstCombine/hoist-negation-out-of-bias-calculation.ll create mode 100644 llvm/test/Transforms/InstCombine/intptr8.ll create mode 100644 llvm/test/Transforms/InstCombine/lifetime-sanitizer.ll create mode 100644 llvm/test/Transforms/InstCombine/limit-max-iterations.ll create mode 100644 llvm/test/Transforms/InstCombine/load-insert-store.ll create mode 100644 llvm/test/Transforms/InstCombine/mem-deref-bytes-addrspaces.ll create mode 100644 llvm/test/Transforms/InstCombine/mem-deref-bytes.ll create mode 100644 llvm/test/Transforms/InstCombine/memccpy.ll create mode 100644 llvm/test/Transforms/InstCombine/mempcpy.ll create mode 100644 llvm/test/Transforms/InstCombine/memrchr.ll create mode 100644 
llvm/test/Transforms/InstCombine/minmax-of-minmax.ll create mode 100644 llvm/test/Transforms/InstCombine/overflow_to_sat.ll create mode 100644 llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-a.ll create mode 100644 llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-b.ll create mode 100644 llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-c.ll create mode 100644 llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-d.ll create mode 100644 llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-e.ll create mode 100644 llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-a.ll create mode 100644 llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-b.ll create mode 100644 llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-c.ll create mode 100644 llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-d.ll create mode 100644 llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-e.ll create mode 100644 llvm/test/Transforms/InstCombine/phi-equal-incoming-pointers.ll create mode 100644 llvm/test/Transforms/InstCombine/phi-known-bits-operand-order.ll create mode 100644 llvm/test/Transforms/InstCombine/pr43081.ll create mode 100644 llvm/test/Transforms/InstCombine/pr43376-getFlippedStrictnessPredicateAndConstant-assert.ll create mode 100644 llvm/test/Transforms/InstCombine/pr43893.ll create mode 100644 llvm/test/Transforms/InstCombine/pr44242.ll create mode 100644 llvm/test/Transforms/InstCombine/pr44245.ll create mode 100644 llvm/test/Transforms/InstCombine/pr44541.ll create mode 100644 llvm/test/Transforms/InstCombine/pr44552.ll create mode 100644 
llvm/test/Transforms/InstCombine/pr44835.ll create mode 100644 llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-a.ll create mode 100644 llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-b.ll create mode 100644 llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-c.ll create mode 100644 llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-d.ll create mode 100644 llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-e.ll create mode 100644 llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-f.ll create mode 100644 llvm/test/Transforms/InstCombine/result-of-add-of-negative-is-non-zero-and-no-underflow.ll create mode 100644 llvm/test/Transforms/InstCombine/result-of-add-of-negative-or-zero-is-non-zero-and-no-underflow.ll create mode 100644 llvm/test/Transforms/InstCombine/result-of-usub-is-non-zero-and-no-overflow.ll create mode 100644 llvm/test/Transforms/InstCombine/reuse-constant-from-select-in-icmp.ll create mode 100644 llvm/test/Transforms/InstCombine/sadd_sat.ll create mode 100644 llvm/test/Transforms/InstCombine/select-ctlz-to-cttz.ll create mode 100644 llvm/test/Transforms/InstCombine/select-imm-canon.ll create mode 100644 llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-lshr.ll create mode 100644 llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-shl.ll create mode 100644 llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-ashr.ll create mode 100644 llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-lshr.ll create mode 100644 llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-shl.ll create mode 100644 llvm/test/Transforms/InstCombine/shift-by-signext.ll create mode 100644 
llvm/test/Transforms/InstCombine/shift-logic.ll create mode 100644 llvm/test/Transforms/InstCombine/shufflevector-div-rem.ll create mode 100644 llvm/test/Transforms/InstCombine/sign-bit-test-via-right-shifting-all-other-bits.ll create mode 100644 llvm/test/Transforms/InstCombine/srem-via-sdiv-mul-sub.ll create mode 100644 llvm/test/Transforms/InstCombine/stdio-custom-dl.ll create mode 100644 llvm/test/Transforms/InstCombine/strict-sub-underflow-check-to-comparison-of-sub-operands.ll create mode 100644 llvm/test/Transforms/InstCombine/strndup.ll create mode 100644 llvm/test/Transforms/InstCombine/sub-and-or-neg-xor.ll create mode 100644 llvm/test/Transforms/InstCombine/sub-ashr-and-to-icmp-select.ll create mode 100644 llvm/test/Transforms/InstCombine/sub-ashr-or-to-icmp-select.ll create mode 100644 llvm/test/Transforms/InstCombine/sub-gep.ll create mode 100644 llvm/test/Transforms/InstCombine/sub-of-negatible.ll create mode 100644 llvm/test/Transforms/InstCombine/sub-or-and-xor.ll create mode 100644 llvm/test/Transforms/InstCombine/sub-xor-or-neg-and.ll create mode 100644 llvm/test/Transforms/InstCombine/subtract-from-one-hand-of-select.ll create mode 100644 llvm/test/Transforms/InstCombine/subtract-of-one-hand-of-select.ll create mode 100644 llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check-via-add.ll create mode 100644 llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check-via-xor.ll create mode 100644 llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check.ll create mode 100644 llvm/test/Transforms/InstCombine/unsigned-add-overflow-check-via-add.ll create mode 100644 llvm/test/Transforms/InstCombine/unsigned-add-overflow-check-via-xor.ll create mode 100644 llvm/test/Transforms/InstCombine/unsigned-add-overflow-check.ll create mode 100644 llvm/test/Transforms/InstCombine/unsigned-mul-lack-of-overflow-check-via-mul-udiv.ll create mode 100644 
llvm/test/Transforms/InstCombine/unsigned-mul-lack-of-overflow-check-via-udiv-of-allones.ll create mode 100644 llvm/test/Transforms/InstCombine/unsigned-mul-overflow-check-via-mul-udiv.ll create mode 100644 llvm/test/Transforms/InstCombine/unsigned-mul-overflow-check-via-udiv-of-allones.ll create mode 100644 llvm/test/Transforms/InstCombine/unsigned-sub-lack-of-overflow-check.ll create mode 100644 llvm/test/Transforms/InstCombine/unsigned-sub-overflow-check.ll create mode 100644 llvm/test/Transforms/InstCombine/unused-nonnull.ll create mode 100644 llvm/test/Transforms/InstCombine/urem-via-udiv-mul-sub.ll create mode 100644 llvm/test/Transforms/InstCombine/variable-signext-of-variable-high-bit-extraction.ll create mode 100644 llvm/test/Transforms/InstCombine/vec_udiv_to_shift.ll create mode 100644 llvm/test/Transforms/InstCombine/widenable-conditions.ll create mode 100644 llvm/test/Transforms/InstCombine/xor-of-icmps-with-extra-uses.ll create mode 100644 llvm/test/Transforms/InstMerge/st_sink_split_bb.ll create mode 100644 llvm/test/Transforms/InstSimplify/assume-non-zero.ll create mode 100644 llvm/test/Transforms/InstSimplify/div-by-0-guard-before-smul_ov-not.ll create mode 100644 llvm/test/Transforms/InstSimplify/div-by-0-guard-before-smul_ov.ll create mode 100644 llvm/test/Transforms/InstSimplify/div-by-0-guard-before-umul_ov-not.ll create mode 100644 llvm/test/Transforms/InstSimplify/div-by-0-guard-before-umul_ov.ll create mode 100644 llvm/test/Transforms/InstSimplify/freeze.ll create mode 100644 llvm/test/Transforms/InstSimplify/redundant-null-check-in-uadd_with_overflow-of-nonnull-ptr.ll create mode 100755 llvm/test/Transforms/InstSimplify/remove-dead-call.ll create mode 100644 llvm/test/Transforms/InstSimplify/result-of-add-of-negative-is-non-zero-and-no-underflow.ll create mode 100644 llvm/test/Transforms/InstSimplify/result-of-usub-by-nonzero-is-non-zero-and-no-overflow.ll create mode 100644 
llvm/test/Transforms/InstSimplify/result-of-usub-is-non-zero-and-no-overflow.ll create mode 100644 llvm/test/Transforms/InstSimplify/unsigned-range-checks.ll create mode 100644 llvm/test/Transforms/Internalize/vcall-visibility.ll create mode 100644 llvm/test/Transforms/JumpThreading/unreachable-loops.ll create mode 100644 llvm/test/Transforms/LCSSA/pr44058.ll create mode 100644 llvm/test/Transforms/LICM/AMDGPU/bitcast.ll create mode 100644 llvm/test/Transforms/LICM/callbr-crash.ll create mode 100644 llvm/test/Transforms/LICM/pr38513.ll create mode 100644 llvm/test/Transforms/LICM/pr42969.ll create mode 100644 llvm/test/Transforms/LICM/pragma-licm-disable.ll create mode 100644 llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/vect-ptr-ptr-size-mismatch.ll create mode 100644 llvm/test/Transforms/LoopFusion/diagnostics_analysis.ll create mode 100644 llvm/test/Transforms/LoopFusion/diagnostics_missed.ll create mode 100644 llvm/test/Transforms/LoopFusion/guarded.ll create mode 100644 llvm/test/Transforms/LoopInterchange/pr43176-move-to-new-latch.ll create mode 100644 llvm/test/Transforms/LoopInterchange/pr43473-invalid-lcssa-phis-in-inner-exit.ll create mode 100644 llvm/test/Transforms/LoopInterchange/pr43797-lcssa-for-multiple-outer-loop-blocks.ll create mode 100644 llvm/test/Transforms/LoopInterchange/update-condbranch-duplicate-successors.ll create mode 100644 llvm/test/Transforms/LoopPredication/predicate-exits.ll create mode 100644 llvm/test/Transforms/LoopRotate/dbg-value-duplicates-2.ll create mode 100644 llvm/test/Transforms/LoopRotate/switch.ll create mode 100644 llvm/test/Transforms/LoopUnroll/ARM/dont-unroll-loopdec.ll create mode 100644 llvm/test/Transforms/LoopUnroll/ARM/mve-nounroll.ll create mode 100644 llvm/test/Transforms/LoopUnroll/disable-full-unroll-by-opt.ll create mode 100644 llvm/test/Transforms/LoopUnroll/optsize-loop-size.ll create mode 100644 llvm/test/Transforms/LoopUnroll/peel-loop-conditions-pgo-1.ll create mode 100644 
llvm/test/Transforms/LoopUnroll/peel-loop-conditions-pgo-2.ll create mode 100644 llvm/test/Transforms/LoopUnroll/peel-loop-inner.ll create mode 100644 llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt-idom-2.ll create mode 100644 llvm/test/Transforms/LoopUnroll/runtime-small-upperbound.ll create mode 100644 llvm/test/Transforms/LoopUnroll/unroll-preserve-scev-lcssa.ll create mode 100644 llvm/test/Transforms/LoopVectorize/ARM/mve-interleaved-cost.ll create mode 100644 llvm/test/Transforms/LoopVectorize/ARM/mve-maskedldst.ll create mode 100644 llvm/test/Transforms/LoopVectorize/ARM/mve-shiftcost.ll create mode 100644 llvm/test/Transforms/LoopVectorize/ARM/mve-vldn.ll create mode 100644 llvm/test/Transforms/LoopVectorize/ARM/prefer-tail-loop-folding.ll create mode 100644 llvm/test/Transforms/LoopVectorize/ARM/tail-folding-counting-down.ll create mode 100644 llvm/test/Transforms/LoopVectorize/ARM/tail-loop-folding.ll create mode 100644 llvm/test/Transforms/LoopVectorize/PowerPC/reg-usage.ll create mode 100644 llvm/test/Transforms/LoopVectorize/PowerPC/vectorize-bswap.ll create mode 100644 llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll create mode 100644 llvm/test/Transforms/LoopVectorize/X86/cost-model-assert.ll create mode 100644 llvm/test/Transforms/LoopVectorize/X86/interleave_short_tc.ll create mode 100644 llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-waw-dependency.ll create mode 100644 llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll create mode 100644 llvm/test/Transforms/LoopVectorize/X86/pr42674.ll create mode 100644 llvm/test/Transforms/LoopVectorize/X86/tail_folding_and_assume_safety.ll create mode 100644 llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll create mode 100644 llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll create mode 100644 llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll create mode 100644 
llvm/test/Transforms/LoopVectorize/interleaved-accesses-uniform-load.ll create mode 100644 llvm/test/Transforms/LoopVectorize/nofloat-report.ll create mode 100644 llvm/test/Transforms/LoopVectorize/nuw.ll create mode 100644 llvm/test/Transforms/LoopVectorize/pr44488-predication.ll create mode 100644 llvm/test/Transforms/LoopVectorize/tail-folding-counting-down.ll create mode 100644 llvm/test/Transforms/LowerConstantIntrinsics/constant-intrinsics.ll create mode 100644 llvm/test/Transforms/LowerConstantIntrinsics/crash-on-large-allocas.ll create mode 100644 llvm/test/Transforms/LowerConstantIntrinsics/objectsize_basic.ll create mode 100644 llvm/test/Transforms/LowerMatrixIntrinsics/bigger-expressions-double.ll create mode 100644 llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-contraction-fmf.ll create mode 100644 llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-contraction.ll create mode 100644 llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double.ll create mode 100644 llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float-contraction-fmf.ll create mode 100644 llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float-contraction.ll create mode 100644 llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float.ll create mode 100644 llvm/test/Transforms/LowerMatrixIntrinsics/multiply-i32.ll create mode 100644 llvm/test/Transforms/LowerMatrixIntrinsics/propagate-backward.ll create mode 100644 llvm/test/Transforms/LowerMatrixIntrinsics/propagate-backwards-unsupported.ll create mode 100644 llvm/test/Transforms/LowerMatrixIntrinsics/propagate-forward.ll create mode 100644 llvm/test/Transforms/LowerMatrixIntrinsics/propagate-mixed-users.ll create mode 100644 llvm/test/Transforms/LowerMatrixIntrinsics/propagate-multiple-iterations.ll create mode 100644 llvm/test/Transforms/LowerMatrixIntrinsics/strided-load-double.ll create mode 100644 llvm/test/Transforms/LowerMatrixIntrinsics/strided-load-float.ll create mode 100644 
llvm/test/Transforms/LowerMatrixIntrinsics/strided-load-i32.ll create mode 100644 llvm/test/Transforms/LowerMatrixIntrinsics/strided-store-double.ll create mode 100644 llvm/test/Transforms/LowerMatrixIntrinsics/strided-store-float.ll create mode 100644 llvm/test/Transforms/LowerMatrixIntrinsics/strided-store-i32.ll create mode 100644 llvm/test/Transforms/LowerMatrixIntrinsics/transpose-double.ll create mode 100644 llvm/test/Transforms/LowerMatrixIntrinsics/transpose-float.ll create mode 100644 llvm/test/Transforms/LowerMatrixIntrinsics/transpose-i32.ll create mode 100644 llvm/test/Transforms/LowerTypeTests/align.ll create mode 100644 llvm/test/Transforms/LowerTypeTests/export-rename-local.ll create mode 100644 llvm/test/Transforms/MemCpyOpt/aggregate-type-crash.ll create mode 100644 llvm/test/Transforms/MemCpyOpt/store-to-memset.ll create mode 100644 llvm/test/Transforms/MergeFunc/byval-attr-congruent-type.ll create mode 100644 llvm/test/Transforms/NewGVN/pr43441.ll create mode 100644 llvm/test/Transforms/ObjCARC/code-motion.ll create mode 100644 llvm/test/Transforms/ObjCARC/inlined-autorelease-return-value.ll create mode 100644 llvm/test/Transforms/PGOProfile/misexpect-branch-correct.ll create mode 100644 llvm/test/Transforms/PGOProfile/misexpect-branch-stripped.ll create mode 100644 llvm/test/Transforms/PGOProfile/misexpect-branch-unpredictable.ll create mode 100644 llvm/test/Transforms/PGOProfile/misexpect-branch.ll create mode 100644 llvm/test/Transforms/PGOProfile/misexpect-switch-default.ll create mode 100644 llvm/test/Transforms/PGOProfile/misexpect-switch.ll create mode 100644 llvm/test/Transforms/PhaseOrdering/lifetime-sanitizer.ll create mode 100644 llvm/test/Transforms/PhaseOrdering/min-max-abs-cse.ll create mode 100644 llvm/test/Transforms/PhaseOrdering/two-shifts-by-sext.ll create mode 100644 llvm/test/Transforms/PhaseOrdering/unsigned-multiply-overflow-check.ll create mode 100644 llvm/test/Transforms/Reassociate/2019-08-22-FNegAssert.ll create mode 
100644 llvm/test/Transforms/Reassociate/reassociate_salvages_debug_info.ll create mode 100644 llvm/test/Transforms/Reassociate/undef_intrinsics_when_deleting_instructions.ll create mode 100644 llvm/test/Transforms/SCCP/ipsccp-predinfo-order.ll create mode 100644 llvm/test/Transforms/SCCP/struct-arg-resolve-undefs.ll create mode 100644 llvm/test/Transforms/SCCP/ubsan_overflow.ll create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/PR31847.ll create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/bad-reduction.ll create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/different-vec-widths.ll create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/reuse-extracts-in-wider-vect.ll create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll create mode 100644 llvm/test/Transforms/SROA/tbaa-struct.ll create mode 100644 llvm/test/Transforms/SROA/vector-promotion-different-size.ll create mode 100644 llvm/test/Transforms/SampleProfile/Inputs/profile-symbol-list.ll create mode 100644 llvm/test/Transforms/SampleProfile/compressed-profile-symbol-list.ll create mode 100644 llvm/test/Transforms/SampleProfile/inline-callee-update.ll create mode 100644 llvm/test/Transforms/SampleProfile/inline-cold.ll create mode 100644 llvm/test/Transforms/SampleProfile/inline-mergeprof.ll create mode 100644 llvm/test/Transforms/SampleProfile/inline-stats.ll create mode 100644 llvm/test/Transforms/SampleProfile/inline-topdown.ll create mode 100644 llvm/test/Transforms/SampleProfile/profile-format-compress.ll create mode 100644 llvm/test/Transforms/SampleProfile/profile-format.ll create mode 100644 llvm/test/Transforms/SampleProfile/profile-sample-accurate.ll create mode 100644 
llvm/test/Transforms/SampleProfile/uncompressed-profile-symbol-list.ll create mode 100644 llvm/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-scatter.ll create mode 100644 llvm/test/Transforms/Scalarizer/phi-unreachable-pred.ll create mode 100644 llvm/test/Transforms/SimpleLoopUnswitch/preserve-scev-exiting-multiple-loops.ll create mode 100644 llvm/test/Transforms/SimplifyCFG/X86/merge-cond-stores-cost.ll create mode 100644 llvm/test/Transforms/SimplifyCFG/invalidate-dom.ll create mode 100644 llvm/test/Transforms/SimplifyCFG/safe-abs.ll create mode 100644 llvm/test/Transforms/SimplifyCFG/safe-low-bit-extract.ll create mode 100644 llvm/test/Transforms/SimplifyCFG/signbit-like-value-extension.ll create mode 100644 llvm/test/Transforms/SimplifyCFG/switch_msan.ll create mode 100644 llvm/test/Transforms/SimplifyCFG/unsigned-multiplication-will-overflow.ll create mode 100644 llvm/test/Transforms/SimplifyCFG/wc-widen-block.ll create mode 100644 llvm/test/Transforms/ThinLTOBitcodeWriter/associated.ll create mode 100644 llvm/test/Transforms/ThinLTOBitcodeWriter/cfi-functions-canonical-jump-tables.ll create mode 100644 llvm/test/Transforms/TypePromotion/ARM/calls.ll create mode 100644 llvm/test/Transforms/TypePromotion/ARM/casts.ll create mode 100644 llvm/test/Transforms/TypePromotion/ARM/clear-structures.ll create mode 100644 llvm/test/Transforms/TypePromotion/ARM/icmps.ll create mode 100644 llvm/test/Transforms/TypePromotion/ARM/large-int.ll create mode 100644 llvm/test/Transforms/TypePromotion/ARM/phis-ret.ll create mode 100644 llvm/test/Transforms/TypePromotion/ARM/pointers.ll create mode 100644 llvm/test/Transforms/TypePromotion/ARM/signed-icmps.ll create mode 100644 llvm/test/Transforms/TypePromotion/ARM/signed.ll create mode 100644 llvm/test/Transforms/TypePromotion/ARM/switch.ll create mode 100644 llvm/test/Transforms/TypePromotion/ARM/wrapping.ll create mode 100644 llvm/test/Transforms/Util/add-TLI-mappings.ll create mode 100644 
llvm/test/Transforms/Util/dbg-call-bitcast.ll create mode 100644 llvm/test/Verifier/callbr.ll create mode 100644 llvm/test/Verifier/dereferenceable-md-inttoptr.ll create mode 100644 llvm/test/Verifier/diexpression-dwarf-entry-value.ll create mode 100644 llvm/test/Verifier/diexpression-entry-value-llvm-ir.ll create mode 100644 llvm/test/Verifier/intrinsic-bad-arg-type.ll create mode 100644 llvm/test/Verifier/invalid-frame-pointer-attr-empty.ll create mode 100644 llvm/test/Verifier/invalid-frame-pointer-attr-no-value.ll create mode 100644 llvm/test/Verifier/invalid-frame-pointer-attr.ll create mode 100644 llvm/test/Verifier/invalid-patchable-function-entry.ll create mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/basic.ll create mode 100644 llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/argument_name_reuse.ll create mode 100644 llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/basic.ll create mode 100644 llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/scrub_attrs.ll create mode 100644 llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/sometimes_deleted_function.ll create mode 100644 llvm/test/tools/gold/X86/slp-vectorize-pm.ll create mode 100644 llvm/test/tools/llvm-dwarfdump/X86/locstats.ll create mode 100644 llvm/test/tools/llvm-dwarfdump/X86/stats-dbg-callsite-info.ll create mode 100644 llvm/test/tools/llvm-dwarfdump/X86/valid-call-site-GNU-extensions.ll create mode 100644 llvm/test/tools/llvm-locstats/locstats.ll create mode 100644 llvm/test/tools/llvm-locstats/no_scope_bytes.ll create mode 100644 llvm/test/tools/llvm-lto2/X86/slp-vectorize-pm.ll create mode 100644 llvm/test/tools/llvm-objdump/X86/macho-data-in-code.ll create mode 100644 llvm/test/tools/llvm-readobj/ELF/Inputs/trivial.ll diff --git a/llvm/Makefile b/llvm/Makefile index 86e5208e9..e9ab6e4af 100644 --- a/llvm/Makefile +++ b/llvm/Makefile @@ -1,5 +1,5 @@ # LLVM version. 
-VER=9.0.0 +VER=10.0.0 ROOT_DIR=$(shell pwd) @@ -21,7 +21,7 @@ llvm-${VER}.src: | llvm-${VER}.src.tar.xz tar -xJf llvm-${VER}.src.tar.xz llvm-${VER}.src.tar.xz: - wget -O $@ "https://releases.llvm.org/${VER}/llvm-${VER}.src.tar.xz" + wget -O $@ "https://github.com/llvm/llvm-project/releases/download/llvmorg-${VER}/llvm-${VER}.src.tar.xz" touch $@ clean: diff --git a/llvm/test/Analysis/BasicAA/assume-index-positive.ll b/llvm/test/Analysis/BasicAA/assume-index-positive.ll new file mode 100644 index 000000000..d89738a23 --- /dev/null +++ b/llvm/test/Analysis/BasicAA/assume-index-positive.ll @@ -0,0 +1,116 @@ +; RUN: opt -basicaa -aa-eval -print-all-alias-modref-info %s 2>&1 | FileCheck %s + +; %col.ptr.1 and %col.ptr.2 do not alias, if we know that %skip >= 0, because +; the distance between %col.ptr.1 and %col.ptr.2 is %skip + 6 and we load 6 +; elements. +define void @test1(double* %ptr, i32 %skip) { +; CHECK-LABEL: Function: test1: 4 pointers, 1 call sites +; CHECK-NEXT: MustAlias: <6 x double>* %col.ptr.1, double* %ptr +; CHECK-NEXT: NoAlias: double* %col.ptr.2, double* %ptr +; CHECK-NEXT: NoAlias: <6 x double>* %col.ptr.1, double* %col.ptr.2 +; CHECK-NEXT: NoAlias: <6 x double>* %col.ptr.2.cast, double* %ptr +; CHECK-NEXT: NoAlias: <6 x double>* %col.ptr.1, <6 x double>* %col.ptr.2.cast +; CHECK-NEXT: MustAlias: <6 x double>* %col.ptr.2.cast, double* %col.ptr.2 +; CHECK-NEXT: NoModRef: Ptr: double* %ptr <-> call void @llvm.assume(i1 %gt) +; CHECK-NEXT: NoModRef: Ptr: <6 x double>* %col.ptr.1 <-> call void @llvm.assume(i1 %gt) +; CHECK-NEXT: NoModRef: Ptr: double* %col.ptr.2 <-> call void @llvm.assume(i1 %gt) +; CHECK-NEXT: NoModRef: Ptr: <6 x double>* %col.ptr.2.cast <-> call void @llvm.assume(i1 %gt) +; + %gt = icmp sgt i32 %skip, -1 + call void @llvm.assume(i1 %gt) + %stride = add nsw nuw i32 %skip, 6 + %col.ptr.1 = bitcast double* %ptr to <6 x double>* + %lv.1 = load <6 x double>, <6 x double>* %col.ptr.1, align 8 + %col.ptr.2= getelementptr double, double* 
%ptr, i32 %stride + %col.ptr.2.cast = bitcast double* %col.ptr.2 to <6 x double>* + %lv.2 = load <6 x double>, <6 x double>* %col.ptr.2.cast, align 8 + %res.1 = fadd <6 x double> %lv.1, %lv.1 + %res.2 = fadd <6 x double> %lv.2, %lv.2 + store <6 x double> %res.1, <6 x double>* %col.ptr.1, align 8 + store <6 x double> %res.2, <6 x double>* %col.ptr.2.cast, align 8 + ret void +} + +; Same as @test1, but now we do not have an assume guaranteeing %skip >= 0. +define void @test2(double* %ptr, i32 %skip) { +; CHECK-LABEL: Function: test2: 4 pointers, 0 call sites +; CHECK-NEXT: MustAlias: <6 x double>* %col.ptr.1, double* %ptr +; CHECK-NEXT: MayAlias: double* %col.ptr.2, double* %ptr +; CHECK-NEXT: MayAlias: <6 x double>* %col.ptr.1, double* %col.ptr.2 +; CHECK-NEXT: MayAlias: <6 x double>* %col.ptr.2.cast, double* %ptr +; CHECK-NEXT: MayAlias: <6 x double>* %col.ptr.1, <6 x double>* %col.ptr.2.cast +; CHECK-NEXT: MustAlias: <6 x double>* %col.ptr.2.cast, double* %col.ptr.2 +; + %stride = add nsw nuw i32 %skip, 6 + %col.ptr.1 = bitcast double* %ptr to <6 x double>* + %lv.1 = load <6 x double>, <6 x double>* %col.ptr.1, align 8 + %col.ptr.2= getelementptr double, double* %ptr, i32 %stride + %col.ptr.2.cast = bitcast double* %col.ptr.2 to <6 x double>* + %lv.2 = load <6 x double>, <6 x double>* %col.ptr.2.cast, align 8 + %res.1 = fadd <6 x double> %lv.1, %lv.1 + %res.2 = fadd <6 x double> %lv.2, %lv.2 + store <6 x double> %res.1, <6 x double>* %col.ptr.1, align 8 + store <6 x double> %res.2, <6 x double>* %col.ptr.2.cast, align 8 + ret void +} + +; Same as @test1, but the assume just guarantees %skip > -3, which is not +; enough to derive NoAlias +define void @test3(double* %ptr, i32 %skip) { +; CHECK-LABEL: Function: test3: 4 pointers, 1 call sites +; CHECK-NEXT: MustAlias: <6 x double>* %col.ptr.1, double* %ptr +; CHECK-NEXT: MayAlias: double* %col.ptr.2, double* %ptr +; CHECK-NEXT: MayAlias: <6 x double>* %col.ptr.1, double* %col.ptr.2 +; CHECK-NEXT: MayAlias: <6 x 
double>* %col.ptr.2.cast, double* %ptr +; CHECK-NEXT: MayAlias: <6 x double>* %col.ptr.1, <6 x double>* %col.ptr.2.cast +; CHECK-NEXT: MustAlias: <6 x double>* %col.ptr.2.cast, double* %col.ptr.2 +; CHECK-NEXT: NoModRef: Ptr: double* %ptr <-> call void @llvm.assume(i1 %gt) +; CHECK-NEXT: NoModRef: Ptr: <6 x double>* %col.ptr.1 <-> call void @llvm.assume(i1 %gt) +; CHECK-NEXT: NoModRef: Ptr: double* %col.ptr.2 <-> call void @llvm.assume(i1 %gt) +; CHECK-NEXT: NoModRef: Ptr: <6 x double>* %col.ptr.2.cast <-> call void @llvm.assume(i1 %gt) +; + %gt = icmp sgt i32 %skip, -3 + call void @llvm.assume(i1 %gt) + %stride = add nsw nuw i32 %skip, 6 + %col.ptr.1 = bitcast double* %ptr to <6 x double>* + %lv.1 = load <6 x double>, <6 x double>* %col.ptr.1, align 8 + %col.ptr.2= getelementptr double, double* %ptr, i32 %stride + %col.ptr.2.cast = bitcast double* %col.ptr.2 to <6 x double>* + %lv.2 = load <6 x double>, <6 x double>* %col.ptr.2.cast, align 8 + %res.1 = fadd <6 x double> %lv.1, %lv.1 + %res.2 = fadd <6 x double> %lv.2, %lv.2 + store <6 x double> %res.1, <6 x double>* %col.ptr.1, align 8 + store <6 x double> %res.2, <6 x double>* %col.ptr.2.cast, align 8 + ret void +} + +; Same as @test1, but the assume uses the sge predicate for %skip >= 0. 
+define void @test4(double* %ptr, i32 %skip) { +; CHECK-LABEL: Function: test4: 4 pointers, 1 call sites +; CHECK-NEXT: MustAlias: <6 x double>* %col.ptr.1, double* %ptr +; CHECK-NEXT: NoAlias: double* %col.ptr.2, double* %ptr +; CHECK-NEXT: NoAlias: <6 x double>* %col.ptr.1, double* %col.ptr.2 +; CHECK-NEXT: NoAlias: <6 x double>* %col.ptr.2.cast, double* %ptr +; CHECK-NEXT: NoAlias: <6 x double>* %col.ptr.1, <6 x double>* %col.ptr.2.cast +; CHECK-NEXT: MustAlias: <6 x double>* %col.ptr.2.cast, double* %col.ptr.2 +; CHECK-NEXT: NoModRef: Ptr: double* %ptr <-> call void @llvm.assume(i1 %gt) +; CHECK-NEXT: NoModRef: Ptr: <6 x double>* %col.ptr.1 <-> call void @llvm.assume(i1 %gt) +; CHECK-NEXT: NoModRef: Ptr: double* %col.ptr.2 <-> call void @llvm.assume(i1 %gt) +; CHECK-NEXT: NoModRef: Ptr: <6 x double>* %col.ptr.2.cast <-> call void @llvm.assume(i1 %gt) +; + %gt = icmp sge i32 %skip, 0 + call void @llvm.assume(i1 %gt) + %stride = add nsw nuw i32 %skip, 6 + %col.ptr.1 = bitcast double* %ptr to <6 x double>* + %lv.1 = load <6 x double>, <6 x double>* %col.ptr.1, align 8 + %col.ptr.2= getelementptr double, double* %ptr, i32 %stride + %col.ptr.2.cast = bitcast double* %col.ptr.2 to <6 x double>* + %lv.2 = load <6 x double>, <6 x double>* %col.ptr.2.cast, align 8 + %res.1 = fadd <6 x double> %lv.1, %lv.1 + %res.2 = fadd <6 x double> %lv.2, %lv.2 + store <6 x double> %res.1, <6 x double>* %col.ptr.1, align 8 + store <6 x double> %res.2, <6 x double>* %col.ptr.2.cast, align 8 + ret void +} + +declare void @llvm.assume(i1 %cond) diff --git a/llvm/test/Analysis/BasicAA/cs-cs.ll b/llvm/test/Analysis/BasicAA/cs-cs.ll index 98899993d..beb9eaa83 100644 --- a/llvm/test/Analysis/BasicAA/cs-cs.ll +++ b/llvm/test/Analysis/BasicAA/cs-cs.ll @@ -364,26 +364,26 @@ entry: call void @an_argmemonly_func(i8* %q) #9 [ "unknown"() ] ret void -; CHECK: Just Ref: Ptr: i8* %p <-> call void @a_readonly_func(i8* %p) #6 [ "unknown"() ] -; CHECK: Just Ref: Ptr: i8* %q <-> call void 
@a_readonly_func(i8* %p) #6 [ "unknown"() ] -; CHECK: NoModRef: Ptr: i8* %p <-> call void @an_inaccessiblememonly_func() #7 [ "unknown"() ] -; CHECK: NoModRef: Ptr: i8* %q <-> call void @an_inaccessiblememonly_func() #7 [ "unknown"() ] -; CHECK: NoModRef: Ptr: i8* %p <-> call void @an_inaccessibleorargmemonly_func(i8* %q) #8 [ "unknown"() ] -; CHECK: Both ModRef (MustAlias): Ptr: i8* %q <-> call void @an_inaccessibleorargmemonly_func(i8* %q) #8 [ "unknown"() ] -; CHECK: NoModRef: Ptr: i8* %p <-> call void @an_argmemonly_func(i8* %q) #9 [ "unknown"() ] -; CHECK: Both ModRef (MustAlias): Ptr: i8* %q <-> call void @an_argmemonly_func(i8* %q) #9 [ "unknown"() ] -; CHECK: Just Ref: call void @a_readonly_func(i8* %p) #6 [ "unknown"() ] <-> call void @an_inaccessiblememonly_func() #7 [ "unknown"() ] -; CHECK: Just Ref: call void @a_readonly_func(i8* %p) #6 [ "unknown"() ] <-> call void @an_inaccessibleorargmemonly_func(i8* %q) #8 [ "unknown"() ] -; CHECK: Just Ref: call void @a_readonly_func(i8* %p) #6 [ "unknown"() ] <-> call void @an_argmemonly_func(i8* %q) #9 [ "unknown"() ] -; CHECK: Both ModRef: call void @an_inaccessiblememonly_func() #7 [ "unknown"() ] <-> call void @a_readonly_func(i8* %p) #6 [ "unknown"() ] -; CHECK: Both ModRef: call void @an_inaccessiblememonly_func() #7 [ "unknown"() ] <-> call void @an_inaccessibleorargmemonly_func(i8* %q) #8 [ "unknown"() ] -; CHECK: NoModRef: call void @an_inaccessiblememonly_func() #7 [ "unknown"() ] <-> call void @an_argmemonly_func(i8* %q) #9 [ "unknown"() ] -; CHECK: Both ModRef: call void @an_inaccessibleorargmemonly_func(i8* %q) #8 [ "unknown"() ] <-> call void @a_readonly_func(i8* %p) #6 [ "unknown"() ] -; CHECK: Both ModRef: call void @an_inaccessibleorargmemonly_func(i8* %q) #8 [ "unknown"() ] <-> call void @an_inaccessiblememonly_func() #7 [ "unknown"() ] -; CHECK: Both ModRef (MustAlias): call void @an_inaccessibleorargmemonly_func(i8* %q) #8 [ "unknown"() ] <-> call void @an_argmemonly_func(i8* %q) #9 [ 
"unknown"() ] -; CHECK: Both ModRef: call void @an_argmemonly_func(i8* %q) #9 [ "unknown"() ] <-> call void @a_readonly_func(i8* %p) #6 [ "unknown"() ] -; CHECK: NoModRef: call void @an_argmemonly_func(i8* %q) #9 [ "unknown"() ] <-> call void @an_inaccessiblememonly_func() #7 [ "unknown"() ] -; CHECK: Both ModRef (MustAlias): call void @an_argmemonly_func(i8* %q) #9 [ "unknown"() ] <-> call void @an_inaccessibleorargmemonly_func(i8* %q) #8 [ "unknown"() ] +; CHECK: Just Ref: Ptr: i8* %p <-> call void @a_readonly_func(i8* %p) #7 [ "unknown"() ] +; CHECK: Just Ref: Ptr: i8* %q <-> call void @a_readonly_func(i8* %p) #7 [ "unknown"() ] +; CHECK: NoModRef: Ptr: i8* %p <-> call void @an_inaccessiblememonly_func() #8 [ "unknown"() ] +; CHECK: NoModRef: Ptr: i8* %q <-> call void @an_inaccessiblememonly_func() #8 [ "unknown"() ] +; CHECK: NoModRef: Ptr: i8* %p <-> call void @an_inaccessibleorargmemonly_func(i8* %q) #9 [ "unknown"() ] +; CHECK: Both ModRef (MustAlias): Ptr: i8* %q <-> call void @an_inaccessibleorargmemonly_func(i8* %q) #9 [ "unknown"() ] +; CHECK: NoModRef: Ptr: i8* %p <-> call void @an_argmemonly_func(i8* %q) #10 [ "unknown"() ] +; CHECK: Both ModRef (MustAlias): Ptr: i8* %q <-> call void @an_argmemonly_func(i8* %q) #10 [ "unknown"() ] +; CHECK: Just Ref: call void @a_readonly_func(i8* %p) #7 [ "unknown"() ] <-> call void @an_inaccessiblememonly_func() #8 [ "unknown"() ] +; CHECK: Just Ref: call void @a_readonly_func(i8* %p) #7 [ "unknown"() ] <-> call void @an_inaccessibleorargmemonly_func(i8* %q) #9 [ "unknown"() ] +; CHECK: Just Ref: call void @a_readonly_func(i8* %p) #7 [ "unknown"() ] <-> call void @an_argmemonly_func(i8* %q) #10 [ "unknown"() ] +; CHECK: Both ModRef: call void @an_inaccessiblememonly_func() #8 [ "unknown"() ] <-> call void @a_readonly_func(i8* %p) #7 [ "unknown"() ] +; CHECK: Both ModRef: call void @an_inaccessiblememonly_func() #8 [ "unknown"() ] <-> call void @an_inaccessibleorargmemonly_func(i8* %q) #9 [ "unknown"() ] +; CHECK: 
NoModRef: call void @an_inaccessiblememonly_func() #8 [ "unknown"() ] <-> call void @an_argmemonly_func(i8* %q) #10 [ "unknown"() ] +; CHECK: Both ModRef: call void @an_inaccessibleorargmemonly_func(i8* %q) #9 [ "unknown"() ] <-> call void @a_readonly_func(i8* %p) #7 [ "unknown"() ] +; CHECK: Both ModRef: call void @an_inaccessibleorargmemonly_func(i8* %q) #9 [ "unknown"() ] <-> call void @an_inaccessiblememonly_func() #8 [ "unknown"() ] +; CHECK: Both ModRef (MustAlias): call void @an_inaccessibleorargmemonly_func(i8* %q) #9 [ "unknown"() ] <-> call void @an_argmemonly_func(i8* %q) #10 [ "unknown"() ] +; CHECK: Both ModRef: call void @an_argmemonly_func(i8* %q) #10 [ "unknown"() ] <-> call void @a_readonly_func(i8* %p) #7 [ "unknown"() ] +; CHECK: NoModRef: call void @an_argmemonly_func(i8* %q) #10 [ "unknown"() ] <-> call void @an_inaccessiblememonly_func() #8 [ "unknown"() ] +; CHECK: Both ModRef (MustAlias): call void @an_argmemonly_func(i8* %q) #10 [ "unknown"() ] <-> call void @an_inaccessibleorargmemonly_func(i8* %q) #9 [ "unknown"() ] } attributes #0 = { argmemonly nounwind } diff --git a/llvm/test/Analysis/BasicAA/dereferenceable.ll b/llvm/test/Analysis/BasicAA/dereferenceable.ll new file mode 100644 index 000000000..efc9addbe --- /dev/null +++ b/llvm/test/Analysis/BasicAA/dereferenceable.ll @@ -0,0 +1,149 @@ +; RUN: opt -basicaa -print-all-alias-modref-info -aa-eval -analyze < %s 2>&1 | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +@G = global i32 0, align 4 + +define i64 @global_and_deref_arg_1(i64* dereferenceable(8) %arg) { +; CHECK: Function: global_and_deref_arg_1: 2 pointers, 0 call sites +; CHECK-NEXT: NoAlias: i32* @G, i64* %arg +bb: + store i64 1, i64* %arg, align 8 + store i32 0, i32* @G, align 4 + %tmp = load i64, i64* %arg, align 8 + ret i64 %tmp +} + +define i32 @global_and_deref_arg_2(i32* dereferenceable(8) %arg) { +; CHECK: Function: global_and_deref_arg_2: 2 pointers, 0 call sites +; CHECK-NEXT: NoAlias: 
i32* %arg, i32* @G +bb: + store i32 1, i32* %arg, align 8 + store i32 0, i32* @G, align 4 + %tmp = load i32, i32* %arg, align 8 + ret i32 %tmp +} + +define i32 @byval_and_deref_arg_1(i32* byval %obj, i64* dereferenceable(8) %arg) { +; CHECK: Function: byval_and_deref_arg_1: 2 pointers, 0 call sites +; CHECK-NEXT: NoAlias: i32* %obj, i64* %arg +bb: + store i32 1, i32* %obj, align 4 + store i64 0, i64* %arg, align 8 + %tmp = load i32, i32* %obj, align 4 + ret i32 %tmp +} + +define i32 @byval_and_deref_arg_2(i32* byval %obj, i32* dereferenceable(8) %arg) { +; CHECK: Function: byval_and_deref_arg_2: 2 pointers, 0 call sites +; CHECK-NEXT: NoAlias: i32* %arg, i32* %obj +bb: + store i32 1, i32* %obj, align 4 + store i32 0, i32* %arg, align 8 + %tmp = load i32, i32* %obj, align 4 + ret i32 %tmp +} + +declare dereferenceable(8) i32* @get_i32_deref8() +declare dereferenceable(8) i64* @get_i64_deref8() +declare void @unknown(i32*) + +define i32 @local_and_deref_ret_1() { +; CHECK: Function: local_and_deref_ret_1: 2 pointers, 2 call sites +; CHECK-NEXT: NoAlias: i32* %obj, i64* %ret +bb: + %obj = alloca i32 + call void @unknown(i32* %obj) + %ret = call dereferenceable(8) i64* @get_i64_deref8() + store i32 1, i32* %obj, align 4 + store i64 0, i64* %ret, align 8 + %tmp = load i32, i32* %obj, align 4 + ret i32 %tmp +} + +define i32 @local_and_deref_ret_2() { +; CHECK: Function: local_and_deref_ret_2: 2 pointers, 2 call sites +; CHECK-NEXT: NoAlias: i32* %obj, i32* %ret +bb: + %obj = alloca i32 + call void @unknown(i32* %obj) + %ret = call dereferenceable(8) i32* @get_i32_deref8() + store i32 1, i32* %obj, align 4 + store i32 0, i32* %ret, align 8 + %tmp = load i32, i32* %obj, align 4 + ret i32 %tmp +} + + +; Baseline tests, same as above but with 2 instead of 8 dereferenceable bytes. 
+ +define i64 @global_and_deref_arg_non_deref_1(i64* dereferenceable(2) %arg) { +; CHECK: Function: global_and_deref_arg_non_deref_1: 2 pointers, 0 call sites +; CHECK-NEXT: NoAlias: i32* @G, i64* %arg +bb: + store i64 1, i64* %arg, align 8 + store i32 0, i32* @G, align 4 + %tmp = load i64, i64* %arg, align 8 + ret i64 %tmp +} + +define i32 @global_and_deref_arg_non_deref_2(i32* dereferenceable(2) %arg) { +; CHECK: Function: global_and_deref_arg_non_deref_2: 2 pointers, 0 call sites +; Different result than above (see @global_and_deref_arg_2). +; CHECK-NEXT: MayAlias: i32* %arg, i32* @G +bb: + store i32 1, i32* %arg, align 8 + store i32 0, i32* @G, align 4 + %tmp = load i32, i32* %arg, align 8 + ret i32 %tmp +} + +define i32 @byval_and_deref_arg_non_deref_1(i32* byval %obj, i64* dereferenceable(2) %arg) { +; CHECK: Function: byval_and_deref_arg_non_deref_1: 2 pointers, 0 call sites +; CHECK-NEXT: NoAlias: i32* %obj, i64* %arg +bb: + store i32 1, i32* %obj, align 4 + store i64 0, i64* %arg, align 8 + %tmp = load i32, i32* %obj, align 4 + ret i32 %tmp +} + +define i32 @byval_and_deref_arg_non_deref_2(i32* byval %obj, i32* dereferenceable(2) %arg) { +; CHECK: Function: byval_and_deref_arg_non_deref_2: 2 pointers, 0 call sites +; CHECK-NEXT: NoAlias: i32* %arg, i32* %obj +bb: + store i32 1, i32* %obj, align 4 + store i32 0, i32* %arg, align 8 + %tmp = load i32, i32* %obj, align 4 + ret i32 %tmp +} + +declare dereferenceable(2) i32* @get_i32_deref2() +declare dereferenceable(2) i64* @get_i64_deref2() + +define i32 @local_and_deref_ret_non_deref_1() { +; CHECK: Function: local_and_deref_ret_non_deref_1: 2 pointers, 2 call sites +; CHECK-NEXT: NoAlias: i32* %obj, i64* %ret +bb: + %obj = alloca i32 + call void @unknown(i32* %obj) + %ret = call dereferenceable(2) i64* @get_i64_deref2() + store i32 1, i32* %obj, align 4 + store i64 0, i64* %ret, align 8 + %tmp = load i32, i32* %obj, align 4 + ret i32 %tmp +} + +define i32 @local_and_deref_ret_non_deref_2() { +; CHECK: 
Function: local_and_deref_ret_non_deref_2: 2 pointers, 2 call sites +; Different result than above (see @local_and_deref_ret_2). +; CHECK-NEXT: MayAlias: i32* %obj, i32* %ret +bb: + %obj = alloca i32 + call void @unknown(i32* %obj) + %ret = call dereferenceable(2) i32* @get_i32_deref2() + store i32 1, i32* %obj, align 4 + store i32 0, i32* %ret, align 8 + %tmp = load i32, i32* %obj, align 4 + ret i32 %tmp +} diff --git a/llvm/test/Analysis/BasicAA/gep-alias.ll b/llvm/test/Analysis/BasicAA/gep-alias.ll index 1e435af2f..5fd77e19e 100644 --- a/llvm/test/Analysis/BasicAA/gep-alias.ll +++ b/llvm/test/Analysis/BasicAA/gep-alias.ll @@ -247,7 +247,7 @@ define i32 @test12(i32 %x, i32 %y, i8* %p) nounwind { ; CHECK: [[U0ADDR:%[a-zA-Z0-9_]+]] = getelementptr inbounds [3 x i8], [3 x i8]* %u, i32 0, i32 0 ; CHECK: [[U0:%[a-zA-Z0-9_]+]] = load i8, i8* [[U0ADDR]], align 1 ; CHECK: [[U0ARG:%[a-zA-Z0-9_]+]] = zext i8 [[U0]] to i32 -; CHECK: call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i32 0, i32 0), i32 [[T0ARG]], i32 [[U0ARG]]) +; CHECK: call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i32 0, i32 0), i32 [[T0ARG]], i32 [[U0ARG]]) ; CHECK: ret define void @test13() { entry: diff --git a/llvm/test/Analysis/BasicAA/intrinsics.ll b/llvm/test/Analysis/BasicAA/intrinsics.ll index 68e59862b..cf792e8c6 100644 --- a/llvm/test/Analysis/BasicAA/intrinsics.ll +++ b/llvm/test/Analysis/BasicAA/intrinsics.ll @@ -22,6 +22,6 @@ entry: declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32, <8 x i1>, <8 x i16>) nounwind readonly declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>) nounwind -; CHECK: attributes #0 = { argmemonly nounwind readonly } -; CHECK: attributes #1 = { argmemonly nounwind } +; CHECK: attributes #0 = { argmemonly nounwind readonly willreturn } +; CHECK: attributes #1 = { argmemonly nounwind willreturn } ; CHECK: attributes [[ATTR]] = { nounwind } diff --git a/llvm/test/Analysis/BasicAA/ptrmask.ll b/llvm/test/Analysis/BasicAA/ptrmask.ll new file mode 100644 index 000000000..27c14ebb7 --- /dev/null +++ b/llvm/test/Analysis/BasicAA/ptrmask.ll @@ -0,0 +1,29 @@ +; RUN: opt -basicaa -aa-eval -print-no-aliases -disable-output %s 2>&1 | FileCheck %s + +%struct = type <{ [20 x i64] }> + +; CHECK-LABEL: Function: test_noalias: 4 pointers, 1 call sites +; CHECK-NEXT: NoAlias: %struct* %ptr1, i64* %ptr2 +; CHECK-NEXT: NoAlias: %struct* %addr.ptr, i64* %ptr2 +; CHECK-NEXT: NoAlias: i64* %gep, i64* %ptr2 +define void @test_noalias(%struct* noalias %ptr1, i64* %ptr2, i64 %offset) { +entry: + %addr.ptr = call %struct* @llvm.ptrmask.p0s_struct.p0s.struct.i64(%struct* %ptr1, i64 72057594037927928) + store i64 10, i64* %ptr2 + %gep = getelementptr inbounds %struct, %struct* %addr.ptr, i64 0, i32 0, i64 %offset + store i64 1, i64* %gep, align 8 + ret void +} + +; CHECK-NEXT: Function: test_alias: 4 pointers, 1 call sites +; CHECK-NOT: NoAlias +define void @test_alias(%struct* %ptr1, i64* %ptr2, i64 %offset) { +entry: + %addr.ptr = 
call %struct* @llvm.ptrmask.p0s_struct.p0s.struct.i64(%struct* %ptr1, i64 72057594037927928) + store i64 10, i64* %ptr2 + %gep = getelementptr inbounds %struct, %struct* %addr.ptr, i64 0, i32 0, i64 %offset + store i64 1, i64* %gep, align 8 + ret void +} + +declare %struct* @llvm.ptrmask.p0s_struct.p0s.struct.i64(%struct*, i64) diff --git a/llvm/test/Analysis/BasicAA/store-promote.ll b/llvm/test/Analysis/BasicAA/store-promote.ll index afe11c2a1..23b74bc1a 100644 --- a/llvm/test/Analysis/BasicAA/store-promote.ll +++ b/llvm/test/Analysis/BasicAA/store-promote.ll @@ -1,8 +1,9 @@ -; Test that LICM uses basicaa to do alias analysis, which is capable of +; Test that LICM uses basicaa to do alias analysis, which is capable of ; disambiguating some obvious cases. If LICM is able to disambiguate the ; two pointers, then the load should be hoisted, and the store sunk. -; RUN: opt < %s -basicaa -licm -S | FileCheck %s +; RUN: opt < %s -basicaa -licm -enable-mssa-loop-dependency=false -S | FileCheck %s -check-prefixes=CHECK,AST +; RUN: opt < %s -basicaa -licm -enable-mssa-loop-dependency=true -S | FileCheck %s -check-prefixes=CHECK,MSSA target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" @A = global i32 7 ; [#uses=3] @@ -25,10 +26,13 @@ Out: ; preds = %Loop ; The Loop block should be empty after the load/store are promoted. 
; CHECK: @test1 ; CHECK: load i32, i32* @A +; MSSA: load i32, i32* @A +; MSSA: store i32 %Atmp, i32* @B ; CHECK: Loop: ; CHECK-NEXT: br i1 %c, label %Out, label %Loop ; CHECK: Out: -; CHECK: store i32 %Atmp, i32* @B +; AST: store i32 %Atmp, i32* @B +; AST: load i32, i32* @A } define i32 @test2(i1 %c) { diff --git a/llvm/test/Analysis/BranchProbabilityInfo/basic.ll b/llvm/test/Analysis/BranchProbabilityInfo/basic.ll index 64e0a8245..8212cc476 100644 --- a/llvm/test/Analysis/BranchProbabilityInfo/basic.ll +++ b/llvm/test/Analysis/BranchProbabilityInfo/basic.ll @@ -141,6 +141,24 @@ exit: ret i32 %result } +define i32 @test_cold_loop(i32 %a, i32 %b) { +entry: + %cond1 = icmp eq i32 %a, 42 + br i1 %cond1, label %header, label %exit + +header: + br label %body + +body: + %cond2 = icmp eq i32 %b, 42 + br i1 %cond2, label %header, label %exit +; CHECK: edge body -> header probability is 0x40000000 / 0x80000000 = 50.00% + +exit: + call void @coldfunc() + ret i32 %b +} + declare i32 @regular_function(i32 %i) define i32 @test_cold_call_sites_with_prof(i32 %a, i32 %b, i1 %flag, i1 %flag2) { diff --git a/llvm/test/Analysis/BranchProbabilityInfo/fcmp.ll b/llvm/test/Analysis/BranchProbabilityInfo/fcmp.ll new file mode 100644 index 000000000..8089916fb --- /dev/null +++ b/llvm/test/Analysis/BranchProbabilityInfo/fcmp.ll @@ -0,0 +1,41 @@ +; RUN: opt < %s -analyze -branch-prob | FileCheck %s + +; This function tests the floating point unorder comparison. The probability +; of NaN should be extremely small. +; CHECK: Printing analysis 'Branch Probability Analysis' for function 'uno' +; CHECK: edge -> a probability is 0x00000800 / 0x80000000 = 0.00% +; CHECK: edge -> b probability is 0x7ffff800 / 0x80000000 = 100.00% [HOT edge] + +define void @uno(float %val1, float %val2) { + %cond = fcmp uno float %val1, %val2 + br i1 %cond, label %a, label %b + +a: + call void @fa() + ret void + +b: + call void @fb() + ret void +} + +; This function tests the floating point order comparison. 
+; CHECK: Printing analysis 'Branch Probability Analysis' for function 'ord' +; CHECK: edge -> a probability is 0x7ffff800 / 0x80000000 = 100.00% [HOT edge] +; CHECK: edge -> b probability is 0x00000800 / 0x80000000 = 0.00% + +define void @ord(float %val1, float %val2) { + %cond = fcmp ord float %val1, %val2 + br i1 %cond, label %a, label %b + +a: + call void @fa() + ret void + +b: + call void @fb() + ret void +} + +declare void @fa() +declare void @fb() diff --git a/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll b/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll index 0566ca16c..6e01afd2c 100644 --- a/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll +++ b/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll @@ -79,6 +79,32 @@ exit: ret i32 %b } +define i32 @test4(i32 %a, i32 %b) { +; CHECK: Printing analysis {{.*}} for function 'test4' +; Make sure we handle loops post-dominated by unreachables. +entry: + %cond1 = icmp eq i32 %a, 42 + br i1 %cond1, label %header, label %exit +; CHECK: edge entry -> header probability is 0x00000001 / 0x80000000 = 0.00% +; CHECK: edge entry -> exit probability is 0x7fffffff / 0x80000000 = 100.00% [HOT edge] + +header: + br label %body + +body: + %cond2 = icmp eq i32 %a, 42 + br i1 %cond2, label %header, label %abort +; CHECK: edge body -> header probability is 0x40000000 / 0x80000000 = 50.00% +; CHECK: edge body -> abort probability is 0x40000000 / 0x80000000 = 50.00% + +abort: + call void @abort() noreturn + unreachable + +exit: + ret i32 %b +} + @_ZTIi = external global i8* ; CHECK-LABEL: throwSmallException diff --git a/llvm/test/Analysis/BranchProbabilityInfo/pr22718.ll b/llvm/test/Analysis/BranchProbabilityInfo/pr22718.ll index 66ac89793..8674f9e5a 100644 --- a/llvm/test/Analysis/BranchProbabilityInfo/pr22718.ll +++ b/llvm/test/Analysis/BranchProbabilityInfo/pr22718.ll @@ -73,9 +73,9 @@ declare i32 @rand() #1 declare i32 @printf(i8*, ...) 
#2 -attributes #0 = { inlinehint nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { inlinehint nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #3 = { nounwind } !llvm.ident = !{!0} diff --git a/llvm/test/Analysis/ConstantFolding/binop-identity-undef.ll b/llvm/test/Analysis/ConstantFolding/binop-identity-undef.ll new file mode 100644 index 000000000..683078921 
--- /dev/null +++ b/llvm/test/Analysis/ConstantFolding/binop-identity-undef.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -constprop -S %s | FileCheck %s + +define i32 @and1() { +; CHECK-LABEL: @and1( +; CHECK-NEXT: ret i32 undef +; + %r = and i32 undef, -1 + ret i32 %r +} + +define i32 @and2() { +; CHECK-LABEL: @and2( +; CHECK-NEXT: ret i32 undef +; + %r = and i32 -1, undef + ret i32 %r +} + +define i32 @and3_no_identity() { +; CHECK-LABEL: @and3_no_identity( +; CHECK-NEXT: ret i32 0 +; + %r = and i32 10, undef + ret i32 %r +} + +define i32 @or1() { +; CHECK-LABEL: @or1( +; CHECK-NEXT: ret i32 undef +; + %r = or i32 0, undef + ret i32 %r +} + +define i32 @or2() { +; CHECK-LABEL: @or2( +; CHECK-NEXT: ret i32 undef +; + %r = or i32 undef, 0 + ret i32 %r +} + +define i32 @or3_no_identity() { +; CHECK-LABEL: @or3_no_identity( +; CHECK-NEXT: ret i32 -1 +; + %r = or i32 undef, 10 + ret i32 %r +} diff --git a/llvm/test/Analysis/ConstantFolding/copysign.ll b/llvm/test/Analysis/ConstantFolding/copysign.ll new file mode 100644 index 000000000..228ffcb47 --- /dev/null +++ b/llvm/test/Analysis/ConstantFolding/copysign.ll @@ -0,0 +1,53 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -constprop < %s | FileCheck %s + +declare float @llvm.copysign.f32(float, float) +declare double @llvm.copysign.f64(double, double) + +define float @f32_01() { +; CHECK-LABEL: @f32_01( +; CHECK-NEXT: ret float -1.000000e+00 +; + %x = call float @llvm.copysign.f32(float 1.0, float -2.0) + ret float %x +} + +define float @f32_02() { +; CHECK-LABEL: @f32_02( +; CHECK-NEXT: ret float 2.000000e+00 +; + %x = call float @llvm.copysign.f32(float -2.0, float 1.0) + ret float %x +} + +define float @f32_03() { +; CHECK-LABEL: @f32_03( +; CHECK-NEXT: ret float -2.000000e+00 +; + %x = call float @llvm.copysign.f32(float -2.0, float -1.0) + ret float %x +} + +define double @f64_01() { +; CHECK-LABEL: 
@f64_01( +; CHECK-NEXT: ret double -1.000000e+00 +; + %x = call double @llvm.copysign.f64(double 1.0, double -2.0) + ret double %x +} + +define double @f64_02() { +; CHECK-LABEL: @f64_02( +; CHECK-NEXT: ret double 1.000000e+00 +; + %x = call double @llvm.copysign.f64(double -1.0, double 2.0) + ret double %x +} + +define double @f64_03() { +; CHECK-LABEL: @f64_03( +; CHECK-NEXT: ret double -1.000000e+00 +; + %x = call double @llvm.copysign.f64(double -1.0, double -2.0) + ret double %x +} diff --git a/llvm/test/Analysis/ConstantFolding/gep-alias.ll b/llvm/test/Analysis/ConstantFolding/gep-alias.ll new file mode 100644 index 000000000..0fcc778a4 --- /dev/null +++ b/llvm/test/Analysis/ConstantFolding/gep-alias.ll @@ -0,0 +1,17 @@ +; RUN: opt -instcombine -S -o - %s | FileCheck %s +; Test that we don't replace an alias with its aliasee when simplifying GEPs. +; In this test case the transformation is invalid because it replaces the +; reference to the symbol "b" (which refers to whichever instance of "b" +; was chosen by the linker) with a reference to "a" (which refers to the +; specific instance of "b" in this module). 
+ +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@a = internal global [3 x i8*] zeroinitializer +@b = linkonce_odr alias [3 x i8*], [3 x i8*]* @a + +define i8** @f() { + ; CHECK: ret i8** getelementptr ([3 x i8*], [3 x i8*]* @b, i64 0, i64 1) + ret i8** getelementptr ([3 x i8*], [3 x i8*]* @b, i64 0, i64 1) +} diff --git a/llvm/test/Analysis/ConstantFolding/gep-zeroinit-vector.ll b/llvm/test/Analysis/ConstantFolding/gep-zeroinit-vector.ll index bb5fcbdb8..03d27e9fb 100644 --- a/llvm/test/Analysis/ConstantFolding/gep-zeroinit-vector.ll +++ b/llvm/test/Analysis/ConstantFolding/gep-zeroinit-vector.ll @@ -9,7 +9,7 @@ define <2 x i16*> @test_gep() { ; CHECK-LABEL: @test_gep( -; CHECK-NEXT: ret <2 x i16*> getelementptr ([1 x %rec8], [1 x %rec8]* @a, <2 x i64> zeroinitializer, <2 x i64> zeroinitializer), i32 0), i32 0, i32 0), i16* getelementptr inbounds (%rec8, %rec8* extractelement (<2 x %rec8*> getelementptr ([1 x %rec8], [1 x %rec8]* @a, <2 x i64> zeroinitializer, <2 x i64> zeroinitializer), i32 1), i32 0, i32 0)> +; CHECK-NEXT: ret <2 x i16*> ; %A = getelementptr [1 x %rec8], [1 x %rec8]* @a, <2 x i16> zeroinitializer, <2 x i64> zeroinitializer %B = bitcast <2 x %rec8*> %A to <2 x i16*> diff --git a/llvm/test/Analysis/ConstantFolding/insertelement.ll b/llvm/test/Analysis/ConstantFolding/insertelement.ll new file mode 100644 index 000000000..960042acf --- /dev/null +++ b/llvm/test/Analysis/ConstantFolding/insertelement.ll @@ -0,0 +1,19 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -constprop -S | FileCheck %s + + +define <4 x i32> @insertelement_fixedlength_constant() { +; CHECK-LABEL: @insertelement_fixedlength_constant( +; CHECK-NEXT: ret <4 x i32> +; + %i = insertelement <4 x i32> undef, i32 1, i32 0 + ret <4 x i32> %i +} + +define @insertelement_scalable_constant() { +; CHECK-LABEL: @insertelement_scalable_constant( +; CHECK-NEXT: ret insertelement ( 
undef, i32 1, i32 0) +; + %i = insertelement undef, i32 1, i32 0 + ret %i +} diff --git a/llvm/test/Analysis/ConstantFolding/math-1.ll b/llvm/test/Analysis/ConstantFolding/math-1.ll new file mode 100644 index 000000000..595095017 --- /dev/null +++ b/llvm/test/Analysis/ConstantFolding/math-1.ll @@ -0,0 +1,195 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -early-cse -S -o - %s | FileCheck %s + +declare double @acos(double) +define double @f_acos() { +; CHECK-LABEL: @f_acos( +; CHECK-NEXT: ret double 0.000000e+00 +; + %res = tail call fast double @acos(double 1.0) + ret double %res +} + +declare float @asinf(float) +define float @f_asinf() { +; CHECK-LABEL: @f_asinf( +; CHECK-NEXT: ret float 0x3FF921FB{{.+}} +; + %res = tail call fast float @asinf(float 1.0) + ret float %res +} + +declare double @atan(double) +define double @f_atan() { +; CHECK-LABEL: @f_atan( +; CHECK-NEXT: [[RES:%.*]] = tail call fast double @atan(double 1.000000e+00) +; CHECK-NEXT: ret double 0x3FE921FB +; + %res = tail call fast double @atan(double 1.0) + ret double %res +} + +declare float @cosf(float) +define float @f_cosf() { +; CHECK-LABEL: @f_cosf( +; CHECK-NEXT: ret float 0x3FE14A2{{.+}} +; + %res = tail call fast float @cosf(float 1.0) + ret float %res +} + +declare float @llvm.cos.f32(float) +define float @i_cosf() { +; CHECK-LABEL: @i_cosf( +; CHECK-NEXT: ret float 0x3FE14A2 +; + %res = tail call fast float @llvm.cos.f32(float 1.0) + ret float %res +} + +declare double @cosh(double) +define double @f_cosh() { +; CHECK-LABEL: @f_cosh( +; CHECK-NEXT: ret double 0x3FF8B075{{.+}} +; + %res = tail call fast double @cosh(double 1.0) + ret double %res +} + +declare float @expf(float) +define float @f_expf() { +; CHECK-LABEL: @f_expf( +; CHECK-NEXT: ret float 0x4005BF0A{{.+}} +; + %res = tail call fast float @expf(float 1.0) + ret float %res +} + +declare float @llvm.exp.f32(float) +define float @i_expf() { +; CHECK-LABEL: @i_expf( +; CHECK-NEXT: ret 
float 0x4005BF0A{{.+}} +; + %res = tail call fast float @llvm.exp.f32(float 1.0) + ret float %res +} + +declare double @exp2(double) +define double @f_exp2() { +; CHECK-LABEL: @f_exp2( +; CHECK-NEXT: ret double 2.000000e+00 +; + %res = tail call fast double @exp2(double 1.0) + ret double %res +} + +declare double @llvm.exp2.f64(double) +define double @i_exp2() { +; CHECK-LABEL: @i_exp2( +; CHECK-NEXT: ret double 2.000000e+00 +; + %res = tail call fast double @llvm.exp2.f64(double 1.0) + ret double %res +} + +; FIXME: exp10() is not widely supported. +declare float @exp10f(float) +define float @f_exp10f() { +; CHECK-LABEL: @f_exp10f( +; CHECK-NEXT: [[RES:%.*]] = tail call float @exp10f(float 1.000000e+00) +; CHECK-NEXT: ret float [[RES]] +; + %res = tail call float @exp10f(float 1.0) + ret float %res +} + +declare double @log(double) +define double @f_log() { +; CHECK-LABEL: @f_log( +; CHECK-NEXT: ret double 0.000000e+00 +; + %res = tail call fast double @log(double 1.0) + ret double %res +} + +declare double @llvm.log.f64(double) +define double @i_log() { +; CHECK-LABEL: @i_log( +; CHECK-NEXT: ret double 0.000000e+00 +; + %res = tail call fast double @llvm.log.f64(double 1.0) + ret double %res +} + +declare float @log2f(float) +define float @f_log2f() { +; CHECK-LABEL: @f_log2f( +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = tail call fast float @log2f(float 1.0) + ret float %res +} + +declare float @llvm.log2.f32(float) +define float @i_log2f() { +; CHECK-LABEL: @i_log2f( +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = tail call fast float @llvm.log2.f32(float 1.0) + ret float %res +} + +declare double @log10(double) +define double @f_log10() { +; CHECK-LABEL: @f_log10( +; CHECK-NEXT: ret double 0.000000e+00 +; + %res = tail call fast double @log10(double 1.0) + ret double %res +} + +declare float @sinf(float) +define float @f_sinf() { +; CHECK-LABEL: @f_sinf( +; CHECK-NEXT: ret float 0x3FEAED54{{.+}} +; + %res = tail call fast float @sinf(float 1.0) + ret 
float %res +} + +declare double @sinh(double) +define double @f_sinh() { +; CHECK-LABEL: @f_sinh( +; CHECK-NEXT: ret double 0x3FF2CD9F{{.+}} +; + %res = tail call fast double @sinh(double 1.0) + ret double %res +} + +declare float @sqrtf(float) +define float @f_sqrtf() { +; CHECK-LABEL: @f_sqrtf( +; CHECK-NEXT: ret float 1.000000e+00 +; + %res = tail call fast float @sqrtf(float 1.0) + ret float %res +} + +declare double @tan(double) +define double @f_tan() { +; CHECK-LABEL: @f_tan( +; CHECK-NEXT: ret double 0x3FF8EB24{{.+}} +; + %res = tail call fast double @tan(double 1.0) + ret double %res +} + +declare float @tanhf(float) +define float @f_tanhf() { +; CHECK-LABEL: @f_tanhf( +; CHECK-NEXT: [[RES:%.*]] = tail call fast float @tanhf(float 1.000000e+00) +; CHECK-NEXT: ret float 0x3FE85EFA{{.+}} +; + %res = tail call fast float @tanhf(float 1.0) + ret float %res +} diff --git a/llvm/test/Analysis/ConstantFolding/math-2.ll b/llvm/test/Analysis/ConstantFolding/math-2.ll new file mode 100644 index 000000000..90b64797f --- /dev/null +++ b/llvm/test/Analysis/ConstantFolding/math-2.ll @@ -0,0 +1,48 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -early-cse -S -o - %s | FileCheck %s + +declare double @atan2(double, double) +define double @f_atan2() { +; CHECK-LABEL: @f_atan2( +; CHECK-NEXT: [[RES:%.*]] = tail call fast double @atan2(double 1.000000e+00, double 2.000000e+00) +; CHECK-NEXT: ret double 0x3FDDAC6{{.+}} +; + %res = tail call fast double @atan2(double 1.0, double 2.0) + ret double %res +} + +declare float @fmodf(float, float) +define float @f_fmodf() { +; CHECK-LABEL: @f_fmodf( +; CHECK-NEXT: ret float 1.000000e+00 +; + %res = tail call fast float @fmodf(float 1.0, float 2.0) + ret float %res +} + +declare double @pow(double, double) +define double @f_pow() { +; CHECK-LABEL: @f_pow( +; CHECK-NEXT: ret double 1.000000e+00 +; + %res = tail call fast double @pow(double 1.0, double 2.0) + ret double %res +} + +declare float 
@llvm.pow.f32(float, float) +define float @i_powf() { +; CHECK-LABEL: @i_powf( +; CHECK-NEXT: ret float 1.000000e+00 +; + %res = tail call fast float @llvm.pow.f32(float 1.0, float 2.0) + ret float %res +} + +declare double @llvm.powi.f64(double, i32) +define double @i_powi() { +; CHECK-LABEL: @i_powi( +; CHECK-NEXT: ret double 1.000000e+00 +; + %res = tail call fast double @llvm.powi.f64(double 1.0, i32 2) + ret double %res +} diff --git a/llvm/test/Analysis/ConstantFolding/rint.ll b/llvm/test/Analysis/ConstantFolding/rint.ll new file mode 100644 index 000000000..9ad794d60 --- /dev/null +++ b/llvm/test/Analysis/ConstantFolding/rint.ll @@ -0,0 +1,109 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -early-cse < %s | FileCheck %s + +declare float @nearbyintf(float) #0 +declare float @llvm.nearbyint.f32(float) #0 +declare double @nearbyint(double) #0 +declare double @llvm.nearbyint.f64(double) #0 +declare float @rintf(float) #0 +declare float @llvm.rint.f32(float) #0 +declare double @rint(double) #0 +declare double @llvm.rint.f64(double) #0 + +define float @constant_fold_rint_f32_01() #0 { +; CHECK-LABEL: @constant_fold_rint_f32_01( +; CHECK-NEXT: ret float 1.000000e+00 +; + %x = call float @nearbyintf(float 1.25) #0 + ret float %x +} + +define float @constant_fold_rint_f32_02() #0 { +; CHECK-LABEL: @constant_fold_rint_f32_02( +; CHECK-NEXT: ret float -1.000000e+00 +; + %x = call float @llvm.nearbyint.f32(float -1.25) #0 + ret float %x +} + +define float @constant_fold_rint_f32_03() #0 { +; CHECK-LABEL: @constant_fold_rint_f32_03( +; CHECK-NEXT: ret float 2.000000e+00 +; + %x = call float @rintf(float 1.5) #0 + ret float %x +} + +define float @constant_fold_rint_f32_04() #0 { +; CHECK-LABEL: @constant_fold_rint_f32_04( +; CHECK-NEXT: ret float -2.000000e+00 +; + %x = call float @llvm.rint.f32(float -1.5) #0 + ret float %x +} + +define float @constant_fold_rint_f32_05() #0 { +; CHECK-LABEL: @constant_fold_rint_f32_05( +; 
CHECK-NEXT: ret float 3.000000e+00 +; + %x = call float @nearbyintf(float 2.75) #0 + ret float %x +} + +define float @constant_fold_rint_f32_06() #0 { +; CHECK-LABEL: @constant_fold_rint_f32_06( +; CHECK-NEXT: ret float -3.000000e+00 +; + %x = call float @llvm.nearbyint.f32(float -2.75) #0 + ret float %x +} + +define double @constant_fold_rint_f64_01() #0 { +; CHECK-LABEL: @constant_fold_rint_f64_01( +; CHECK-NEXT: ret double 1.000000e+00 +; + %x = call double @rint(double 1.3) #0 + ret double %x +} + +define double @constant_fold_rint_f64_02() #0 { +; CHECK-LABEL: @constant_fold_rint_f64_02( +; CHECK-NEXT: ret double -1.000000e+00 +; + %x = call double @llvm.rint.f64(double -1.3) #0 + ret double %x +} + +define double @constant_fold_rint_f64_03() #0 { +; CHECK-LABEL: @constant_fold_rint_f64_03( +; CHECK-NEXT: ret double 2.000000e+00 +; + %x = call double @nearbyint(double 1.5) #0 + ret double %x +} + +define double @constant_fold_rint_f64_04() #0 { +; CHECK-LABEL: @constant_fold_rint_f64_04( +; CHECK-NEXT: ret double -2.000000e+00 +; + %x = call double @llvm.nearbyint.f64(double -1.5) #0 + ret double %x +} + +define double @constant_fold_rint_f64_05() #0 { +; CHECK-LABEL: @constant_fold_rint_f64_05( +; CHECK-NEXT: ret double 3.000000e+00 +; + %x = call double @rint(double 2.7) #0 + ret double %x +} + +define double @constant_fold_rint_f64_06() #0 { +; CHECK-LABEL: @constant_fold_rint_f64_06( +; CHECK-NEXT: ret double -3.000000e+00 +; + %x = call double @llvm.rint.f64(double -2.7) #0 + ret double %x +} + +attributes #0 = { nounwind readnone } diff --git a/llvm/test/Analysis/ConstantFolding/round.ll b/llvm/test/Analysis/ConstantFolding/round.ll new file mode 100644 index 000000000..d5b847810 --- /dev/null +++ b/llvm/test/Analysis/ConstantFolding/round.ll @@ -0,0 +1,92 @@ +; RUN: opt -S -early-cse < %s | FileCheck %s + +declare float @roundf(float) #0 +declare float @llvm.round.f32(float) #0 +declare double @round(double) #0 +declare double @llvm.round.f64(double) #0 
+ +; CHECK-LABEL: @constant_fold_round_f32_01 +; CHECK-NEXT: ret float 1.000000e+00 +define float @constant_fold_round_f32_01() #0 { + %x = call float @roundf(float 1.25) #0 + ret float %x +} + +; CHECK-LABEL: @constant_fold_round_f32_02 +; CHECK-NEXT: ret float -1.000000e+00 +define float @constant_fold_round_f32_02() #0 { + %x = call float @llvm.round.f32(float -1.25) #0 + ret float %x +} + +; CHECK-LABEL: @constant_fold_round_f32_03 +; CHECK-NEXT: ret float 2.000000e+00 +define float @constant_fold_round_f32_03() #0 { + %x = call float @roundf(float 1.5) #0 + ret float %x +} + +; CHECK-LABEL: @constant_fold_round_f32_04 +; CHECK-NEXT: ret float -2.000000e+00 +define float @constant_fold_round_f32_04() #0 { + %x = call float @llvm.round.f32(float -1.5) #0 + ret float %x +} + +; CHECK-LABEL: @constant_fold_round_f32_05 +; CHECK-NEXT: ret float 3.000000e+00 +define float @constant_fold_round_f32_05() #0 { + %x = call float @roundf(float 2.75) #0 + ret float %x +} + +; CHECK-LABEL: @constant_fold_round_f32_06 +; CHECK-NEXT: ret float -3.000000e+00 +define float @constant_fold_round_f32_06() #0 { + %x = call float @llvm.round.f32(float -2.75) #0 + ret float %x +} + +; CHECK-LABEL: @constant_fold_round_f64_01 +; CHECK-NEXT: ret double 1.000000e+00 +define double @constant_fold_round_f64_01() #0 { + %x = call double @round(double 1.3) #0 + ret double %x +} + +; CHECK-LABEL: @constant_fold_round_f64_02 +; CHECK-NEXT: ret double -1.000000e+00 +define double @constant_fold_round_f64_02() #0 { + %x = call double @llvm.round.f64(double -1.3) #0 + ret double %x +} + +; CHECK-LABEL: @constant_fold_round_f64_03 +; CHECK-NEXT: ret double 2.000000e+00 +define double @constant_fold_round_f64_03() #0 { + %x = call double @round(double 1.5) #0 + ret double %x +} + +; CHECK-LABEL: @constant_fold_round_f64_04 +; CHECK-NEXT: ret double -2.000000e+00 +define double @constant_fold_round_f64_04() #0 { + %x = call double @llvm.round.f64(double -1.5) #0 + ret double %x +} + +; CHECK-LABEL: 
@constant_fold_round_f64_05 +; CHECK-NEXT: ret double 3.000000e+00 +define double @constant_fold_round_f64_05() #0 { + %x = call double @round(double 2.7) #0 + ret double %x +} + +; CHECK-LABEL: @constant_fold_round_f64_06 +; CHECK-NEXT: ret double -3.000000e+00 +define double @constant_fold_round_f64_06() #0 { + %x = call double @llvm.round.f64(double -2.7) #0 + ret double %x +} + +attributes #0 = { nounwind readnone } diff --git a/llvm/test/Analysis/ConstantFolding/shufflevector.ll b/llvm/test/Analysis/ConstantFolding/shufflevector.ll new file mode 100644 index 000000000..d69c2caec --- /dev/null +++ b/llvm/test/Analysis/ConstantFolding/shufflevector.ll @@ -0,0 +1,11 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -constprop -S | FileCheck %s + +define @shufflevector_scalable_constant() { +; CHECK-LABEL: @shufflevector_scalable_constant( +; CHECK-NEXT: ret shufflevector ( insertelement ( undef, i32 1, i32 0), undef, zeroinitializer) +; + %i = insertelement undef, i32 1, i32 0 + %i2 = shufflevector %i, undef, zeroinitializer + ret %i2 +} diff --git a/llvm/test/Analysis/ConstantFolding/trunc.ll b/llvm/test/Analysis/ConstantFolding/trunc.ll new file mode 100644 index 000000000..df1380917 --- /dev/null +++ b/llvm/test/Analysis/ConstantFolding/trunc.ll @@ -0,0 +1,105 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -early-cse < %s | FileCheck %s + +declare float @truncf(float) #0 +declare float @llvm.trunc.f32(float) #0 +declare double @trunc(double) #0 +declare double @llvm.trunc.f64(double) #0 + +define float @constant_fold_trunc_f32_01() #0 { +; CHECK-LABEL: @constant_fold_trunc_f32_01( +; CHECK-NEXT: ret float 1.000000e+00 +; + %x = call float @truncf(float 1.25) #0 + ret float %x +} + +define float @constant_fold_trunc_f32_02() #0 { +; CHECK-LABEL: @constant_fold_trunc_f32_02( +; CHECK-NEXT: ret float -1.000000e+00 +; + %x = call float @llvm.trunc.f32(float -1.25) #0 + ret 
float %x +} + +define float @constant_fold_trunc_f32_03() #0 { +; CHECK-LABEL: @constant_fold_trunc_f32_03( +; CHECK-NEXT: ret float 1.000000e+00 +; + %x = call float @truncf(float 1.5) #0 + ret float %x +} + +define float @constant_fold_trunc_f32_04() #0 { +; CHECK-LABEL: @constant_fold_trunc_f32_04( +; CHECK-NEXT: ret float -1.000000e+00 +; + %x = call float @llvm.trunc.f32(float -1.5) #0 + ret float %x +} + +define float @constant_fold_trunc_f32_05() #0 { +; CHECK-LABEL: @constant_fold_trunc_f32_05( +; CHECK-NEXT: ret float 2.000000e+00 +; + %x = call float @truncf(float 2.75) #0 + ret float %x +} + +define float @constant_fold_trunc_f32_06() #0 { +; CHECK-LABEL: @constant_fold_trunc_f32_06( +; CHECK-NEXT: ret float -2.000000e+00 +; + %x = call float @llvm.trunc.f32(float -2.75) #0 + ret float %x +} + +define double @constant_fold_trunc_f64_01() #0 { +; CHECK-LABEL: @constant_fold_trunc_f64_01( +; CHECK-NEXT: ret double 1.000000e+00 +; + %x = call double @trunc(double 1.3) #0 + ret double %x +} + +define double @constant_fold_trunc_f64_02() #0 { +; CHECK-LABEL: @constant_fold_trunc_f64_02( +; CHECK-NEXT: ret double -1.000000e+00 +; + %x = call double @llvm.trunc.f64(double -1.3) #0 + ret double %x +} + +define double @constant_fold_trunc_f64_03() #0 { +; CHECK-LABEL: @constant_fold_trunc_f64_03( +; CHECK-NEXT: ret double 1.000000e+00 +; + %x = call double @trunc(double 1.5) #0 + ret double %x +} + +define double @constant_fold_trunc_f64_04() #0 { +; CHECK-LABEL: @constant_fold_trunc_f64_04( +; CHECK-NEXT: ret double -1.000000e+00 +; + %x = call double @llvm.trunc.f64(double -1.5) #0 + ret double %x +} + +define double @constant_fold_trunc_f64_05() #0 { +; CHECK-LABEL: @constant_fold_trunc_f64_05( +; CHECK-NEXT: ret double 2.000000e+00 +; + %x = call double @trunc(double 2.7) #0 + ret double %x +} + +define double @constant_fold_trunc_f64_06() #0 { +; CHECK-LABEL: @constant_fold_trunc_f64_06( +; CHECK-NEXT: ret double -2.000000e+00 +; + %x = call double 
@llvm.trunc.f64(double -2.7) #0 + ret double %x +} + +attributes #0 = { nounwind readnone } diff --git a/llvm/test/Analysis/CostModel/AArch64/aggregates.ll b/llvm/test/Analysis/CostModel/AArch64/aggregates.ll new file mode 100644 index 000000000..35d232b3b --- /dev/null +++ b/llvm/test/Analysis/CostModel/AArch64/aggregates.ll @@ -0,0 +1,142 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -cost-model -cost-kind=throughput -analyze | FileCheck %s --check-prefixes=ALL,THROUGHPUT +; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -cost-model -cost-kind=latency -analyze | FileCheck %s --check-prefixes=ALL,LATENCY +; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -cost-model -cost-kind=code-size -analyze | FileCheck %s --check-prefixes=ALL,CODESIZE + +define i32 @extract_first_i32({i32, i32} %agg) { +; THROUGHPUT-LABEL: 'extract_first_i32' +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, i32 } %agg, 0 +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r +; +; LATENCY-LABEL: 'extract_first_i32' +; LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, i32 } %agg, 0 +; LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %r +; +; CODESIZE-LABEL: 'extract_first_i32' +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, i32 } %agg, 0 +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %r +; + %r = extractvalue {i32, i32} %agg, 0 + ret i32 %r +} + +define i32 @extract_second_i32({i32, i32} %agg) { +; THROUGHPUT-LABEL: 'extract_second_i32' +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, i32 } %agg, 1 +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r +; +; 
LATENCY-LABEL: 'extract_second_i32' +; LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, i32 } %agg, 1 +; LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %r +; +; CODESIZE-LABEL: 'extract_second_i32' +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, i32 } %agg, 1 +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %r +; + %r = extractvalue {i32, i32} %agg, 1 + ret i32 %r +} + +define i32 @extract_i32({i32, i1} %agg) { +; THROUGHPUT-LABEL: 'extract_i32' +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, i1 } %agg, 0 +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r +; +; LATENCY-LABEL: 'extract_i32' +; LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, i1 } %agg, 0 +; LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %r +; +; CODESIZE-LABEL: 'extract_i32' +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, i1 } %agg, 0 +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %r +; + %r = extractvalue {i32, i1} %agg, 0 + ret i32 %r +} + +define i1 @extract_i1({i32, i1} %agg) { +; THROUGHPUT-LABEL: 'extract_i1' +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, i1 } %agg, 1 +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %r +; +; LATENCY-LABEL: 'extract_i1' +; LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, i1 } %agg, 1 +; LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i1 %r +; +; CODESIZE-LABEL: 'extract_i1' +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { 
i32, i1 } %agg, 1 +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i1 %r +; + %r = extractvalue {i32, i1} %agg, 1 + ret i1 %r +} + +define float @extract_float({i32, float} %agg) { +; THROUGHPUT-LABEL: 'extract_float' +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, float } %agg, 1 +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r +; +; LATENCY-LABEL: 'extract_float' +; LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, float } %agg, 1 +; LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %r +; +; CODESIZE-LABEL: 'extract_float' +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, float } %agg, 1 +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %r +; + %r = extractvalue {i32, float} %agg, 1 + ret float %r +} + +define [42 x i42] @extract_array({i32, [42 x i42]} %agg) { +; THROUGHPUT-LABEL: 'extract_array' +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, [42 x i42] } %agg, 1 +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret [42 x i42] %r +; +; LATENCY-LABEL: 'extract_array' +; LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, [42 x i42] } %agg, 1 +; LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret [42 x i42] %r +; +; CODESIZE-LABEL: 'extract_array' +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, [42 x i42] } %agg, 1 +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret [42 x i42] %r +; + %r = extractvalue {i32, [42 x i42]} %agg, 1 + ret [42 x i42] %r +} + +define <42 x i42> @extract_vector({i32, <42 x i42>} %agg) { +; THROUGHPUT-LABEL: 
'extract_vector' +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, <42 x i42> } %agg, 1 +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <42 x i42> %r +; +; LATENCY-LABEL: 'extract_vector' +; LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, <42 x i42> } %agg, 1 +; LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <42 x i42> %r +; +; CODESIZE-LABEL: 'extract_vector' +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, <42 x i42> } %agg, 1 +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <42 x i42> %r +; + %r = extractvalue {i32, <42 x i42>} %agg, 1 + ret <42 x i42> %r +} + +%T1 = type { i32, float, <4 x i1> } + +define %T1 @extract_struct({i32, %T1} %agg) { +; THROUGHPUT-LABEL: 'extract_struct' +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, %T1 } %agg, 1 +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret %T1 %r +; +; LATENCY-LABEL: 'extract_struct' +; LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, %T1 } %agg, 1 +; LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret %T1 %r +; +; CODESIZE-LABEL: 'extract_struct' +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, %T1 } %agg, 1 +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret %T1 %r +; + %r = extractvalue {i32, %T1} %agg, 1 + ret %T1 %r +} diff --git a/llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll b/llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll index b538b12f0..9a2c01058 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll @@ -1,8 +1,11 @@ -; RUN: opt -cost-model -analyze 
-mtriple=amdgcn-unknown-amdhsa -mattr=+half-rate-64-ops < %s | FileCheck %s -; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FAST16,ALL %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=SLOW16,ALL %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FAST16,ALL %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=SLOW16,ALL %s -; CHECK: 'add_i32' -; CHECK: estimated cost of 1 for {{.*}} add i32 + +; ALL: 'add_i32' +; ALL: estimated cost of 1 for {{.*}} add i32 define amdgpu_kernel void @add_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 { %vec = load i32, i32 addrspace(1)* %vaddr %add = add i32 %vec, %b @@ -10,8 +13,8 @@ define amdgpu_kernel void @add_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %va ret void } -; CHECK: 'add_v2i32' -; CHECK: estimated cost of 2 for {{.*}} add <2 x i32> +; ALL: 'add_v2i32' +; ALL: estimated cost of 2 for {{.*}} add <2 x i32> define amdgpu_kernel void @add_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %vaddr, <2 x i32> %b) #0 { %vec = load <2 x i32>, <2 x i32> addrspace(1)* %vaddr %add = add <2 x i32> %vec, %b @@ -19,10 +22,10 @@ define amdgpu_kernel void @add_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> add ret void } -; CHECK: 'add_v3i32' +; ALL: 'add_v3i32' ; Allow for 4 when v3i32 is illegal and TargetLowering thinks it needs widening, ; and 3 when it is legal. 
-; CHECK: estimated cost of {{[34]}} for {{.*}} add <3 x i32> +; ALL: estimated cost of {{[34]}} for {{.*}} add <3 x i32> define amdgpu_kernel void @add_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %vaddr, <3 x i32> %b) #0 { %vec = load <3 x i32>, <3 x i32> addrspace(1)* %vaddr %add = add <3 x i32> %vec, %b @@ -30,8 +33,8 @@ define amdgpu_kernel void @add_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> add ret void } -; CHECK: 'add_v4i32' -; CHECK: estimated cost of 4 for {{.*}} add <4 x i32> +; ALL: 'add_v4i32' +; ALL: estimated cost of 4 for {{.*}} add <4 x i32> define amdgpu_kernel void @add_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %vaddr, <4 x i32> %b) #0 { %vec = load <4 x i32>, <4 x i32> addrspace(1)* %vaddr %add = add <4 x i32> %vec, %b @@ -39,10 +42,10 @@ define amdgpu_kernel void @add_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> add ret void } -; CHECK: 'add_v5i32' +; ALL: 'add_v5i32' ; Allow for 8 when v3i32 is illegal and TargetLowering thinks it needs widening, ; and 5 when it is legal. 
-; CHECK: estimated cost of {{[58]}} for {{.*}} add <5 x i32> +; ALL: estimated cost of {{[58]}} for {{.*}} add <5 x i32> define amdgpu_kernel void @add_v5i32(<5 x i32> addrspace(1)* %out, <5 x i32> addrspace(1)* %vaddr, <5 x i32> %b) #0 { %vec = load <5 x i32>, <5 x i32> addrspace(1)* %vaddr %add = add <5 x i32> %vec, %b @@ -50,8 +53,8 @@ define amdgpu_kernel void @add_v5i32(<5 x i32> addrspace(1)* %out, <5 x i32> add ret void } -; CHECK: 'add_i64' -; CHECK: estimated cost of 2 for {{.*}} add i64 +; ALL: 'add_i64' +; ALL: estimated cost of 2 for {{.*}} add i64 define amdgpu_kernel void @add_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vaddr, i64 %b) #0 { %vec = load i64, i64 addrspace(1)* %vaddr %add = add i64 %vec, %b @@ -59,8 +62,8 @@ define amdgpu_kernel void @add_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %va ret void } -; CHECK: 'add_v2i64' -; CHECK: estimated cost of 4 for {{.*}} add <2 x i64> +; ALL: 'add_v2i64' +; ALL: estimated cost of 4 for {{.*}} add <2 x i64> define amdgpu_kernel void @add_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %vaddr, <2 x i64> %b) #0 { %vec = load <2 x i64>, <2 x i64> addrspace(1)* %vaddr %add = add <2 x i64> %vec, %b @@ -68,8 +71,8 @@ define amdgpu_kernel void @add_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> add ret void } -; CHECK: 'add_v3i64' -; CHECK: estimated cost of 6 for {{.*}} add <3 x i64> +; ALL: 'add_v3i64' +; ALL: estimated cost of 6 for {{.*}} add <3 x i64> define amdgpu_kernel void @add_v3i64(<3 x i64> addrspace(1)* %out, <3 x i64> addrspace(1)* %vaddr, <3 x i64> %b) #0 { %vec = load <3 x i64>, <3 x i64> addrspace(1)* %vaddr %add = add <3 x i64> %vec, %b @@ -77,8 +80,8 @@ define amdgpu_kernel void @add_v3i64(<3 x i64> addrspace(1)* %out, <3 x i64> add ret void } -; CHECK: 'add_v4i64' -; CHECK: estimated cost of 8 for {{.*}} add <4 x i64> +; ALL: 'add_v4i64' +; ALL: estimated cost of 8 for {{.*}} add <4 x i64> define amdgpu_kernel void @add_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> 
addrspace(1)* %vaddr, <4 x i64> %b) #0 { %vec = load <4 x i64>, <4 x i64> addrspace(1)* %vaddr %add = add <4 x i64> %vec, %b @@ -86,8 +89,8 @@ define amdgpu_kernel void @add_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> add ret void } -; CHECK: 'add_v16i64' -; CHECK: estimated cost of 32 for {{.*}} add <16 x i64> +; ALL: 'add_v16i64' +; ALL: estimated cost of 32 for {{.*}} add <16 x i64> define amdgpu_kernel void @add_v16i64(<16 x i64> addrspace(1)* %out, <16 x i64> addrspace(1)* %vaddr, <16 x i64> %b) #0 { %vec = load <16 x i64>, <16 x i64> addrspace(1)* %vaddr %add = add <16 x i64> %vec, %b @@ -95,8 +98,8 @@ define amdgpu_kernel void @add_v16i64(<16 x i64> addrspace(1)* %out, <16 x i64> ret void } -; CHECK: 'add_i16' -; CHECK: estimated cost of 1 for {{.*}} add i16 +; ALL: 'add_i16' +; ALL: estimated cost of 1 for {{.*}} add i16 define amdgpu_kernel void @add_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %vaddr, i16 %b) #0 { %vec = load i16, i16 addrspace(1)* %vaddr %add = add i16 %vec, %b @@ -104,8 +107,9 @@ define amdgpu_kernel void @add_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %va ret void } -; CHECK: 'add_v2i16' -; CHECK: estimated cost of 2 for {{.*}} add <2 x i16> +; ALL: 'add_v2i16' +; SLOW16: estimated cost of 2 for {{.*}} add <2 x i16> +; FAST16: estimated cost of 1 for {{.*}} add <2 x i16> define amdgpu_kernel void @add_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr, <2 x i16> %b) #0 { %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr %add = add <2 x i16> %vec, %b @@ -113,8 +117,8 @@ define amdgpu_kernel void @add_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> add ret void } -; CHECK: 'sub_i32' -; CHECK: estimated cost of 1 for {{.*}} sub i32 +; ALL: 'sub_i32' +; ALL: estimated cost of 1 for {{.*}} sub i32 define amdgpu_kernel void @sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 { %vec = load i32, i32 addrspace(1)* %vaddr %sub = sub i32 %vec, %b @@ -122,16 +126,16 @@ define amdgpu_kernel void 
@sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %va ret void } -; CHECK: 'sub_i64' -; CHECK: estimated cost of 2 for {{.*}} sub i64 +; ALL: 'sub_i64' +; ALL: estimated cost of 2 for {{.*}} sub i64 define amdgpu_kernel void @sub_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vaddr, i64 %b) #0 { %vec = load i64, i64 addrspace(1)* %vaddr %sub = sub i64 %vec, %b store i64 %sub, i64 addrspace(1)* %out ret void } -; CHECK: 'sub_i16' -; CHECK: estimated cost of 1 for {{.*}} sub i16 +; ALL: 'sub_i16' +; ALL: estimated cost of 1 for {{.*}} sub i16 define amdgpu_kernel void @sub_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %vaddr, i16 %b) #0 { %vec = load i16, i16 addrspace(1)* %vaddr %sub = sub i16 %vec, %b @@ -139,8 +143,9 @@ define amdgpu_kernel void @sub_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %va ret void } -; CHECK: 'sub_v2i16' -; CHECK: estimated cost of 2 for {{.*}} sub <2 x i16> +; ALL: 'sub_v2i16' +; SLOW16: estimated cost of 2 for {{.*}} sub <2 x i16> +; FAST16: estimated cost of 1 for {{.*}} sub <2 x i16> define amdgpu_kernel void @sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr, <2 x i16> %b) #0 { %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr %sub = sub <2 x i16> %vec, %b diff --git a/llvm/test/Analysis/CostModel/AMDGPU/addrspacecast.ll b/llvm/test/Analysis/CostModel/AMDGPU/addrspacecast.ll index f15ab500a..a87a965c6 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/addrspacecast.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/addrspacecast.ll @@ -1,4 +1,5 @@ ; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri < %s | FileCheck %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri < %s | FileCheck %s ; CHECK-LABEL: 'addrspacecast_global_to_flat' ; CHECK: estimated cost of 0 for {{.*}} addrspacecast i8 addrspace(1)* %ptr to i8* diff --git a/llvm/test/Analysis/CostModel/AMDGPU/bit-ops.ll b/llvm/test/Analysis/CostModel/AMDGPU/bit-ops.ll index 
aa70f5032..2dec5f350 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/bit-ops.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/bit-ops.ll @@ -1,7 +1,10 @@ -; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL,SLOW16 %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL,FAST16 %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL,SLOW16 %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL,FAST16 %s -; CHECK: 'or_i32' -; CHECK: estimated cost of 1 for {{.*}} or i32 +; ALL: 'or_i32' +; ALL: estimated cost of 1 for {{.*}} or i32 define amdgpu_kernel void @or_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 { %vec = load i32, i32 addrspace(1)* %vaddr %or = or i32 %vec, %b @@ -9,8 +12,8 @@ define amdgpu_kernel void @or_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vad ret void } -; CHECK: 'or_i64' -; CHECK: estimated cost of 2 for {{.*}} or i64 +; ALL: 'or_i64' +; ALL: estimated cost of 2 for {{.*}} or i64 define amdgpu_kernel void @or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vaddr, i64 %b) #0 { %vec = load i64, i64 addrspace(1)* %vaddr %or = or i64 %vec, %b @@ -18,8 +21,18 @@ define amdgpu_kernel void @or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vad ret void } -; CHECK: 'xor_i32' -; CHECK: estimated cost of 1 for {{.*}} xor i32 +; ALL: 'or_v2i16' +; SLOW16: estimated cost of 2 for {{.*}} or <2 x i16> +; FAST16: estimated cost of 1 for {{.*}} or <2 x i16> +define amdgpu_kernel void @or_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr, <2 x i16> %b) #0 { + %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr + %or = or <2 x i16> %vec, %b + store <2 x i16> %or, <2 x i16> addrspace(1)* 
%out + ret void +} + +; ALL: 'xor_i32' +; ALL: estimated cost of 1 for {{.*}} xor i32 define amdgpu_kernel void @xor_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 { %vec = load i32, i32 addrspace(1)* %vaddr %or = xor i32 %vec, %b @@ -27,8 +40,8 @@ define amdgpu_kernel void @xor_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %va ret void } -; CHECK: 'xor_i64' -; CHECK: estimated cost of 2 for {{.*}} xor i64 +; ALL: 'xor_i64' +; ALL: estimated cost of 2 for {{.*}} xor i64 define amdgpu_kernel void @xor_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vaddr, i64 %b) #0 { %vec = load i64, i64 addrspace(1)* %vaddr %or = xor i64 %vec, %b @@ -36,9 +49,18 @@ define amdgpu_kernel void @xor_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %va ret void } +; ALL: 'xor_v2i16' +; SLOW16: estimated cost of 2 for {{.*}} xor <2 x i16> +; FAST16: estimated cost of 1 for {{.*}} xor <2 x i16> +define amdgpu_kernel void @xor_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr, <2 x i16> %b) #0 { + %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr + %xor = xor <2 x i16> %vec, %b + store <2 x i16> %xor, <2 x i16> addrspace(1)* %out + ret void +} -; CHECK: 'and_i32' -; CHECK: estimated cost of 1 for {{.*}} and i32 +; ALL: 'and_i32' +; ALL: estimated cost of 1 for {{.*}} and i32 define amdgpu_kernel void @and_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 { %vec = load i32, i32 addrspace(1)* %vaddr %or = and i32 %vec, %b @@ -46,8 +68,8 @@ define amdgpu_kernel void @and_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %va ret void } -; CHECK: 'and_i64' -; CHECK: estimated cost of 2 for {{.*}} and i64 +; ALL: 'and_i64' +; ALL: estimated cost of 2 for {{.*}} and i64 define amdgpu_kernel void @and_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vaddr, i64 %b) #0 { %vec = load i64, i64 addrspace(1)* %vaddr %or = and i64 %vec, %b @@ -55,5 +77,14 @@ define amdgpu_kernel void @and_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %va ret void } +; ALL: 
'and_v2i16' +; SLOW16: estimated cost of 2 for {{.*}} and <2 x i16> +; FAST16: estimated cost of 1 for {{.*}} and <2 x i16> +define amdgpu_kernel void @and_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr, <2 x i16> %b) #0 { + %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr + %and = and <2 x i16> %vec, %b + store <2 x i16> %and, <2 x i16> addrspace(1)* %out + ret void +} attributes #0 = { nounwind } diff --git a/llvm/test/Analysis/CostModel/AMDGPU/extractelement.ll b/llvm/test/Analysis/CostModel/AMDGPU/extractelement.ll index 889cc8bb8..67ce8ffba 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/extractelement.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/extractelement.ll @@ -1,6 +1,10 @@ ; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa %s | FileCheck -check-prefixes=GCN,CI %s -; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=fiji %s | FileCheck -check-prefixes=GCN,VI %s -; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 %s | FileCheck -check-prefixes=GCN,GFX9 %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=fiji %s | FileCheck -check-prefixes=GCN,GFX89 %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 %s | FileCheck -check-prefixes=GCN,GFX89 %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa %s | FileCheck -check-prefixes=GCN,CI %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=fiji %s | FileCheck -check-prefixes=GCN,GFX89 %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 %s | FileCheck -check-prefixes=GCN,GFX89 %s + ; GCN: 'extractelement_v2i32' ; GCN: estimated cost of 0 for {{.*}} extractelement <2 x i32> @@ -113,8 +117,7 @@ define amdgpu_kernel void @extractelement_v4i8(i8 addrspace(1)* %out, <4 x i8> a ; GCN: 'extractelement_0_v2i16': ; CI: estimated cost of 1 for {{.*}} extractelement <2 x i16> 
%vec, i16 0 -; VI: estimated cost of 0 for {{.*}} extractelement <2 x i16> -; GFX9: estimated cost of 0 for {{.*}} extractelement <2 x i16> +; GFX89: estimated cost of 0 for {{.*}} extractelement <2 x i16> define amdgpu_kernel void @extractelement_0_v2i16(i16 addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr) { %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr %elt = extractelement <2 x i16> %vec, i16 0 diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fabs.ll b/llvm/test/Analysis/CostModel/AMDGPU/fabs.ll index 51e65fe91..de5381c21 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/fabs.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/fabs.ll @@ -1,6 +1,7 @@ ; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck %s -; CHECK: 'fabs_f32' +; CHECK-LABEL: 'fabs_f32' ; CHECK: estimated cost of 0 for {{.*}} call float @llvm.fabs.f32 define amdgpu_kernel void @fabs_f32(float addrspace(1)* %out, float addrspace(1)* %vaddr) #0 { %vec = load float, float addrspace(1)* %vaddr @@ -9,7 +10,7 @@ define amdgpu_kernel void @fabs_f32(float addrspace(1)* %out, float addrspace(1) ret void } -; CHECK: 'fabs_v2f32' +; CHECK-LABEL: 'fabs_v2f32' ; CHECK: estimated cost of 0 for {{.*}} call <2 x float> @llvm.fabs.v2f32 define amdgpu_kernel void @fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %vaddr) #0 { %vec = load <2 x float>, <2 x float> addrspace(1)* %vaddr @@ -18,7 +19,7 @@ define amdgpu_kernel void @fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float ret void } -; CHECK: 'fabs_v3f32' +; CHECK-LABEL: 'fabs_v3f32' ; CHECK: estimated cost of 0 for {{.*}} call <3 x float> @llvm.fabs.v3f32 define amdgpu_kernel void @fabs_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %vaddr) #0 { %vec = load <3 x float>, <3 x float> addrspace(1)* %vaddr @@ -27,7 +28,7 @@ define amdgpu_kernel void @fabs_v3f32(<3 x float> addrspace(1)* 
%out, <3 x float ret void } -; CHECK: 'fabs_v5f32' +; CHECK-LABEL: 'fabs_v5f32' ; CHECK: estimated cost of 0 for {{.*}} call <5 x float> @llvm.fabs.v5f32 define amdgpu_kernel void @fabs_v5f32(<5 x float> addrspace(1)* %out, <5 x float> addrspace(1)* %vaddr) #0 { %vec = load <5 x float>, <5 x float> addrspace(1)* %vaddr @@ -36,7 +37,7 @@ define amdgpu_kernel void @fabs_v5f32(<5 x float> addrspace(1)* %out, <5 x float ret void } -; CHECK: 'fabs_f64' +; CHECK-LABEL: 'fabs_f64' ; CHECK: estimated cost of 0 for {{.*}} call double @llvm.fabs.f64 define amdgpu_kernel void @fabs_f64(double addrspace(1)* %out, double addrspace(1)* %vaddr) #0 { %vec = load double, double addrspace(1)* %vaddr @@ -45,7 +46,7 @@ define amdgpu_kernel void @fabs_f64(double addrspace(1)* %out, double addrspace( ret void } -; CHECK: 'fabs_v2f64' +; CHECK-LABEL: 'fabs_v2f64' ; CHECK: estimated cost of 0 for {{.*}} call <2 x double> @llvm.fabs.v2f64 define amdgpu_kernel void @fabs_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %vaddr) #0 { %vec = load <2 x double>, <2 x double> addrspace(1)* %vaddr @@ -54,7 +55,7 @@ define amdgpu_kernel void @fabs_v2f64(<2 x double> addrspace(1)* %out, <2 x doub ret void } -; CHECK: 'fabs_v3f64' +; CHECK-LABEL: 'fabs_v3f64' ; CHECK: estimated cost of 0 for {{.*}} call <3 x double> @llvm.fabs.v3f64 define amdgpu_kernel void @fabs_v3f64(<3 x double> addrspace(1)* %out, <3 x double> addrspace(1)* %vaddr) #0 { %vec = load <3 x double>, <3 x double> addrspace(1)* %vaddr @@ -63,7 +64,7 @@ define amdgpu_kernel void @fabs_v3f64(<3 x double> addrspace(1)* %out, <3 x doub ret void } -; CHECK: 'fabs_f16' +; CHECK-LABEL: 'fabs_f16' ; CHECK: estimated cost of 0 for {{.*}} call half @llvm.fabs.f16 define amdgpu_kernel void @fabs_f16(half addrspace(1)* %out, half addrspace(1)* %vaddr) #0 { %vec = load half, half addrspace(1)* %vaddr @@ -72,7 +73,7 @@ define amdgpu_kernel void @fabs_f16(half addrspace(1)* %out, half addrspace(1)* ret void } -; CHECK: 'fabs_v2f16' 
+; CHECK-LABEL: 'fabs_v2f16' ; CHECK: estimated cost of 0 for {{.*}} call <2 x half> @llvm.fabs.v2f16 define amdgpu_kernel void @fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %vaddr) #0 { %vec = load <2 x half>, <2 x half> addrspace(1)* %vaddr @@ -81,7 +82,7 @@ define amdgpu_kernel void @fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> ret void } -; CHECK: 'fabs_v3f16' +; CHECK-LABEL: 'fabs_v3f16' ; CHECK: estimated cost of 0 for {{.*}} call <3 x half> @llvm.fabs.v3f16 define amdgpu_kernel void @fabs_v3f16(<3 x half> addrspace(1)* %out, <3 x half> addrspace(1)* %vaddr) #0 { %vec = load <3 x half>, <3 x half> addrspace(1)* %vaddr diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fadd.ll b/llvm/test/Analysis/CostModel/AMDGPU/fadd.ll index 5a5a09419..1203182a8 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/fadd.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/fadd.ll @@ -1,5 +1,7 @@ -; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=+half-rate-64-ops < %s | FileCheck -check-prefix=FASTF64 -check-prefix=ALL %s -; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefix=SLOWF64 -check-prefix=ALL %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FASTF64,FASTF16,ALL %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=SLOWF64,SLOWF16,ALL %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FASTF64,FASTF16,ALL %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=SLOWF64,SLOWF16,ALL %s ; ALL: 'fadd_f32' ; ALL: estimated cost of 1 for {{.*}} fadd float @@ -71,8 +73,8 @@ define amdgpu_kernel void @fadd_v3f64(<3 x double> 
addrspace(1)* %out, <3 x doub ret void } -; ALL 'fadd_f16' -; ALL estimated cost of 1 for {{.*}} fadd half +; ALL: 'fadd_f16' +; ALL: estimated cost of 1 for {{.*}} fadd half define amdgpu_kernel void @fadd_f16(half addrspace(1)* %out, half addrspace(1)* %vaddr, half %b) #0 { %vec = load half, half addrspace(1)* %vaddr %add = fadd half %vec, %b @@ -80,8 +82,9 @@ define amdgpu_kernel void @fadd_f16(half addrspace(1)* %out, half addrspace(1)* ret void } -; ALL 'fadd_v2f16' -; ALL estimated cost of 2 for {{.*}} fadd <2 x half> +; ALL: 'fadd_v2f16' +; SLOWF16: estimated cost of 2 for {{.*}} fadd <2 x half> +; FASTF16: estimated cost of 1 for {{.*}} fadd <2 x half> define amdgpu_kernel void @fadd_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %vaddr, <2 x half> %b) #0 { %vec = load <2 x half>, <2 x half> addrspace(1)* %vaddr %add = fadd <2 x half> %vec, %b @@ -89,8 +92,19 @@ define amdgpu_kernel void @fadd_v2f16(<2 x half> addrspace(1)* %out, <2 x half> ret void } -; ALL 'fadd_v4f16' -; ALL estimated cost of 4 for {{.*}} fadd <4 x half> +; ALL: 'fadd_v3f16' +; SLOWF16: estimated cost of 4 for {{.*}} fadd <3 x half> +; FASTF16: estimated cost of 2 for {{.*}} fadd <3 x half> +define amdgpu_kernel void @fadd_v3f16(<3 x half> addrspace(1)* %out, <3 x half> addrspace(1)* %vaddr, <3 x half> %b) #0 { + %vec = load <3 x half>, <3 x half> addrspace(1)* %vaddr + %add = fadd <3 x half> %vec, %b + store <3 x half> %add, <3 x half> addrspace(1)* %out + ret void +} + +; ALL: 'fadd_v4f16' +; SLOWF16: estimated cost of 4 for {{.*}} fadd <4 x half> +; FASTF16: estimated cost of 2 for {{.*}} fadd <4 x half> define amdgpu_kernel void @fadd_v4f16(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %vaddr, <4 x half> %b) #0 { %vec = load <4 x half>, <4 x half> addrspace(1)* %vaddr %add = fadd <4 x half> %vec, %b diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll b/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll index 325960ac9..6986a3158 100644 --- 
a/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll @@ -1,48 +1,91 @@ -; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=hawaii -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=ALL,CIFASTF64,NOFP32DENORM,NOFP16,NOFP16-NOFP32DENORM %s -; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=ALL,CISLOWF64,NOFP32DENORM,NOFP16,NOFP16-NOFP32DENORM %s -; RUN: opt -cost-model -analyze -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=ALL,SIFASTF64,NOFP32DENORM,NOFP16,NOFP16-NOFP32DENORM %s -; RUN: opt -cost-model -analyze -mtriple=amdgcn-mesa-mesa3d -mcpu=verde -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=ALL,SISLOWF64,NOFP32DENORM,NOFP16,NOFP16-NOFP32DENORM %s -; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=hawaii -mattr=+fp32-denormals < %s | FileCheck -check-prefixes=ALL,FP32DENORMS,SLOWFP32DENORMS,NOFP16,NOFP16-FP32DENORM %s -; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+fp32-denormals < %s | FileCheck -check-prefixes=ALL,FP32DENORMS,FASTFP32DENORMS,FP16 %s - -; ALL: 'fdiv_f32' -; NOFP32DENORM: estimated cost of 12 for {{.*}} fdiv float -; FP32DENORMS: estimated cost of 10 for {{.*}} fdiv float -define amdgpu_kernel void @fdiv_f32(float addrspace(1)* %out, float addrspace(1)* %vaddr, float %b) #0 { +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=hawaii < %s | FileCheck -check-prefixes=ALL,CIFASTF64,NOFP16,NOFP16-NOFP32DENORM,SLOWFP32DENORMS %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri < %s | FileCheck -check-prefixes=ALL,CISLOWF64,NOFP16,NOFP16-NOFP32DENORM,SLOWFP32DENORMS %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck 
-check-prefixes=ALL,SIFASTF64,NOFP32DENORM,NOFP16,NOFP16-NOFP32DENORM,SLOWFP32DENORMS %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-mesa-mesa3d -mcpu=verde < %s | FileCheck -check-prefixes=ALL,SISLOWF64,NOFP32DENORM,NOFP16,NOFP16-NOFP32DENORM,SLOWFP32DENORMS %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=hawaii < %s | FileCheck -check-prefixes=ALL,NOFP16,NOFP16-FP32DENORM,SLOWFP32DENORMS %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL,FASTFP32DENORMS,FP16 %s + +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=hawaii < %s | FileCheck -check-prefixes=ALL,CIFASTF64,NOFP16,NOFP16-NOFP32DENORM %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri < %s | FileCheck -check-prefixes=ALL,CISLOWF64,NOFP16,NOFP16-NOFP32DENORM %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefixes=ALL,SIFASTF64,NOFP16,NOFP16-NOFP32DENORM %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-mesa-mesa3d -mcpu=verde < %s | FileCheck -check-prefixes=ALL,SISLOWF64,NOFP16,NOFP16-NOFP32DENORM %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=hawaii < %s | FileCheck -check-prefixes=ALL,SLOWFP32DENORMS,NOFP16,NOFP16-FP32DENORM %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL,FASTFP32DENORMS,FP16 %s + +; ALL: 'fdiv_f32_ieee' +; ALL: estimated cost of 10 for {{.*}} fdiv float +define amdgpu_kernel void @fdiv_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %vaddr, float %b) #0 { + %vec = load float, float addrspace(1)* %vaddr + %add = fdiv float %vec, %b + store float %add, float addrspace(1)* %out + ret void +} + +; ALL: 'fdiv_f32_ftzdaz' +; ALL: estimated cost of 12 for {{.*}} fdiv float 
+define amdgpu_kernel void @fdiv_f32_ftzdaz(float addrspace(1)* %out, float addrspace(1)* %vaddr, float %b) #1 { %vec = load float, float addrspace(1)* %vaddr %add = fdiv float %vec, %b store float %add, float addrspace(1)* %out ret void } -; ALL: 'fdiv_v2f32' -; NOFP32DENORM: estimated cost of 24 for {{.*}} fdiv <2 x float> -; FP32DENORMS: estimated cost of 20 for {{.*}} fdiv <2 x float> -define amdgpu_kernel void @fdiv_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %vaddr, <2 x float> %b) #0 { +; ALL: 'fdiv_v2f32_ieee' +; ALL: estimated cost of 20 for {{.*}} fdiv <2 x float> +define amdgpu_kernel void @fdiv_v2f32_ieee(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %vaddr, <2 x float> %b) #0 { + %vec = load <2 x float>, <2 x float> addrspace(1)* %vaddr + %add = fdiv <2 x float> %vec, %b + store <2 x float> %add, <2 x float> addrspace(1)* %out + ret void +} + +; ALL: 'fdiv_v2f32_ftzdaz' +; ALL: estimated cost of 24 for {{.*}} fdiv <2 x float> +define amdgpu_kernel void @fdiv_v2f32_ftzdaz(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %vaddr, <2 x float> %b) #1 { %vec = load <2 x float>, <2 x float> addrspace(1)* %vaddr %add = fdiv <2 x float> %vec, %b store <2 x float> %add, <2 x float> addrspace(1)* %out ret void } -; ALL: 'fdiv_v3f32' +; ALL: 'fdiv_v3f32_ieee' ; Allow for 48/40 when v3f32 is illegal and TargetLowering thinks it needs widening, ; and 36/30 when it is legal. 
-; NOFP32DENORM: estimated cost of {{36|48}} for {{.*}} fdiv <3 x float> -; FP32DENORMS: estimated cost of {{30|40}} for {{.*}} fdiv <3 x float> -define amdgpu_kernel void @fdiv_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %vaddr, <3 x float> %b) #0 { +; ALL: estimated cost of {{30|40}} for {{.*}} fdiv <3 x float> +define amdgpu_kernel void @fdiv_v3f32_ieee(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %vaddr, <3 x float> %b) #0 { %vec = load <3 x float>, <3 x float> addrspace(1)* %vaddr %add = fdiv <3 x float> %vec, %b store <3 x float> %add, <3 x float> addrspace(1)* %out ret void } -; ALL: 'fdiv_v5f32' +; ALL: 'fdiv_v3f32_ftzdaz' +; Allow for 48/40 when v3f32 is illegal and TargetLowering thinks it needs widening, +; and 36/30 when it is legal. +; ALL: estimated cost of {{36|48}} for {{.*}} fdiv <3 x float> +define amdgpu_kernel void @fdiv_v3f32_ftzdaz(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %vaddr, <3 x float> %b) #1 { + %vec = load <3 x float>, <3 x float> addrspace(1)* %vaddr + %add = fdiv <3 x float> %vec, %b + store <3 x float> %add, <3 x float> addrspace(1)* %out + ret void +} + +; ALL: 'fdiv_v5f32_ieee' +; Allow for 96/80 when v5f32 is illegal and TargetLowering thinks it needs widening, +; and 60/50 when it is legal. +; ALL: estimated cost of {{80|50}} for {{.*}} fdiv <5 x float> +define amdgpu_kernel void @fdiv_v5f32_ieee(<5 x float> addrspace(1)* %out, <5 x float> addrspace(1)* %vaddr, <5 x float> %b) #0 { + %vec = load <5 x float>, <5 x float> addrspace(1)* %vaddr + %add = fdiv <5 x float> %vec, %b + store <5 x float> %add, <5 x float> addrspace(1)* %out + ret void +} + +; ALL: 'fdiv_v5f32_ftzdaz' ; Allow for 96/80 when v5f32 is illegal and TargetLowering thinks it needs widening, ; and 60/50 when it is legal. 
-; NOFP32DENORM: estimated cost of {{96|60}} for {{.*}} fdiv <5 x float> -; FP32DENORMS: estimated cost of {{80|50}} for {{.*}} fdiv <5 x float> -define amdgpu_kernel void @fdiv_v5f32(<5 x float> addrspace(1)* %out, <5 x float> addrspace(1)* %vaddr, <5 x float> %b) #0 { +; ALL: estimated cost of {{96|60}} for {{.*}} fdiv <5 x float> +define amdgpu_kernel void @fdiv_v5f32_ftzdaz(<5 x float> addrspace(1)* %out, <5 x float> addrspace(1)* %vaddr, <5 x float> %b) #1 { %vec = load <5 x float>, <5 x float> addrspace(1)* %vaddr %add = fdiv <5 x float> %vec, %b store <5 x float> %add, <5 x float> addrspace(1)* %out @@ -85,55 +128,99 @@ define amdgpu_kernel void @fdiv_v3f64(<3 x double> addrspace(1)* %out, <3 x doub ret void } -; ALL: 'fdiv_f16' -; NOFP16-NOFP32DENORM: estimated cost of 12 for {{.*}} fdiv half -; NOFP16-FP32DENORM: estimated cost of 10 for {{.*}} fdiv half +; ALL: 'fdiv_f16_f32_ieee' +; NOFP16: estimated cost of 10 for {{.*}} fdiv half ; FP16: estimated cost of 10 for {{.*}} fdiv half -define amdgpu_kernel void @fdiv_f16(half addrspace(1)* %out, half addrspace(1)* %vaddr, half %b) #0 { +define amdgpu_kernel void @fdiv_f16_f32_ieee(half addrspace(1)* %out, half addrspace(1)* %vaddr, half %b) #0 { %vec = load half, half addrspace(1)* %vaddr %add = fdiv half %vec, %b store half %add, half addrspace(1)* %out ret void } -; ALL: 'fdiv_v2f16' -; NOFP16-NOFP32DENORM: estimated cost of 24 for {{.*}} fdiv <2 x half> -; NOFP16-FP32DENORM: estimated cost of 20 for {{.*}} fdiv <2 x half> +; ALL: 'fdiv_f16_f32_ftzdaz' +; NOFP16: estimated cost of 12 for {{.*}} fdiv half +; FP16: estimated cost of 10 for {{.*}} fdiv half +define amdgpu_kernel void @fdiv_f16_f32_ftzdaz(half addrspace(1)* %out, half addrspace(1)* %vaddr, half %b) #1 { + %vec = load half, half addrspace(1)* %vaddr + %add = fdiv half %vec, %b + store half %add, half addrspace(1)* %out + ret void +} + +; ALL: 'fdiv_v2f16_f32_ieee' +; NOFP16: estimated cost of 20 for {{.*}} fdiv <2 x half> ; FP16: estimated cost 
of 20 for {{.*}} fdiv <2 x half> -define amdgpu_kernel void @fdiv_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %vaddr, <2 x half> %b) #0 { +define amdgpu_kernel void @fdiv_v2f16_f32_ieee(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %vaddr, <2 x half> %b) #0 { %vec = load <2 x half>, <2 x half> addrspace(1)* %vaddr %add = fdiv <2 x half> %vec, %b store <2 x half> %add, <2 x half> addrspace(1)* %out ret void } -; ALL: 'fdiv_v4f16' -; NOFP16-NOFP32DENORM: estimated cost of 48 for {{.*}} fdiv <4 x half> -; NOFP16-FP32DENORM: estimated cost of 40 for {{.*}} fdiv <4 x half> +; ALL: 'fdiv_v2f16_f32_ftzdaz' +; NOFP16: estimated cost of 24 for {{.*}} fdiv <2 x half> +; FP16: estimated cost of 20 for {{.*}} fdiv <2 x half> +define amdgpu_kernel void @fdiv_v2f16_f32_ftzdaz(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %vaddr, <2 x half> %b) #1 { + %vec = load <2 x half>, <2 x half> addrspace(1)* %vaddr + %add = fdiv <2 x half> %vec, %b + store <2 x half> %add, <2 x half> addrspace(1)* %out + ret void +} + +; ALL: 'fdiv_v4f16_f32_ieee' +; NOFP16: estimated cost of 40 for {{.*}} fdiv <4 x half> ; FP16: estimated cost of 40 for {{.*}} fdiv <4 x half> -define amdgpu_kernel void @fdiv_v4f16(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %vaddr, <4 x half> %b) #0 { +define amdgpu_kernel void @fdiv_v4f16_f32_ieee(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %vaddr, <4 x half> %b) #0 { %vec = load <4 x half>, <4 x half> addrspace(1)* %vaddr %add = fdiv <4 x half> %vec, %b store <4 x half> %add, <4 x half> addrspace(1)* %out ret void } -; ALL: 'rcp_f32' -; NOFP32DENORM: estimated cost of 3 for {{.*}} fdiv float +; ALL: 'fdiv_v4f16_f32_ftzdaz' +; NOFP16: estimated cost of 48 for {{.*}} fdiv <4 x half> +; FP16: estimated cost of 40 for {{.*}} fdiv <4 x half> +define amdgpu_kernel void @fdiv_v4f16_f32_ftzdaz(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %vaddr, <4 x half> %b) #1 { + %vec = load <4 x half>, <4 x half> 
addrspace(1)* %vaddr + %add = fdiv <4 x half> %vec, %b + store <4 x half> %add, <4 x half> addrspace(1)* %out + ret void +} + +; ALL: 'rcp_f32_ieee' ; SLOWFP32DENORMS: estimated cost of 10 for {{.*}} fdiv float ; FASTFP32DENORMS: estimated cost of 10 for {{.*}} fdiv float -define amdgpu_kernel void @rcp_f32(float addrspace(1)* %out, float addrspace(1)* %vaddr) #0 { +define amdgpu_kernel void @rcp_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %vaddr) #0 { %vec = load float, float addrspace(1)* %vaddr %add = fdiv float 1.0, %vec store float %add, float addrspace(1)* %out ret void } -; ALL: 'rcp_f16' -; NOFP16-NOFP32DENORM: estimated cost of 3 for {{.*}} fdiv half -; NOFP16-FP32DENORM: estimated cost of 10 for {{.*}} fdiv half +; ALL: 'rcp_f32_ftzdaz' +; ALL: estimated cost of 3 for {{.*}} fdiv float +define amdgpu_kernel void @rcp_f32_ftzdaz(float addrspace(1)* %out, float addrspace(1)* %vaddr) #1 { + %vec = load float, float addrspace(1)* %vaddr + %add = fdiv float 1.0, %vec + store float %add, float addrspace(1)* %out + ret void +} + +; ALL: 'rcp_f16_f32_ieee' +; NOFP16: estimated cost of 10 for {{.*}} fdiv half +; FP16: estimated cost of 3 for {{.*}} fdiv half +define amdgpu_kernel void @rcp_f16_f32_ieee(half addrspace(1)* %out, half addrspace(1)* %vaddr) #0 { + %vec = load half, half addrspace(1)* %vaddr + %add = fdiv half 1.0, %vec + store half %add, half addrspace(1)* %out + ret void +} + +; ALL: 'rcp_f16_f32_ftzdaz' +; NOFP16: estimated cost of 3 for {{.*}} fdiv half ; FP16: estimated cost of 3 for {{.*}} fdiv half -define amdgpu_kernel void @rcp_f16(half addrspace(1)* %out, half addrspace(1)* %vaddr) #0 { +define amdgpu_kernel void @rcp_f16_f32_ftzdaz(half addrspace(1)* %out, half addrspace(1)* %vaddr) #1 { %vec = load half, half addrspace(1)* %vaddr %add = fdiv half 1.0, %vec store half %add, half addrspace(1)* %out @@ -152,26 +239,44 @@ define amdgpu_kernel void @rcp_f64(double addrspace(1)* %out, double addrspace(1 ret void } -; ALL: 'rcp_v2f32' 
-; NOFP32DENORM: estimated cost of 6 for {{.*}} fdiv <2 x float> ; SLOWFP32DENORMS: estimated cost of 20 for {{.*}} fdiv <2 x float> ; FASTFP32DENORMS: estimated cost of 20 for {{.*}} fdiv <2 x float> -define amdgpu_kernel void @rcp_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %vaddr) #0 { +define amdgpu_kernel void @rcp_v2f32_ieee(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %vaddr) #0 { %vec = load <2 x float>, <2 x float> addrspace(1)* %vaddr %add = fdiv <2 x float> <float 1.0, float 1.0>, %vec store <2 x float> %add, <2 x float> addrspace(1)* %out ret void } -; ALL: 'rcp_v2f16' -; NOFP16-NOFP32DENORM: estimated cost of 6 for {{.*}} fdiv <2 x half> -; NOFP16-FP32DENORM: estimated cost of 20 for {{.*}} fdiv <2 x half> +; ALL: 'rcp_v2f32_ftzdaz' +; ALL: estimated cost of 6 for {{.*}} fdiv <2 x float> +define amdgpu_kernel void @rcp_v2f32_ftzdaz(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %vaddr) #1 { + %vec = load <2 x float>, <2 x float> addrspace(1)* %vaddr + %add = fdiv <2 x float> <float 1.0, float 1.0>, %vec + store <2 x float> %add, <2 x float> addrspace(1)* %out + ret void +} + +; ALL: 'rcp_v2f16_f32_ieee' +; NOFP16: estimated cost of 20 for {{.*}} fdiv <2 x half> +; FP16: estimated cost of 6 for {{.*}} fdiv <2 x half> +define amdgpu_kernel void @rcp_v2f16_f32_ieee(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %vaddr) #0 { + %vec = load <2 x half>, <2 x half> addrspace(1)* %vaddr + %add = fdiv <2 x half> <half 1.0, half 1.0>, %vec + store <2 x half> %add, <2 x half> addrspace(1)* %out + ret void +} + +; ALL: 'rcp_v2f16_f32_ftzdaz' +; NOFP16: estimated cost of 6 for {{.*}} fdiv <2 x half> ; FP16: estimated cost of 6 for {{.*}} fdiv <2 x half> -define amdgpu_kernel void @rcp_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %vaddr) #0 { +define amdgpu_kernel void @rcp_v2f16_f32_ftzdaz(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %vaddr) #1 { %vec = load <2 x half>, <2 x half> addrspace(1)* %vaddr %add = fdiv <2 x half> <half 1.0, half 1.0>
, %vec store <2 x half> %add, <2 x half> addrspace(1)* %out ret void } -attributes #0 = { nounwind } +attributes #0 = { nounwind "target-features"="+fp32-denormals" } +attributes #1 = { nounwind "target-features"="-fp32-denormals" } diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fma.ll b/llvm/test/Analysis/CostModel/AMDGPU/fma.ll new file mode 100644 index 000000000..462163d2f --- /dev/null +++ b/llvm/test/Analysis/CostModel/AMDGPU/fma.ll @@ -0,0 +1,120 @@ +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FASTF64,FAST32,FASTF16,ALL %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=SLOWF64,SLOW32,SLOWF16,ALL %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FASTF64,FAST32,FASTF16,ALL %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=SLOWF64,SLOW32,SLOWF16,ALL %s + +; ALL-LABEL: 'fma_f32' +; SLOW32: estimated cost of 3 for {{.*}} call float @llvm.fma.f32 +; FAST32: estimated cost of 2 for {{.*}} call float @llvm.fma.f32 +define amdgpu_kernel void @fma_f32(float addrspace(1)* %out, float addrspace(1)* %vaddr) #0 { + %vec = load float, float addrspace(1)* %vaddr + %fma = call float @llvm.fma.f32(float %vec, float %vec, float %vec) #1 + store float %fma, float addrspace(1)* %out + ret void +} + +; ALL-LABEL: 'fma_v2f32' +; SLOW32: estimated cost of 6 for {{.*}} call <2 x float> @llvm.fma.v2f32 +; FAST32: estimated cost of 4 for {{.*}} call <2 x float> @llvm.fma.v2f32 +define amdgpu_kernel void @fma_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %vaddr) #0 { + %vec = load <2 x float>, <2 x float> addrspace(1)* %vaddr + %fma = call <2 x float> @llvm.fma.v2f32(<2 x float> %vec, <2 x float> %vec, 
<2 x float> %vec) #1 + store <2 x float> %fma, <2 x float> addrspace(1)* %out + ret void +} + +; ALL-LABEL: 'fma_v3f32' +; SLOW32: estimated cost of 9 for {{.*}} call <3 x float> @llvm.fma.v3f32 +; FAST32: estimated cost of 6 for {{.*}} call <3 x float> @llvm.fma.v3f32 +define amdgpu_kernel void @fma_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %vaddr) #0 { + %vec = load <3 x float>, <3 x float> addrspace(1)* %vaddr + %fma = call <3 x float> @llvm.fma.v3f32(<3 x float> %vec, <3 x float> %vec, <3 x float> %vec) #1 + store <3 x float> %fma, <3 x float> addrspace(1)* %out + ret void +} + +; ALL-LABEL: 'fma_v5f32' +; SLOW32: estimated cost of 15 for {{.*}} call <5 x float> @llvm.fma.v5f32 +; FAST32: estimated cost of 10 for {{.*}} call <5 x float> @llvm.fma.v5f32 +define amdgpu_kernel void @fma_v5f32(<5 x float> addrspace(1)* %out, <5 x float> addrspace(1)* %vaddr) #0 { + %vec = load <5 x float>, <5 x float> addrspace(1)* %vaddr + %fma = call <5 x float> @llvm.fma.v5f32(<5 x float> %vec, <5 x float> %vec, <5 x float> %vec) #1 + store <5 x float> %fma, <5 x float> addrspace(1)* %out + ret void +} + +; ALL-LABEL: 'fma_f64' +; SLOW64: estimated cost of 3 for {{.*}} call double @llvm.fma.f64 +; FAST64: estimated cost of 2 for {{.*}} call double @llvm.fma.f64 +define amdgpu_kernel void @fma_f64(double addrspace(1)* %out, double addrspace(1)* %vaddr) #0 { + %vec = load double, double addrspace(1)* %vaddr + %fma = call double @llvm.fma.f64(double %vec, double %vec, double %vec) #1 + store double %fma, double addrspace(1)* %out + ret void +} + +; ALL-LABEL: 'fma_v2f64' +; SLOW64: estimated cost of 6 for {{.*}} call <2 x double> @llvm.fma.v2f64 +; FAST64: estimated cost of 4 for {{.*}} call <2 x double> @llvm.fma.v2f64 +define amdgpu_kernel void @fma_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %vaddr) #0 { + %vec = load <2 x double>, <2 x double> addrspace(1)* %vaddr + %fma = call <2 x double> @llvm.fma.v2f64(<2 x double> %vec, <2 x 
double> %vec, <2 x double> %vec) #1 + store <2 x double> %fma, <2 x double> addrspace(1)* %out + ret void +} + +; ALL-LABEL: 'fma_v3f64' +; SLOW64: estimated cost of 9 for {{.*}} call <3 x double> @llvm.fma.v3f64 +; FAST64: estimated cost of 6 for {{.*}} call <3 x double> @llvm.fma.v3f64 +define amdgpu_kernel void @fma_v3f64(<3 x double> addrspace(1)* %out, <3 x double> addrspace(1)* %vaddr) #0 { + %vec = load <3 x double>, <3 x double> addrspace(1)* %vaddr + %fma = call <3 x double> @llvm.fma.v3f64(<3 x double> %vec, <3 x double> %vec, <3 x double> %vec) #1 + store <3 x double> %fma, <3 x double> addrspace(1)* %out + ret void +} + +; ALL-LABEL: 'fma_f16' +; SLOW16: estimated cost of 3 for {{.*}} call half @llvm.fma.f16 +; FAST16: estimated cost of 2 for {{.*}} call half @llvm.fma.f16 +define amdgpu_kernel void @fma_f16(half addrspace(1)* %out, half addrspace(1)* %vaddr) #0 { + %vec = load half, half addrspace(1)* %vaddr + %fma = call half @llvm.fma.f16(half %vec, half %vec, half %vec) #1 + store half %fma, half addrspace(1)* %out + ret void +} + +; ALL-LABEL: 'fma_v2f16' +; SLOW16: estimated cost of 6 for {{.*}} call <2 x half> @llvm.fma.v2f16 +; FAST16: estimated cost of 2 for {{.*}} call <2 x half> @llvm.fma.v2f16 +define amdgpu_kernel void @fma_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %vaddr) #0 { + %vec = load <2 x half>, <2 x half> addrspace(1)* %vaddr + %fma = call <2 x half> @llvm.fma.v2f16(<2 x half> %vec, <2 x half> %vec, <2 x half> %vec) #1 + store <2 x half> %fma, <2 x half> addrspace(1)* %out + ret void +} + +; ALL-LABEL: 'fma_v3f16' +; SLOW16: estimated cost of 12 for {{.*}} call <3 x half> @llvm.fma.v3f16 +; FAST16: estimated cost of 4 for {{.*}} call <3 x half> @llvm.fma.v3f16 +define amdgpu_kernel void @fma_v3f16(<3 x half> addrspace(1)* %out, <3 x half> addrspace(1)* %vaddr) #0 { + %vec = load <3 x half>, <3 x half> addrspace(1)* %vaddr + %fma = call <3 x half> @llvm.fma.v3f16(<3 x half> %vec, <3 x half> %vec, <3 x half> %vec) 
#1 + store <3 x half> %fma, <3 x half> addrspace(1)* %out + ret void +} + +declare float @llvm.fma.f32(float, float, float) #1 +declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) #1 +declare <3 x float> @llvm.fma.v3f32(<3 x float>, <3 x float>, <3 x float>) #1 +declare <5 x float> @llvm.fma.v5f32(<5 x float>, <5 x float>, <5 x float>) #1 + +declare double @llvm.fma.f64(double, double, double) #1 +declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) #1 +declare <3 x double> @llvm.fma.v3f64(<3 x double>, <3 x double>, <3 x double>) #1 + +declare half @llvm.fma.f16(half, half, half) #1 +declare <2 x half> @llvm.fma.v2f16(<2 x half>, <2 x half>, <2 x half>) #1 +declare <3 x half> @llvm.fma.v3f16(<3 x half>, <3 x half>, <3 x half>) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fmul.ll b/llvm/test/Analysis/CostModel/AMDGPU/fmul.ll index b1e2b51dd..9de2072f5 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/fmul.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/fmul.ll @@ -1,5 +1,7 @@ -; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=+half-rate-64-ops < %s | FileCheck -check-prefix=FASTF64 -check-prefix=ALL %s -; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefix=SLOWF64 -check-prefix=ALL %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FASTF64,FASTF16,ALL %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=SLOWF64,SLOWF16,ALL %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FASTF64,FASTF16,ALL %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa 
-mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=SLOWF64,SLOWF16,ALL %s ; ALL: 'fmul_f32' ; ALL: estimated cost of 1 for {{.*}} fmul float @@ -71,8 +73,8 @@ define amdgpu_kernel void @fmul_v3f64(<3 x double> addrspace(1)* %out, <3 x doub ret void } -; ALL 'fmul_f16' -; ALL estimated cost of 1 for {{.*}} fmul half +; ALL: 'fmul_f16' +; ALL: estimated cost of 1 for {{.*}} fmul half define amdgpu_kernel void @fmul_f16(half addrspace(1)* %out, half addrspace(1)* %vaddr, half %b) #0 { %vec = load half, half addrspace(1)* %vaddr %add = fmul half %vec, %b @@ -80,8 +82,9 @@ define amdgpu_kernel void @fmul_f16(half addrspace(1)* %out, half addrspace(1)* ret void } -; ALL 'fmul_v2f16' -; ALL estimated cost of 2 for {{.*}} fmul <2 x half> +; ALL: 'fmul_v2f16' +; SLOWF16 estimated cost of 2 for {{.*}} fmul <2 x half> +; FASTF16 estimated cost of 1 for {{.*}} fmul <2 x half> define amdgpu_kernel void @fmul_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %vaddr, <2 x half> %b) #0 { %vec = load <2 x half>, <2 x half> addrspace(1)* %vaddr %add = fmul <2 x half> %vec, %b @@ -89,8 +92,19 @@ define amdgpu_kernel void @fmul_v2f16(<2 x half> addrspace(1)* %out, <2 x half> ret void } -; ALL 'fmul_v4f16' -; ALL estimated cost of 4 for {{.*}} fmul <4 x half> +; ALL: 'fmul_v3f16' +; SLOWF16 estimated cost of 4 for {{.*}} fmul <3 x half> +; FASTF16 estimated cost of 2 for {{.*}} fmul <3 x half> +define amdgpu_kernel void @fmul_v3f16(<3 x half> addrspace(1)* %out, <3 x half> addrspace(1)* %vaddr, <3 x half> %b) #0 { + %vec = load <3 x half>, <3 x half> addrspace(1)* %vaddr + %add = fmul <3 x half> %vec, %b + store <3 x half> %add, <3 x half> addrspace(1)* %out + ret void +} + +; ALL: 'fmul_v4f16' +; SLOWF16: estimated cost of 4 for {{.*}} fmul <4 x half> +; FASTF16: estimated cost of 2 for {{.*}} fmul <4 x half> define amdgpu_kernel void @fmul_v4f16(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %vaddr, <4 x half> %b) #0 { %vec = load <4 x half>, <4 x half> 
addrspace(1)* %vaddr %add = fmul <4 x half> %vec, %b diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fsub.ll b/llvm/test/Analysis/CostModel/AMDGPU/fsub.ll index 9ab518423..8bc6ebcd7 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/fsub.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/fsub.ll @@ -1,5 +1,7 @@ -; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=+half-rate-64-ops < %s | FileCheck -check-prefix=FASTF64 -check-prefix=ALL %s -; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefix=SLOWF64 -check-prefix=ALL %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FASTF64,FASTF16,ALL %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=SLOWF64,SLOWF16,ALL %s +; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FASTF64,FASTF16,ALL %s +; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=SLOWF64,SLOWF16,ALL %s ; ALL: 'fsub_f32' ; ALL: estimated cost of 1 for {{.*}} fsub float @@ -81,7 +83,8 @@ define amdgpu_kernel void @fsub_f16(half addrspace(1)* %out, half addrspace(1)* } ; ALL: 'fsub_v2f16' -; ALL: estimated cost of 2 for {{.*}} fsub <2 x half> +; SLOWF16: estimated cost of 2 for {{.*}} fsub <2 x half> +; FASTF16: estimated cost of 1 for {{.*}} fsub <2 x half> define amdgpu_kernel void @fsub_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %vaddr, <2 x half> %b) #0 { %vec = load <2 x half>, <2 x half> addrspace(1)* %vaddr %add = fsub <2 x half> %vec, %b @@ -89,8 +92,19 @@ define amdgpu_kernel void @fsub_v2f16(<2 x half> addrspace(1)* %out, <2 x half> ret void } +; ALL: 'fsub_v3f16' +; SLOWF16: estimated cost of 4 for {{.*}} 
fsub <3 x half> +; FASTF16: estimated cost of 2 for {{.*}} fsub <3 x half> +define amdgpu_kernel void @fsub_v3f16(<3 x half> addrspace(1)* %out, <3 x half> addrspace(1)* %vaddr, <3 x half> %b) #0 { + %vec = load <3 x half>, <3 x half> addrspace(1)* %vaddr + %add = fsub <3 x half> %vec, %b + store <3 x half> %add, <3 x half> addrspace(1)* %out + ret void +} + ; ALL: 'fsub_v4f16' -; ALL: estimated cost of 4 for {{.*}} fsub <4 x half> +; SLOWF16: estimated cost of 4 for {{.*}} fsub <4 x half> +; FASTF16: estimated cost of 2 for {{.*}} fsub <4 x half> define amdgpu_kernel void @fsub_v4f16(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %vaddr, <4 x half> %b) #0 { %vec = load <4 x half>, <4 x half> addrspace(1)* %vaddr %add = fsub <4 x half> %vec, %b diff --git a/llvm/test/Analysis/CostModel/AMDGPU/insertelement.ll b/llvm/test/Analysis/CostModel/AMDGPU/insertelement.ll index 67ab2607a..7bd86db27 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/insertelement.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/insertelement.ll @@ -1,6 +1,9 @@ ; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa %s | FileCheck -check-prefixes=GCN,CI %s -; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=fiji %s | FileCheck -check-prefixes=GCN,VI %s -; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 %s | FileCheck -check-prefixes=GCN,GFX9 %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=fiji %s | FileCheck -check-prefixes=GCN,GFX89 %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 %s | FileCheck -check-prefixes=GCN,GFX89 %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa %s | FileCheck -check-prefixes=GCN,CI %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=fiji %s | FileCheck -check-prefixes=GCN,GFX89 %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 %s 
| FileCheck -check-prefixes=GCN,GFX89 %s ; GCN-LABEL: 'insertelement_v2i32' ; GCN: estimated cost of 0 for {{.*}} insertelement <2 x i32> @@ -22,8 +25,7 @@ define amdgpu_kernel void @insertelement_v2i64(<2 x i64> addrspace(1)* %out, <2 ; GCN-LABEL: 'insertelement_0_v2i16' ; CI: estimated cost of 1 for {{.*}} insertelement <2 x i16> -; VI: estimated cost of 0 for {{.*}} insertelement <2 x i16> -; GFX9: estimated cost of 0 for {{.*}} insertelement <2 x i16> +; GFX89: estimated cost of 0 for {{.*}} insertelement <2 x i16> define amdgpu_kernel void @insertelement_0_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr) { %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr %insert = insertelement <2 x i16> %vec, i16 123, i16 0 diff --git a/llvm/test/Analysis/CostModel/AMDGPU/mul.ll b/llvm/test/Analysis/CostModel/AMDGPU/mul.ll index 85a377041..4d8a66ecd 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/mul.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/mul.ll @@ -1,7 +1,10 @@ -; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=SLOW16,ALL %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=FAST16,ALL %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=SLOW16,ALL %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=FAST16,ALL %s -; CHECK: 'mul_i32' -; CHECK: estimated cost of 3 for {{.*}} mul i32 +; ALL: 'mul_i32' +; ALL: estimated cost of 3 for {{.*}} mul i32 define amdgpu_kernel void @mul_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 { %vec = load i32, i32 addrspace(1)* %vaddr %mul = mul i32 %vec, %b @@ -9,8 +12,8 @@ define amdgpu_kernel void @mul_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %va ret void } 
-; CHECK: 'mul_v2i32' -; CHECK: estimated cost of 6 for {{.*}} mul <2 x i32> +; ALL: 'mul_v2i32' +; ALL: estimated cost of 6 for {{.*}} mul <2 x i32> define amdgpu_kernel void @mul_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %vaddr, <2 x i32> %b) #0 { %vec = load <2 x i32>, <2 x i32> addrspace(1)* %vaddr %mul = mul <2 x i32> %vec, %b @@ -18,10 +21,10 @@ define amdgpu_kernel void @mul_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> add ret void } -; CHECK: 'mul_v3i32' +; ALL: 'mul_v3i32' ; Allow for 12 when v3i32 is illegal and TargetLowering thinks it needs widening, ; and 9 when it is legal. -; CHECK: estimated cost of {{9|12}} for {{.*}} mul <3 x i32> +; ALL: estimated cost of {{9|12}} for {{.*}} mul <3 x i32> define amdgpu_kernel void @mul_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %vaddr, <3 x i32> %b) #0 { %vec = load <3 x i32>, <3 x i32> addrspace(1)* %vaddr %mul = mul <3 x i32> %vec, %b @@ -29,10 +32,10 @@ define amdgpu_kernel void @mul_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> add ret void } -; CHECK: 'mul_v5i32' +; ALL: 'mul_v5i32' ; Allow for 24 when v5i32 is illegal and TargetLowering thinks it needs widening, ; and 15 when it is legal. 
-; CHECK: estimated cost of {{15|24}} for {{.*}} mul <5 x i32> +; ALL: estimated cost of {{15|24}} for {{.*}} mul <5 x i32> define amdgpu_kernel void @mul_v5i32(<5 x i32> addrspace(1)* %out, <5 x i32> addrspace(1)* %vaddr, <5 x i32> %b) #0 { %vec = load <5 x i32>, <5 x i32> addrspace(1)* %vaddr %mul = mul <5 x i32> %vec, %b @@ -40,8 +43,8 @@ define amdgpu_kernel void @mul_v5i32(<5 x i32> addrspace(1)* %out, <5 x i32> add ret void } -; CHECK: 'mul_v4i32' -; CHECK: estimated cost of 12 for {{.*}} mul <4 x i32> +; ALL: 'mul_v4i32' +; ALL: estimated cost of 12 for {{.*}} mul <4 x i32> define amdgpu_kernel void @mul_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %vaddr, <4 x i32> %b) #0 { %vec = load <4 x i32>, <4 x i32> addrspace(1)* %vaddr %mul = mul <4 x i32> %vec, %b @@ -49,8 +52,8 @@ define amdgpu_kernel void @mul_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> add ret void } -; CHECK: 'mul_i64' -; CHECK: estimated cost of 16 for {{.*}} mul i64 +; ALL: 'mul_i64' +; ALL: estimated cost of 16 for {{.*}} mul i64 define amdgpu_kernel void @mul_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vaddr, i64 %b) #0 { %vec = load i64, i64 addrspace(1)* %vaddr %mul = mul i64 %vec, %b @@ -58,8 +61,8 @@ define amdgpu_kernel void @mul_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %va ret void } -; CHECK: 'mul_v2i64' -; CHECK: estimated cost of 32 for {{.*}} mul <2 x i64> +; ALL: 'mul_v2i64' +; ALL: estimated cost of 32 for {{.*}} mul <2 x i64> define amdgpu_kernel void @mul_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %vaddr, <2 x i64> %b) #0 { %vec = load <2 x i64>, <2 x i64> addrspace(1)* %vaddr %mul = mul <2 x i64> %vec, %b @@ -67,8 +70,8 @@ define amdgpu_kernel void @mul_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> add ret void } -; CHECK: 'mul_v3i64' -; CHECK: estimated cost of 48 for {{.*}} mul <3 x i64> +; ALL: 'mul_v3i64' +; ALL: estimated cost of 48 for {{.*}} mul <3 x i64> define amdgpu_kernel void @mul_v3i64(<3 x i64> addrspace(1)* %out, <3 x 
i64> addrspace(1)* %vaddr, <3 x i64> %b) #0 { %vec = load <3 x i64>, <3 x i64> addrspace(1)* %vaddr %mul = mul <3 x i64> %vec, %b @@ -76,8 +79,8 @@ define amdgpu_kernel void @mul_v3i64(<3 x i64> addrspace(1)* %out, <3 x i64> add ret void } -; CHECK: 'mul_v4i64' -; CHECK: estimated cost of 64 for {{.*}} mul <4 x i64> +; ALL: 'mul_v4i64' +; ALL: estimated cost of 64 for {{.*}} mul <4 x i64> define amdgpu_kernel void @mul_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %vaddr, <4 x i64> %b) #0 { %vec = load <4 x i64>, <4 x i64> addrspace(1)* %vaddr %mul = mul <4 x i64> %vec, %b @@ -86,8 +89,8 @@ define amdgpu_kernel void @mul_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> add } -; CHECK: 'mul_v8i64' -; CHECK: estimated cost of 128 for {{.*}} mul <8 x i64> +; ALL: 'mul_v8i64' +; ALL: estimated cost of 128 for {{.*}} mul <8 x i64> define amdgpu_kernel void @mul_v8i64(<8 x i64> addrspace(1)* %out, <8 x i64> addrspace(1)* %vaddr, <8 x i64> %b) #0 { %vec = load <8 x i64>, <8 x i64> addrspace(1)* %vaddr %mul = mul <8 x i64> %vec, %b @@ -95,4 +98,33 @@ define amdgpu_kernel void @mul_v8i64(<8 x i64> addrspace(1)* %out, <8 x i64> add ret void } +; ALL: 'mul_i16' +; ALL: estimated cost of 3 for {{.*}} mul i16 +define amdgpu_kernel void @mul_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %vaddr, i16 %b) #0 { + %vec = load i16, i16 addrspace(1)* %vaddr + %mul = mul i16 %vec, %b + store i16 %mul, i16 addrspace(1)* %out + ret void +} + +; ALL: 'mul_v2i16' +; SLOW16: estimated cost of 6 for {{.*}} mul <2 x i16> +; FAST16: estimated cost of 3 for {{.*}} mul <2 x i16> +define amdgpu_kernel void @mul_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr, <2 x i16> %b) #0 { + %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr + %mul = mul <2 x i16> %vec, %b + store <2 x i16> %mul, <2 x i16> addrspace(1)* %out + ret void +} + +; ALL: 'mul_v3i16' +; SLOW16: estimated cost of 12 for {{.*}} mul <3 x i16> +; FAST16: estimated cost of 6 for {{.*}} mul <3 x i16> 
+define amdgpu_kernel void @mul_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(1)* %vaddr, <3 x i16> %b) #0 { + %vec = load <3 x i16>, <3 x i16> addrspace(1)* %vaddr + %mul = mul <3 x i16> %vec, %b + store <3 x i16> %mul, <3 x i16> addrspace(1)* %out + ret void +} + attributes #0 = { nounwind } diff --git a/llvm/test/Analysis/CostModel/AMDGPU/shifts.ll b/llvm/test/Analysis/CostModel/AMDGPU/shifts.ll index 85fb0ebe1..55f547fe3 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/shifts.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/shifts.ll @@ -1,5 +1,7 @@ -; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=+half-rate-64-ops < %s | FileCheck -check-prefix=ALL -check-prefix=FAST64 %s -; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefix=ALL -check-prefix=SLOW64 %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=ALL,FAST64,FAST16 %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=ALL,SLOW64,SLOW16 %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=ALL,FAST64,FAST16 %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=ALL,SLOW64,SLOW16 %s ; ALL: 'shl_i32' ; ALL: estimated cost of 1 for {{.*}} shl i32 @@ -20,6 +22,25 @@ define amdgpu_kernel void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %va ret void } +; ALL: 'shl_i16' +; ALL: estimated cost of 1 for {{.*}} shl i16 +define amdgpu_kernel void @shl_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %vaddr, i16 %b) #0 { + %vec = load i16, i16 addrspace(1)* %vaddr + %or = shl i16 %vec, %b + store i16 %or, i16 addrspace(1)* %out + ret void +} + +; ALL: 'shl_v2i16' +; SLOW16: 
estimated cost of 2 for {{.*}} shl <2 x i16> +; FAST16: estimated cost of 1 for {{.*}} shl <2 x i16> +define amdgpu_kernel void @shl_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr, <2 x i16> %b) #0 { + %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr + %or = shl <2 x i16> %vec, %b + store <2 x i16> %or, <2 x i16> addrspace(1)* %out + ret void +} + ; ALL: 'lshr_i32' ; ALL: estimated cost of 1 for {{.*}} lshr i32 define amdgpu_kernel void @lshr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 { @@ -39,6 +60,25 @@ define amdgpu_kernel void @lshr_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %v ret void } +; ALL: 'lshr_i16' +; ALL: estimated cost of 1 for {{.*}} lshr i16 +define amdgpu_kernel void @lshr_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %vaddr, i16 %b) #0 { + %vec = load i16, i16 addrspace(1)* %vaddr + %or = lshr i16 %vec, %b + store i16 %or, i16 addrspace(1)* %out + ret void +} + +; ALL: 'lshr_v2i16' +; SLOW16: estimated cost of 2 for {{.*}} lshr <2 x i16> +; FAST16: estimated cost of 1 for {{.*}} lshr <2 x i16> +define amdgpu_kernel void @lshr_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr, <2 x i16> %b) #0 { + %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr + %or = lshr <2 x i16> %vec, %b + store <2 x i16> %or, <2 x i16> addrspace(1)* %out + ret void +} + ; ALL: 'ashr_i32' ; ALL: estimated cost of 1 for {{.*}} ashr i32 define amdgpu_kernel void @ashr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 { @@ -58,4 +98,23 @@ define amdgpu_kernel void @ashr_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %v ret void } +; ALL: 'ashr_i16' +; ALL: estimated cost of 1 for {{.*}} ashr i16 +define amdgpu_kernel void @ashr_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %vaddr, i16 %b) #0 { + %vec = load i16, i16 addrspace(1)* %vaddr + %or = ashr i16 %vec, %b + store i16 %or, i16 addrspace(1)* %out + ret void +} + +; ALL: 'ashr_v2i16' +; SLOW16: estimated cost of 2 for {{.*}} ashr <2 x i16> 
+; FAST16: estimated cost of 1 for {{.*}} ashr <2 x i16> +define amdgpu_kernel void @ashr_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr, <2 x i16> %b) #0 { + %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr + %or = ashr <2 x i16> %vec, %b + store <2 x i16> %or, <2 x i16> addrspace(1)* %out + ret void +} + attributes #0 = { nounwind } diff --git a/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll b/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll index cc756c82f..ff3abcaab 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll @@ -1,7 +1,11 @@ -; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 %s | FileCheck -check-prefixes=GFX9,GCN %s -; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=fiji %s | FileCheck -check-prefixes=VI,GCN %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 %s | FileCheck -check-prefixes=GFX9,GCN,TPT %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=fiji %s | FileCheck -check-prefixes=VI,GCN,TPT %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 %s | FileCheck -check-prefixes=GFX9,GCN,CS %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=fiji %s | FileCheck -check-prefixes=VI,GCN,CS %s +; GCN-LABEL: 'shufflevector_00_v2i16' ; GFX9: estimated cost of 0 for {{.*}} shufflevector <2 x i16> %vec, <2 x i16> undef, <2 x i32> zeroinitializer +; VI: estimated cost of 1 for {{.*}} shufflevector <2 x i16> %vec, <2 x i16> undef, <2 x i32> zeroinitializer define amdgpu_kernel void @shufflevector_00_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr) { %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <2 x i32> zeroinitializer @@ -9,7 +13,8 @@ define amdgpu_kernel void @shufflevector_00_v2i16(<2 x 
i16> addrspace(1)* %out, ret void } -; GFX9: estimated cost of 0 for {{.*}} shufflevector <2 x i16> %vec, <2 x i16> undef, <2 x i32> +; GCN-LABEL: 'shufflevector_01_v2i16' +; GCN: estimated cost of 0 for {{.*}} shufflevector <2 x i16> %vec, <2 x i16> undef, <2 x i32> define amdgpu_kernel void @shufflevector_01_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr) { %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <2 x i32> @@ -17,7 +22,9 @@ define amdgpu_kernel void @shufflevector_01_v2i16(<2 x i16> addrspace(1)* %out, ret void } +; GCN-LABEL: 'shufflevector_10_v2i16' ; GFX9: estimated cost of 0 for {{.*}} shufflevector <2 x i16> %vec, <2 x i16> undef, <2 x i32> +; VI: estimated cost of 2 for {{.*}} shufflevector <2 x i16> %vec, <2 x i16> undef, <2 x i32> define amdgpu_kernel void @shufflevector_10_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr) { %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <2 x i32> @@ -25,7 +32,9 @@ define amdgpu_kernel void @shufflevector_10_v2i16(<2 x i16> addrspace(1)* %out, ret void } +; GCN-LABEL: 'shufflevector_11_v2i16' ; GFX9: estimated cost of 0 for {{.*}} shufflevector <2 x i16> %vec, <2 x i16> undef, <2 x i32> +; VI: estimated cost of 2 for {{.*}} shufflevector <2 x i16> %vec, <2 x i16> undef, <2 x i32> define amdgpu_kernel void @shufflevector_11_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr) { %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <2 x i32> @@ -33,6 +42,7 @@ define amdgpu_kernel void @shufflevector_11_v2i16(<2 x i16> addrspace(1)* %out, ret void } +; GCN-LABEL: 'shufflevector_02_v2i16' ; GCN: estimated cost of 2 for {{.*}} shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> define amdgpu_kernel void @shufflevector_02_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* 
%vaddr0, <2 x i16> addrspace(1)* %vaddr1) { %vec0 = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr0 @@ -41,3 +51,14 @@ define amdgpu_kernel void @shufflevector_02_v2i16(<2 x i16> addrspace(1)* %out, store <2 x i16> %shuf, <2 x i16> addrspace(1)* %out ret void } + +; GCN-LABEL: 'shufflevector_xxx' +; TPT: Unknown cost for {{.*}} shufflevector <2 x i8> %vec, <2 x i8> undef, <4 x i32> +; CS: estimated cost of 1 for {{.*}} shufflevector <2 x i8> %vec, <2 x i8> undef, <4 x i32> +; Should not assert +define amdgpu_kernel void @shufflevector_xxx(<4 x i8> addrspace(1)* %out, <2 x i8> addrspace(1)* %vaddr) { + %vec = load <2 x i8>, <2 x i8> addrspace(1)* %vaddr + %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <4 x i32> + store <4 x i8> %shuf, <4 x i8> addrspace(1)* %out + ret void +} diff --git a/llvm/test/Analysis/CostModel/ARM/arith.ll b/llvm/test/Analysis/CostModel/ARM/arith.ll new file mode 100644 index 000000000..ad9a3d5a0 --- /dev/null +++ b/llvm/test/Analysis/CostModel/ARM/arith.ll @@ -0,0 +1,733 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -cost-model -analyze -mtriple=thumbv8.1-m.main-none-eabi -mattr=+mve,+mve1beat < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVE1 +; RUN: opt -cost-model -analyze -mtriple=thumbv8.1-m.main-none-eabi -mattr=+mve < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVE2 +; RUN: opt -cost-model -analyze -mtriple=thumbv8.1-m.main-none-eabi -mattr=+mve,+mve4beat < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVE4 + +define void @i8() { +; CHECK-LABEL: 'i8' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = add i8 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = sub i8 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = mul i8 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = ashr i8 undef, 
undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = lshr i8 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = shl i8 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i = and i8 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j = or i8 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k = xor i8 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %c = add i8 undef, undef + %d = sub i8 undef, undef + %e = mul i8 undef, undef + %f = ashr i8 undef, undef + %g = lshr i8 undef, undef + %h = shl i8 undef, undef + %i = and i8 undef, undef + %j = or i8 undef, undef + %k = xor i8 undef, undef + ret void +} + +define void @i16() { +; CHECK-LABEL: 'i16' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = add i16 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = sub i16 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = mul i16 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = ashr i16 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = lshr i16 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = shl i16 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i = and i16 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j = or i16 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k = xor i16 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %c = add i16 undef, undef + %d = sub i16 undef, undef + %e = mul i16 undef, undef + %f = ashr i16 undef, undef + %g = lshr i16 undef, 
undef + %h = shl i16 undef, undef + %i = and i16 undef, undef + %j = or i16 undef, undef + %k = xor i16 undef, undef + ret void +} + +define void @i32() { +; CHECK-LABEL: 'i32' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = add i32 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = sub i32 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = mul i32 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = ashr i32 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = lshr i32 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = shl i32 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i = and i32 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j = or i32 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k = xor i32 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %c = add i32 undef, undef + %d = sub i32 undef, undef + %e = mul i32 undef, undef + %f = ashr i32 undef, undef + %g = lshr i32 undef, undef + %h = shl i32 undef, undef + %i = and i32 undef, undef + %j = or i32 undef, undef + %k = xor i32 undef, undef + ret void +} + +define void @i64() { +; CHECK-LABEL: 'i64' +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c = add i64 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %d = sub i64 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = mul i64 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f = ashr i64 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %g = lshr i64 undef, undef +; CHECK-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %h = shl i64 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i = and i64 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j = or i64 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k = xor i64 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %c = add i64 undef, undef + %d = sub i64 undef, undef + %e = mul i64 undef, undef + %f = ashr i64 undef, undef + %g = lshr i64 undef, undef + %h = shl i64 undef, undef + %i = and i64 undef, undef + %j = or i64 undef, undef + %k = xor i64 undef, undef + ret void +} + + +define void @vi8() { +; CHECK-MVE1-LABEL: 'vi8' +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %c2 = add <2 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %d2 = sub <2 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e2 = mul <2 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f2 = ashr <2 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %g2 = lshr <2 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %h2 = shl <2 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i2 = and <2 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j2 = or <2 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k2 = xor <2 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c4 = add <4 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d4 = sub <4 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %e4 = mul <4 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f4 = ashr <4 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g4 = lshr <4 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h4 = shl <4 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i4 = and <4 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j4 = or <4 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k4 = xor <4 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c8 = add <8 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d8 = sub <8 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e8 = mul <8 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f8 = ashr <8 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g8 = lshr <8 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h8 = shl <8 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = and <8 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j8 = or <8 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k8 = xor <8 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c16 = add <16 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d16 = sub <16 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: %e16 = mul <16 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16 = ashr <16 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g16 = lshr <16 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h16 = shl <16 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = and <16 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j16 = or <16 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k16 = xor <16 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVE2-LABEL: 'vi8' +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %c2 = add <2 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %d2 = sub <2 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %e2 = mul <2 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %f2 = ashr <2 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %g2 = lshr <2 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %h2 = shl <2 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i2 = and <2 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j2 = or <2 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k2 = xor <2 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c4 = add <4 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: 
Found an estimated cost of 2 for instruction: %d4 = sub <4 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e4 = mul <4 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f4 = ashr <4 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %g4 = lshr <4 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %h4 = shl <4 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i4 = and <4 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j4 = or <4 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k4 = xor <4 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c8 = add <8 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %d8 = sub <8 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e8 = mul <8 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f8 = ashr <8 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %g8 = lshr <8 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %h8 = shl <8 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i8 = and <8 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j8 = or <8 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k8 = xor <8 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c16 = add <16 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %d16 = sub <16 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e16 = mul <16 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16 = ashr <16 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %g16 = lshr <16 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %h16 = shl <16 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i16 = and <16 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j16 = or <16 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k16 = xor <16 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVE4-LABEL: 'vi8' +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %c2 = add <2 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %d2 = sub <2 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %e2 = mul <2 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f2 = ashr <2 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %g2 = lshr <2 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %h2 = shl <2 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i2 = and <2 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %j2 = or <2 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %k2 = xor <2 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost 
Model: Found an estimated cost of 4 for instruction: %c4 = add <4 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %d4 = sub <4 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e4 = mul <4 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f4 = ashr <4 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %g4 = lshr <4 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %h4 = shl <4 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i4 = and <4 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %j4 = or <4 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %k4 = xor <4 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %c8 = add <8 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %d8 = sub <8 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e8 = mul <8 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f8 = ashr <8 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %g8 = lshr <8 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %h8 = shl <8 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i8 = and <8 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %j8 = or <8 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %k8 = xor <8 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an 
estimated cost of 4 for instruction: %c16 = add <16 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %d16 = sub <16 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e16 = mul <16 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f16 = ashr <16 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %g16 = lshr <16 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %h16 = shl <16 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i16 = and <16 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %j16 = or <16 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %k16 = xor <16 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %c2 = add <2 x i8> undef, undef + %d2 = sub <2 x i8> undef, undef + %e2 = mul <2 x i8> undef, undef + %f2 = ashr <2 x i8> undef, undef + %g2 = lshr <2 x i8> undef, undef + %h2 = shl <2 x i8> undef, undef + %i2 = and <2 x i8> undef, undef + %j2 = or <2 x i8> undef, undef + %k2 = xor <2 x i8> undef, undef + %c4 = add <4 x i8> undef, undef + %d4 = sub <4 x i8> undef, undef + %e4 = mul <4 x i8> undef, undef + %f4 = ashr <4 x i8> undef, undef + %g4 = lshr <4 x i8> undef, undef + %h4 = shl <4 x i8> undef, undef + %i4 = and <4 x i8> undef, undef + %j4 = or <4 x i8> undef, undef + %k4 = xor <4 x i8> undef, undef + %c8 = add <8 x i8> undef, undef + %d8 = sub <8 x i8> undef, undef + %e8 = mul <8 x i8> undef, undef + %f8 = ashr <8 x i8> undef, undef + %g8 = lshr <8 x i8> undef, undef + %h8 = shl <8 x i8> undef, undef + %i8 = and <8 x i8> undef, undef + %j8 = or <8 x i8> undef, undef + %k8 = xor <8 x i8> undef, undef + %c16 = add <16 x 
i8> undef, undef + %d16 = sub <16 x i8> undef, undef + %e16 = mul <16 x i8> undef, undef + %f16 = ashr <16 x i8> undef, undef + %g16 = lshr <16 x i8> undef, undef + %h16 = shl <16 x i8> undef, undef + %i16 = and <16 x i8> undef, undef + %j16 = or <16 x i8> undef, undef + %k16 = xor <16 x i8> undef, undef + ret void +} + +define void @vi16() { +; CHECK-MVE1-LABEL: 'vi16' +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %c2 = add <2 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %d2 = sub <2 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e2 = mul <2 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f2 = ashr <2 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %g2 = lshr <2 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %h2 = shl <2 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i2 = and <2 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j2 = or <2 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k2 = xor <2 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c4 = add <4 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d4 = sub <4 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e4 = mul <4 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f4 = ashr <4 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g4 = lshr <4 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %h4 = shl <4 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i4 = and <4 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j4 = or <4 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k4 = xor <4 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c8 = add <8 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d8 = sub <8 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e8 = mul <8 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f8 = ashr <8 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g8 = lshr <8 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h8 = shl <8 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = and <8 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j8 = or <8 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k8 = xor <8 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c16 = add <16 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %d16 = sub <16 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e16 = mul <16 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16 = ashr <16 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %g16 = lshr <16 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated 
cost of 2 for instruction: %h16 = shl <16 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i16 = and <16 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j16 = or <16 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k16 = xor <16 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVE2-LABEL: 'vi16' +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %c2 = add <2 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %d2 = sub <2 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %e2 = mul <2 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %f2 = ashr <2 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %g2 = lshr <2 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %h2 = shl <2 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i2 = and <2 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j2 = or <2 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k2 = xor <2 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c4 = add <4 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %d4 = sub <4 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e4 = mul <4 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f4 = ashr <4 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: 
Found an estimated cost of 2 for instruction: %g4 = lshr <4 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %h4 = shl <4 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i4 = and <4 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j4 = or <4 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k4 = xor <4 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c8 = add <8 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %d8 = sub <8 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e8 = mul <8 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f8 = ashr <8 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %g8 = lshr <8 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %h8 = shl <8 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i8 = and <8 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j8 = or <8 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k8 = xor <8 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %c16 = add <16 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %d16 = sub <16 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e16 = mul <16 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f16 = ashr <16 x i16> undef, undef +; CHECK-MVE2-NEXT: 
Cost Model: Found an estimated cost of 4 for instruction: %g16 = lshr <16 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %h16 = shl <16 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i16 = and <16 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %j16 = or <16 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %k16 = xor <16 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVE4-LABEL: 'vi16' +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %c2 = add <2 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %d2 = sub <2 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %e2 = mul <2 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f2 = ashr <2 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %g2 = lshr <2 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %h2 = shl <2 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i2 = and <2 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %j2 = or <2 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %k2 = xor <2 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %c4 = add <4 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %d4 = sub <4 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e4 = mul <4 x i16> undef, 
undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f4 = ashr <4 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %g4 = lshr <4 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %h4 = shl <4 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i4 = and <4 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %j4 = or <4 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %k4 = xor <4 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %c8 = add <8 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %d8 = sub <8 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e8 = mul <8 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f8 = ashr <8 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %g8 = lshr <8 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %h8 = shl <8 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i8 = and <8 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %j8 = or <8 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %k8 = xor <8 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %c16 = add <16 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %d16 = sub <16 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e16 = mul <16 x i16> 
undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f16 = ashr <16 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %g16 = lshr <16 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %h16 = shl <16 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %i16 = and <16 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %j16 = or <16 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %k16 = xor <16 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %c2 = add <2 x i16> undef, undef + %d2 = sub <2 x i16> undef, undef + %e2 = mul <2 x i16> undef, undef + %f2 = ashr <2 x i16> undef, undef + %g2 = lshr <2 x i16> undef, undef + %h2 = shl <2 x i16> undef, undef + %i2 = and <2 x i16> undef, undef + %j2 = or <2 x i16> undef, undef + %k2 = xor <2 x i16> undef, undef + %c4 = add <4 x i16> undef, undef + %d4 = sub <4 x i16> undef, undef + %e4 = mul <4 x i16> undef, undef + %f4 = ashr <4 x i16> undef, undef + %g4 = lshr <4 x i16> undef, undef + %h4 = shl <4 x i16> undef, undef + %i4 = and <4 x i16> undef, undef + %j4 = or <4 x i16> undef, undef + %k4 = xor <4 x i16> undef, undef + %c8 = add <8 x i16> undef, undef + %d8 = sub <8 x i16> undef, undef + %e8 = mul <8 x i16> undef, undef + %f8 = ashr <8 x i16> undef, undef + %g8 = lshr <8 x i16> undef, undef + %h8 = shl <8 x i16> undef, undef + %i8 = and <8 x i16> undef, undef + %j8 = or <8 x i16> undef, undef + %k8 = xor <8 x i16> undef, undef + %c16 = add <16 x i16> undef, undef + %d16 = sub <16 x i16> undef, undef + %e16 = mul <16 x i16> undef, undef + %f16 = ashr <16 x i16> undef, undef + %g16 = lshr <16 x i16> undef, undef + %h16 = shl <16 x i16> undef, undef + %i16 = and <16 x i16> undef, undef + %j16 = 
or <16 x i16> undef, undef + %k16 = xor <16 x i16> undef, undef + ret void +} + +define void @vi32() { +; CHECK-MVE1-LABEL: 'vi32' +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %c2 = add <2 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %d2 = sub <2 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e2 = mul <2 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f2 = ashr <2 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %g2 = lshr <2 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %h2 = shl <2 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i2 = and <2 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j2 = or <2 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k2 = xor <2 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c4 = add <4 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d4 = sub <4 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e4 = mul <4 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f4 = ashr <4 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g4 = lshr <4 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h4 = shl <4 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i4 = and <4 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j4 = or <4 
x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k4 = xor <4 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c8 = add <8 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %d8 = sub <8 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e8 = mul <8 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f8 = ashr <8 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %g8 = lshr <8 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %h8 = shl <8 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i8 = and <8 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j8 = or <8 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k8 = xor <8 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %c16 = add <16 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %d16 = sub <16 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e16 = mul <16 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f16 = ashr <16 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %g16 = lshr <16 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %h16 = shl <16 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i16 = and <16 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for 
instruction: %j16 = or <16 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %k16 = xor <16 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVE2-LABEL: 'vi32' +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %c2 = add <2 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %d2 = sub <2 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %e2 = mul <2 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %f2 = ashr <2 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %g2 = lshr <2 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %h2 = shl <2 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i2 = and <2 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j2 = or <2 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k2 = xor <2 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c4 = add <4 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %d4 = sub <4 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e4 = mul <4 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f4 = ashr <4 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %g4 = lshr <4 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %h4 = shl <4 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated 
cost of 2 for instruction: %i4 = and <4 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j4 = or <4 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k4 = xor <4 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %c8 = add <8 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %d8 = sub <8 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e8 = mul <8 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f8 = ashr <8 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %g8 = lshr <8 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %h8 = shl <8 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i8 = and <8 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %j8 = or <8 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %k8 = xor <8 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %c16 = add <16 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %d16 = sub <16 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e16 = mul <16 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f16 = ashr <16 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %g16 = lshr <16 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %h16 = shl <16 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found 
an estimated cost of 8 for instruction: %i16 = and <16 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %j16 = or <16 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %k16 = xor <16 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVE4-LABEL: 'vi32' +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %c2 = add <2 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %d2 = sub <2 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %e2 = mul <2 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f2 = ashr <2 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %g2 = lshr <2 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %h2 = shl <2 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i2 = and <2 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %j2 = or <2 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %k2 = xor <2 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %c4 = add <4 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %d4 = sub <4 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e4 = mul <4 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f4 = ashr <4 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %g4 = lshr <4 x i32> undef, undef +; 
CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %h4 = shl <4 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i4 = and <4 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %j4 = or <4 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %k4 = xor <4 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %c8 = add <8 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %d8 = sub <8 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e8 = mul <8 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f8 = ashr <8 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %g8 = lshr <8 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %h8 = shl <8 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %i8 = and <8 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %j8 = or <8 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %k8 = xor <8 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %c16 = add <16 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %d16 = sub <16 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e16 = mul <16 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %f16 = ashr <16 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %g16 = lshr <16 x i32> 
undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %h16 = shl <16 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %i16 = and <16 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %j16 = or <16 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %k16 = xor <16 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %c2 = add <2 x i32> undef, undef + %d2 = sub <2 x i32> undef, undef + %e2 = mul <2 x i32> undef, undef + %f2 = ashr <2 x i32> undef, undef + %g2 = lshr <2 x i32> undef, undef + %h2 = shl <2 x i32> undef, undef + %i2 = and <2 x i32> undef, undef + %j2 = or <2 x i32> undef, undef + %k2 = xor <2 x i32> undef, undef + %c4 = add <4 x i32> undef, undef + %d4 = sub <4 x i32> undef, undef + %e4 = mul <4 x i32> undef, undef + %f4 = ashr <4 x i32> undef, undef + %g4 = lshr <4 x i32> undef, undef + %h4 = shl <4 x i32> undef, undef + %i4 = and <4 x i32> undef, undef + %j4 = or <4 x i32> undef, undef + %k4 = xor <4 x i32> undef, undef + %c8 = add <8 x i32> undef, undef + %d8 = sub <8 x i32> undef, undef + %e8 = mul <8 x i32> undef, undef + %f8 = ashr <8 x i32> undef, undef + %g8 = lshr <8 x i32> undef, undef + %h8 = shl <8 x i32> undef, undef + %i8 = and <8 x i32> undef, undef + %j8 = or <8 x i32> undef, undef + %k8 = xor <8 x i32> undef, undef + %c16 = add <16 x i32> undef, undef + %d16 = sub <16 x i32> undef, undef + %e16 = mul <16 x i32> undef, undef + %f16 = ashr <16 x i32> undef, undef + %g16 = lshr <16 x i32> undef, undef + %h16 = shl <16 x i32> undef, undef + %i16 = and <16 x i32> undef, undef + %j16 = or <16 x i32> undef, undef + %k16 = xor <16 x i32> undef, undef + ret void +} + +define void @vi64() { +; CHECK-MVE1-LABEL: 'vi64' +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %c2 = add <2 x 
i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %d2 = sub <2 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e2 = mul <2 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f2 = ashr <2 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %g2 = lshr <2 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %h2 = shl <2 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i2 = and <2 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j2 = or <2 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k2 = xor <2 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %c4 = add <4 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %d4 = sub <4 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %e4 = mul <4 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %f4 = ashr <4 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %g4 = lshr <4 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %h4 = shl <4 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i4 = and <4 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j4 = or <4 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k4 = xor <4 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %c8 = 
add <8 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %d8 = sub <8 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %e8 = mul <8 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f8 = ashr <8 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %g8 = lshr <8 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %h8 = shl <8 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i8 = and <8 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %j8 = or <8 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %k8 = xor <8 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %c16 = add <16 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %d16 = sub <16 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %e16 = mul <16 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %f16 = ashr <16 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %g16 = lshr <16 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %h16 = shl <16 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %i16 = and <16 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %j16 = or <16 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %k16 = xor <16 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an 
estimated cost of 0 for instruction: ret void +; +; CHECK-MVE2-LABEL: 'vi64' +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %c2 = add <2 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %d2 = sub <2 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e2 = mul <2 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f2 = ashr <2 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %g2 = lshr <2 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %h2 = shl <2 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i2 = and <2 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j2 = or <2 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k2 = xor <2 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %c4 = add <4 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %d4 = sub <4 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %e4 = mul <4 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %f4 = ashr <4 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %g4 = lshr <4 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %h4 = shl <4 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i4 = and <4 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %j4 = or <4 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost 
Model: Found an estimated cost of 4 for instruction: %k4 = xor <4 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %c8 = add <8 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %d8 = sub <8 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %e8 = mul <8 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f8 = ashr <8 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %g8 = lshr <8 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %h8 = shl <8 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %i8 = and <8 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %j8 = or <8 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %k8 = xor <8 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %c16 = add <16 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %d16 = sub <16 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %e16 = mul <16 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %f16 = ashr <16 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %g16 = lshr <16 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %h16 = shl <16 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %i16 = and <16 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %j16 = or <16 x i64> 
undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %k16 = xor <16 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVE4-LABEL: 'vi64' +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %c2 = add <2 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %d2 = sub <2 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %e2 = mul <2 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %f2 = ashr <2 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %g2 = lshr <2 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %h2 = shl <2 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i2 = and <2 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %j2 = or <2 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %k2 = xor <2 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %c4 = add <4 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %d4 = sub <4 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e4 = mul <4 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %f4 = ashr <4 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %g4 = lshr <4 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %h4 = shl <4 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 8 for 
instruction: %i4 = and <4 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %j4 = or <4 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %k4 = xor <4 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %c8 = add <8 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %d8 = sub <8 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %e8 = mul <8 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %f8 = ashr <8 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %g8 = lshr <8 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %h8 = shl <8 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %i8 = and <8 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %j8 = or <8 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %k8 = xor <8 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 544 for instruction: %c16 = add <16 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 544 for instruction: %d16 = sub <16 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 544 for instruction: %e16 = mul <16 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 544 for instruction: %f16 = ashr <16 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 544 for instruction: %g16 = lshr <16 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 544 for instruction: %h16 = shl <16 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost 
Model: Found an estimated cost of 32 for instruction: %i16 = and <16 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %j16 = or <16 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %k16 = xor <16 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %c2 = add <2 x i64> undef, undef + %d2 = sub <2 x i64> undef, undef + %e2 = mul <2 x i64> undef, undef + %f2 = ashr <2 x i64> undef, undef + %g2 = lshr <2 x i64> undef, undef + %h2 = shl <2 x i64> undef, undef + %i2 = and <2 x i64> undef, undef + %j2 = or <2 x i64> undef, undef + %k2 = xor <2 x i64> undef, undef + %c4 = add <4 x i64> undef, undef + %d4 = sub <4 x i64> undef, undef + %e4 = mul <4 x i64> undef, undef + %f4 = ashr <4 x i64> undef, undef + %g4 = lshr <4 x i64> undef, undef + %h4 = shl <4 x i64> undef, undef + %i4 = and <4 x i64> undef, undef + %j4 = or <4 x i64> undef, undef + %k4 = xor <4 x i64> undef, undef + %c8 = add <8 x i64> undef, undef + %d8 = sub <8 x i64> undef, undef + %e8 = mul <8 x i64> undef, undef + %f8 = ashr <8 x i64> undef, undef + %g8 = lshr <8 x i64> undef, undef + %h8 = shl <8 x i64> undef, undef + %i8 = and <8 x i64> undef, undef + %j8 = or <8 x i64> undef, undef + %k8 = xor <8 x i64> undef, undef + %c16 = add <16 x i64> undef, undef + %d16 = sub <16 x i64> undef, undef + %e16 = mul <16 x i64> undef, undef + %f16 = ashr <16 x i64> undef, undef + %g16 = lshr <16 x i64> undef, undef + %h16 = shl <16 x i64> undef, undef + %i16 = and <16 x i64> undef, undef + %j16 = or <16 x i64> undef, undef + %k16 = xor <16 x i64> undef, undef + ret void +} diff --git a/llvm/test/Analysis/CostModel/ARM/cast.ll b/llvm/test/Analysis/CostModel/ARM/cast.ll index 72308834c..0b068b5e0 100644 --- a/llvm/test/Analysis/CostModel/ARM/cast.ll +++ b/llvm/test/Analysis/CostModel/ARM/cast.ll @@ -1,548 +1,954 @@ -; RUN: opt < %s -cost-model -analyze 
-mtriple=thumbv7-apple-ios6.0.0 -mcpu=cortex-a8 | FileCheck %s -target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" -target triple = "thumbv7-apple-ios6.0.0" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -mcpu=cortex-a9 | FileCheck %s --check-prefix=CHECK-NEON +; RUN: opt -cost-model -analyze -mtriple=thumbv8.1-m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefix=CHECK-MVE define i32 @casts() { - - ; -- scalars -- - ; CHECK: Found an estimated cost of 1 for instruction: %r0 = sext i1 undef to i8 +; CHECK-NEON-LABEL: 'casts' +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r0 = sext i1 undef to i8 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r1 = zext i1 undef to i8 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r2 = sext i1 undef to i16 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r3 = zext i1 undef to i16 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r4 = sext i1 undef to i32 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r5 = zext i1 undef to i32 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r6 = sext i1 undef to i64 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r7 = zext i1 undef to i64 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r8 = trunc i8 undef to i1 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = sext i8 undef to i16 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = zext i8 undef to i16 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = sext i8 undef to i32 
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r12 = zext i8 undef to i32 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r13 = sext i8 undef to i64 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r14 = zext i8 undef to i64 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r15 = trunc i16 undef to i1 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r16 = trunc i16 undef to i8 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r17 = sext i16 undef to i32 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r18 = zext i16 undef to i32 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r19 = sext i16 undef to i64 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r20 = zext i16 undef to i64 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r21 = trunc i32 undef to i1 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r22 = trunc i32 undef to i8 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r23 = trunc i32 undef to i16 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r24 = sext i32 undef to i64 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r25 = zext i32 undef to i64 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r26 = trunc i64 undef to i1 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r27 = trunc i64 undef to i8 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r28 = trunc i64 undef to i16 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r29 = trunc i64 undef to i32 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %r30 = fptoui float undef to i1 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r31 = fptosi float undef to i1 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r32 = fptoui float undef to i8 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r33 = fptosi float undef to i8 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r34 = fptoui float undef to i16 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r35 = fptosi float undef to i16 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r36 = fptoui float undef to i32 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r37 = fptosi float undef to i32 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r38 = fptoui float undef to i64 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r39 = fptosi float undef to i64 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r40 = fptoui double undef to i1 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r41 = fptosi double undef to i1 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r42 = fptoui double undef to i8 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r43 = fptosi double undef to i8 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r44 = fptoui double undef to i16 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r45 = fptosi double undef to i16 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r46 = fptoui double undef to i32 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r47 = fptosi double undef to i32 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 
for instruction: %r48 = fptoui double undef to i64 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r49 = fptosi double undef to i64 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r50 = sitofp i1 undef to float +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r51 = uitofp i1 undef to float +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r52 = sitofp i1 undef to double +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r53 = uitofp i1 undef to double +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r54 = sitofp i8 undef to float +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r55 = uitofp i8 undef to float +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r56 = sitofp i8 undef to double +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r57 = uitofp i8 undef to double +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r58 = sitofp i16 undef to float +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r59 = uitofp i16 undef to float +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r60 = sitofp i16 undef to double +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r61 = uitofp i16 undef to double +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r62 = sitofp i32 undef to float +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r63 = uitofp i32 undef to float +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r64 = sitofp i32 undef to double +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r65 = uitofp i32 undef to double +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 
10 for instruction: %r66 = sitofp i64 undef to float +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r67 = uitofp i64 undef to float +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r68 = sitofp i64 undef to double +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r69 = uitofp i64 undef to double +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q70 = sext <4 x i8> undef to <4 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q71 = sext <8 x i8> undef to <8 x i16> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s70 = sext <4 x i8> undef to <4 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r70 = sext <8 x i8> undef to <8 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r71 = sext <16 x i8> undef to <16 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q72 = zext <4 x i8> undef to <4 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q73 = zext <8 x i8> undef to <8 x i16> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s72 = zext <4 x i8> undef to <4 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r72 = zext <8 x i8> undef to <8 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r73 = zext <16 x i8> undef to <16 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %rext_0 = sext <8 x i8> undef to <8 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %rext_1 = zext <8 x i8> undef to <8 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %rext_2 = sext <8 x i16> undef to <8 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 6 
for instruction: %rext_3 = zext <8 x i16> undef to <8 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rext_4 = sext <4 x i16> undef to <4 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rext_5 = zext <4 x i16> undef to <4 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_6 = sext <2 x i8> undef to <2 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_7 = zext <2 x i8> undef to <2 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_8 = sext <2 x i16> undef to <2 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_9 = zext <2 x i16> undef to <2 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_a = sext <2 x i32> undef to <2 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_b = zext <2 x i32> undef to <2 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r74 = trunc <8 x i32> undef to <8 x i8> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r75 = trunc <16 x i32> undef to <16 x i8> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r80 = fptrunc double undef to float +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r81 = fptrunc <2 x double> undef to <2 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r82 = fptrunc <4 x double> undef to <4 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r83 = fptrunc <8 x double> undef to <8 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r84 = fptrunc <16 x double> undef to <16 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r85 = fpext float 
undef to double +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r86 = fpext <2 x float> undef to <2 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r87 = fpext <4 x float> undef to <4 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r88 = fpext <8 x float> undef to <8 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r89 = fpext <16 x float> undef to <16 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r90 = fptoui <2 x float> undef to <2 x i1> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r91 = fptosi <2 x float> undef to <2 x i1> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r92 = fptoui <2 x float> undef to <2 x i8> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r93 = fptosi <2 x float> undef to <2 x i8> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r94 = fptoui <2 x float> undef to <2 x i16> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r95 = fptosi <2 x float> undef to <2 x i16> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r96 = fptoui <2 x float> undef to <2 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r97 = fptosi <2 x float> undef to <2 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r98 = fptoui <2 x float> undef to <2 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r99 = fptosi <2 x float> undef to <2 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r100 = fptoui <2 x double> undef to <2 x i1> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r101 = fptosi <2 x double> undef to <2 x i1> +; 
CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r102 = fptoui <2 x double> undef to <2 x i8> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r103 = fptosi <2 x double> undef to <2 x i8> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r104 = fptoui <2 x double> undef to <2 x i16> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r105 = fptosi <2 x double> undef to <2 x i16> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r106 = fptoui <2 x double> undef to <2 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r107 = fptosi <2 x double> undef to <2 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r108 = fptoui <2 x double> undef to <2 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r109 = fptosi <2 x double> undef to <2 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r110 = fptoui <4 x float> undef to <4 x i1> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r111 = fptosi <4 x float> undef to <4 x i1> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r112 = fptoui <4 x float> undef to <4 x i8> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r113 = fptosi <4 x float> undef to <4 x i8> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r114 = fptoui <4 x float> undef to <4 x i16> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r115 = fptosi <4 x float> undef to <4 x i16> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r116 = fptoui <4 x float> undef to <4 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r117 = fptosi <4 x float> undef to <4 x i32> +; 
CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %r118 = fptoui <4 x float> undef to <4 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %r119 = fptosi <4 x float> undef to <4 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %r120 = fptoui <4 x double> undef to <4 x i1> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %r121 = fptosi <4 x double> undef to <4 x i1> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %r122 = fptoui <4 x double> undef to <4 x i8> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %r123 = fptosi <4 x double> undef to <4 x i8> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %r124 = fptoui <4 x double> undef to <4 x i16> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %r125 = fptosi <4 x double> undef to <4 x i16> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r126 = fptoui <4 x double> undef to <4 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r127 = fptosi <4 x double> undef to <4 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %r128 = fptoui <4 x double> undef to <4 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %r129 = fptosi <4 x double> undef to <4 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %r130 = fptoui <8 x float> undef to <8 x i1> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %r131 = fptosi <8 x float> undef to <8 x i1> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r132 = fptoui <8 x float> undef to <8 x i8> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r133 = fptosi <8 x float> undef to <8 x i8> +; 
CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r134 = fptoui <8 x float> undef to <8 x i16> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r135 = fptosi <8 x float> undef to <8 x i16> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r136 = fptoui <8 x float> undef to <8 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r137 = fptosi <8 x float> undef to <8 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r140 = fptoui <8 x double> undef to <8 x i1> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r141 = fptosi <8 x double> undef to <8 x i1> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r142 = fptoui <8 x double> undef to <8 x i8> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r143 = fptosi <8 x double> undef to <8 x i8> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r144 = fptoui <8 x double> undef to <8 x i16> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r145 = fptosi <8 x double> undef to <8 x i16> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %r148 = fptoui <8 x double> undef to <8 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %r149 = fptosi <8 x double> undef to <8 x 
i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %r150 = fptoui <16 x float> undef to <16 x i1> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %r151 = fptosi <16 x float> undef to <16 x i1> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r152 = fptoui <16 x float> undef to <16 x i8> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r153 = fptosi <16 x float> undef to <16 x i8> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r154 = fptoui <16 x float> undef to <16 x i16> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r155 = fptosi <16 x float> undef to <16 x i16> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r156 = fptoui <16 x float> undef to <16 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r157 = fptosi <16 x float> undef to <16 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 263 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 263 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r161 = fptosi <16 x double> undef to <16 x i1> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r162 = fptoui <16 x double> undef to <16 x i8> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r163 = fptosi <16 x double> undef to <16 x i8> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r165 = 
fptosi <16 x double> undef to <16 x i16> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 263 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 263 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r170 = uitofp <2 x i1> undef to <2 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r171 = sitofp <2 x i1> undef to <2 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r172 = uitofp <2 x i8> undef to <2 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r173 = sitofp <2 x i8> undef to <2 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r174 = uitofp <2 x i16> undef to <2 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r175 = sitofp <2 x i16> undef to <2 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r176 = uitofp <2 x i32> undef to <2 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r177 = sitofp <2 x i32> undef to <2 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r180 = uitofp <2 x i1> undef to <2 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: 
%r181 = sitofp <2 x i1> undef to <2 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r182 = uitofp <2 x i8> undef to <2 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r183 = sitofp <2 x i8> undef to <2 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r184 = uitofp <2 x i16> undef to <2 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r185 = sitofp <2 x i16> undef to <2 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r186 = uitofp <2 x i32> undef to <2 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r187 = sitofp <2 x i32> undef to <2 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r188 = uitofp <2 x i64> undef to <2 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r189 = sitofp <2 x i64> undef to <2 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r190 = uitofp <4 x i1> undef to <4 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r191 = sitofp <4 x i1> undef to <4 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r192 = uitofp <4 x i8> undef to <4 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r193 = sitofp <4 x i8> undef to <4 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r194 = uitofp <4 x i16> undef to <4 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r195 = sitofp <4 x i16> undef to <4 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r196 = uitofp <4 x i32> undef to <4 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r197 = 
sitofp <4 x i32> undef to <4 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %r198 = uitofp <4 x i64> undef to <4 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %r199 = sitofp <4 x i64> undef to <4 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %r200 = uitofp <4 x i1> undef to <4 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %r201 = sitofp <4 x i1> undef to <4 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %r202 = uitofp <4 x i8> undef to <4 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %r203 = sitofp <4 x i8> undef to <4 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r204 = uitofp <4 x i16> undef to <4 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r205 = sitofp <4 x i16> undef to <4 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r206 = uitofp <4 x i32> undef to <4 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r207 = sitofp <4 x i32> undef to <4 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %r208 = uitofp <4 x i64> undef to <4 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %r209 = sitofp <4 x i64> undef to <4 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r210 = uitofp <8 x i1> undef to <8 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r211 = sitofp <8 x i1> undef to <8 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r212 = uitofp <8 x i8> undef to <8 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r213 = sitofp 
<8 x i8> undef to <8 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r214 = uitofp <8 x i16> undef to <8 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r215 = sitofp <8 x i16> undef to <8 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r216 = uitofp <8 x i32> undef to <8 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r217 = sitofp <8 x i32> undef to <8 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %r223 = sitofp <8 x i8> undef to <8 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r224 = uitofp <8 x i16> undef to <8 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r225 = sitofp <8 x i16> undef to <8 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r226 = uitofp <8 x i16> undef to <8 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r227 = sitofp <8 x i16> undef to <8 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %r228 = uitofp <8 x i64> undef to <8 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %r229 = 
sitofp <8 x i64> undef to <8 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r230 = uitofp <16 x i1> undef to <16 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r231 = sitofp <16 x i1> undef to <16 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r232 = uitofp <16 x i8> undef to <16 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r233 = sitofp <16 x i8> undef to <16 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r234 = uitofp <16 x i16> undef to <16 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r235 = sitofp <16 x i16> undef to <16 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r236 = uitofp <16 x i32> undef to <16 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r237 = sitofp <16 x i32> undef to <16 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %r243 = sitofp <16 x i8> undef to <16 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 
31 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 199 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 199 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; CHECK-MVE-LABEL: 'casts' +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r0 = sext i1 undef to i8 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r1 = zext i1 undef to i8 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r2 = sext i1 undef to i16 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r3 = zext i1 undef to i16 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r4 = sext i1 undef to i32 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r5 = zext i1 undef to i32 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r6 = sext i1 undef to i64 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r7 = zext i1 undef to i64 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r8 = trunc i8 undef to i1 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = sext i8 undef to i16 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = zext i8 undef to i16 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = sext i8 undef to i32 +; CHECK-MVE-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %r12 = zext i8 undef to i32 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r13 = sext i8 undef to i64 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r14 = zext i8 undef to i64 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r15 = trunc i16 undef to i1 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r16 = trunc i16 undef to i8 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r17 = sext i16 undef to i32 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r18 = zext i16 undef to i32 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r19 = sext i16 undef to i64 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r20 = zext i16 undef to i64 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r21 = trunc i32 undef to i1 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r22 = trunc i32 undef to i8 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r23 = trunc i32 undef to i16 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r24 = sext i32 undef to i64 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r25 = zext i32 undef to i64 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r26 = trunc i64 undef to i1 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r27 = trunc i64 undef to i8 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r28 = trunc i64 undef to i16 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r29 = trunc i64 undef to i32 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r30 = fptoui float undef to i1 +; CHECK-MVE-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %r31 = fptosi float undef to i1 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r32 = fptoui float undef to i8 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r33 = fptosi float undef to i8 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r34 = fptoui float undef to i16 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r35 = fptosi float undef to i16 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r36 = fptoui float undef to i32 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r37 = fptosi float undef to i32 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r38 = fptoui float undef to i64 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r39 = fptosi float undef to i64 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r40 = fptoui double undef to i1 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r41 = fptosi double undef to i1 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r42 = fptoui double undef to i8 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r43 = fptosi double undef to i8 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r44 = fptoui double undef to i16 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r45 = fptosi double undef to i16 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r46 = fptoui double undef to i32 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r47 = fptosi double undef to i32 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r48 = fptoui double undef to i64 +; CHECK-MVE-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: %r49 = fptosi double undef to i64 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r50 = sitofp i1 undef to float +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r51 = uitofp i1 undef to float +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r52 = sitofp i1 undef to double +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r53 = uitofp i1 undef to double +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r54 = sitofp i8 undef to float +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r55 = uitofp i8 undef to float +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r56 = sitofp i8 undef to double +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r57 = uitofp i8 undef to double +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r58 = sitofp i16 undef to float +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r59 = uitofp i16 undef to float +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r60 = sitofp i16 undef to double +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r61 = uitofp i16 undef to double +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r62 = sitofp i32 undef to float +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r63 = uitofp i32 undef to float +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r64 = sitofp i32 undef to double +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r65 = uitofp i32 undef to double +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r66 = sitofp i64 undef to float +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost 
of 1 for instruction: %r67 = uitofp i64 undef to float +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r68 = sitofp i64 undef to double +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r69 = uitofp i64 undef to double +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %q70 = sext <4 x i8> undef to <4 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %q71 = sext <8 x i8> undef to <8 x i16> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s70 = sext <4 x i8> undef to <4 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %r70 = sext <8 x i8> undef to <8 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %r71 = sext <16 x i8> undef to <16 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %q72 = zext <4 x i8> undef to <4 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %q73 = zext <8 x i8> undef to <8 x i16> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s72 = zext <4 x i8> undef to <4 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %r72 = zext <8 x i8> undef to <8 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %r73 = zext <16 x i8> undef to <16 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %rext_0 = sext <8 x i8> undef to <8 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %rext_1 = zext <8 x i8> undef to <8 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %rext_2 = sext <8 x i16> undef to <8 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %rext_3 = zext <8 x i16> undef to <8 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost 
of 82 for instruction: %rext_4 = sext <4 x i16> undef to <4 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %rext_5 = zext <4 x i16> undef to <4 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %rext_6 = sext <2 x i8> undef to <2 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rext_7 = zext <2 x i8> undef to <2 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %rext_8 = sext <2 x i16> undef to <2 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rext_9 = zext <2 x i16> undef to <2 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %rext_a = sext <2 x i32> undef to <2 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rext_b = zext <2 x i32> undef to <2 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r74 = trunc <8 x i32> undef to <8 x i8> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r75 = trunc <16 x i32> undef to <16 x i8> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r80 = fptrunc double undef to float +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r81 = fptrunc <2 x double> undef to <2 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r82 = fptrunc <4 x double> undef to <4 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r83 = fptrunc <8 x double> undef to <8 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %r84 = fptrunc <16 x double> undef to <16 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r85 = fpext float undef to double +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r86 = fpext <2 x float> undef 
to <2 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r87 = fpext <4 x float> undef to <4 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r88 = fpext <8 x float> undef to <8 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 661 for instruction: %r89 = fpext <16 x float> undef to <16 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r90 = fptoui <2 x float> undef to <2 x i1> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r91 = fptosi <2 x float> undef to <2 x i1> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r92 = fptoui <2 x float> undef to <2 x i8> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r93 = fptosi <2 x float> undef to <2 x i8> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r94 = fptoui <2 x float> undef to <2 x i16> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r95 = fptosi <2 x float> undef to <2 x i16> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r96 = fptoui <2 x float> undef to <2 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r97 = fptosi <2 x float> undef to <2 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r98 = fptoui <2 x float> undef to <2 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r99 = fptosi <2 x float> undef to <2 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r100 = fptoui <2 x double> undef to <2 x i1> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r101 = fptosi <2 x double> undef to <2 x i1> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r102 = fptoui <2 x double> undef to <2 x i8> +; 
CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r103 = fptosi <2 x double> undef to <2 x i8> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r104 = fptoui <2 x double> undef to <2 x i16> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r105 = fptosi <2 x double> undef to <2 x i16> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r106 = fptoui <2 x double> undef to <2 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r107 = fptosi <2 x double> undef to <2 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r108 = fptoui <2 x double> undef to <2 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r109 = fptosi <2 x double> undef to <2 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r110 = fptoui <4 x float> undef to <4 x i1> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r111 = fptosi <4 x float> undef to <4 x i1> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r112 = fptoui <4 x float> undef to <4 x i8> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r113 = fptosi <4 x float> undef to <4 x i8> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r114 = fptoui <4 x float> undef to <4 x i16> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r115 = fptosi <4 x float> undef to <4 x i16> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r116 = fptoui <4 x float> undef to <4 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r117 = fptosi <4 x float> undef to <4 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r118 = fptoui <4 x float> undef to <4 x i64> +; CHECK-MVE-NEXT: Cost 
Model: Found an estimated cost of 82 for instruction: %r119 = fptosi <4 x float> undef to <4 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r120 = fptoui <4 x double> undef to <4 x i1> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r121 = fptosi <4 x double> undef to <4 x i1> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r122 = fptoui <4 x double> undef to <4 x i8> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r123 = fptosi <4 x double> undef to <4 x i8> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r124 = fptoui <4 x double> undef to <4 x i16> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r125 = fptosi <4 x double> undef to <4 x i16> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r126 = fptoui <4 x double> undef to <4 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r127 = fptosi <4 x double> undef to <4 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r128 = fptoui <4 x double> undef to <4 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r129 = fptosi <4 x double> undef to <4 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r130 = fptoui <8 x float> undef to <8 x i1> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r131 = fptosi <8 x float> undef to <8 x i1> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r132 = fptoui <8 x float> undef to <8 x i8> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r133 = fptosi <8 x float> undef to <8 x i8> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r134 = fptoui <8 x float> undef to <8 x i16> +; CHECK-MVE-NEXT: Cost Model: Found 
an estimated cost of 10 for instruction: %r135 = fptosi <8 x float> undef to <8 x i16> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r136 = fptoui <8 x float> undef to <8 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r137 = fptosi <8 x float> undef to <8 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r140 = fptoui <8 x double> undef to <8 x i1> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r141 = fptosi <8 x double> undef to <8 x i1> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r142 = fptoui <8 x double> undef to <8 x i8> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r143 = fptosi <8 x double> undef to <8 x i8> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r144 = fptoui <8 x double> undef to <8 x i16> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r145 = fptosi <8 x double> undef to <8 x i16> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r148 = fptoui <8 x double> undef to <8 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r149 = fptosi <8 x double> undef to <8 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r150 = fptoui <16 x float> undef to <16 x i1> +; CHECK-MVE-NEXT: Cost Model: 
Found an estimated cost of 42 for instruction: %r151 = fptosi <16 x float> undef to <16 x i1> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r152 = fptoui <16 x float> undef to <16 x i8> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r153 = fptosi <16 x float> undef to <16 x i8> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r154 = fptoui <16 x float> undef to <16 x i16> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r155 = fptosi <16 x float> undef to <16 x i16> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r156 = fptoui <16 x float> undef to <16 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r157 = fptosi <16 x float> undef to <16 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1322 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1322 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 661 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 661 for instruction: %r161 = fptosi <16 x double> undef to <16 x i1> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 661 for instruction: %r162 = fptoui <16 x double> undef to <16 x i8> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 661 for instruction: %r163 = fptosi <16 x double> undef to <16 x i8> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 661 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 661 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 661 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> +; 
CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 661 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 661 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 661 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r170 = uitofp <2 x i1> undef to <2 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r171 = sitofp <2 x i1> undef to <2 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r172 = uitofp <2 x i8> undef to <2 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r173 = sitofp <2 x i8> undef to <2 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r174 = uitofp <2 x i16> undef to <2 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r175 = sitofp <2 x i16> undef to <2 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r176 = uitofp <2 x i32> undef to <2 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r177 = sitofp <2 x i32> undef to <2 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r180 = uitofp <2 x i1> undef to <2 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r181 = sitofp <2 x i1> undef to <2 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r182 = uitofp <2 x i8> undef to <2 x double> +; CHECK-MVE-NEXT: 
Cost Model: Found an estimated cost of 32 for instruction: %r183 = sitofp <2 x i8> undef to <2 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r184 = uitofp <2 x i16> undef to <2 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r185 = sitofp <2 x i16> undef to <2 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r186 = uitofp <2 x i32> undef to <2 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r187 = sitofp <2 x i32> undef to <2 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r188 = uitofp <2 x i64> undef to <2 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r189 = sitofp <2 x i64> undef to <2 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r190 = uitofp <4 x i1> undef to <4 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r191 = sitofp <4 x i1> undef to <4 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r192 = uitofp <4 x i8> undef to <4 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r193 = sitofp <4 x i8> undef to <4 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r194 = uitofp <4 x i16> undef to <4 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r195 = sitofp <4 x i16> undef to <4 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r196 = uitofp <4 x i32> undef to <4 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r197 = sitofp <4 x i32> undef to <4 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r198 = uitofp <4 x i64> undef to <4 x float> +; CHECK-MVE-NEXT: Cost Model: Found an 
estimated cost of 10 for instruction: %r199 = sitofp <4 x i64> undef to <4 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r200 = uitofp <4 x i1> undef to <4 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r201 = sitofp <4 x i1> undef to <4 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r202 = uitofp <4 x i8> undef to <4 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r203 = sitofp <4 x i8> undef to <4 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r204 = uitofp <4 x i16> undef to <4 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r205 = sitofp <4 x i16> undef to <4 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r206 = uitofp <4 x i32> undef to <4 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r207 = sitofp <4 x i32> undef to <4 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r208 = uitofp <4 x i64> undef to <4 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r209 = sitofp <4 x i64> undef to <4 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r210 = uitofp <8 x i1> undef to <8 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r211 = sitofp <8 x i1> undef to <8 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r212 = uitofp <8 x i8> undef to <8 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r213 = sitofp <8 x i8> undef to <8 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r214 = uitofp <8 x i16> undef to <8 x float> +; CHECK-MVE-NEXT: Cost Model: Found an 
estimated cost of 10 for instruction: %r215 = sitofp <8 x i16> undef to <8 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r216 = uitofp <8 x i32> undef to <8 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r217 = sitofp <8 x i32> undef to <8 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r223 = sitofp <8 x i8> undef to <8 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r224 = uitofp <8 x i16> undef to <8 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r225 = sitofp <8 x i16> undef to <8 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r226 = uitofp <8 x i16> undef to <8 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r227 = sitofp <8 x i16> undef to <8 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r228 = uitofp <8 x i64> undef to <8 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r229 = sitofp <8 x i64> undef to <8 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r230 = uitofp <16 x i1> undef to <16 x float> +; CHECK-MVE-NEXT: Cost Model: Found an 
estimated cost of 42 for instruction: %r231 = sitofp <16 x i1> undef to <16 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r232 = uitofp <16 x i8> undef to <16 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r233 = sitofp <16 x i8> undef to <16 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r234 = uitofp <16 x i16> undef to <16 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r235 = sitofp <16 x i16> undef to <16 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r236 = uitofp <16 x i32> undef to <16 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r237 = sitofp <16 x i32> undef to <16 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 170 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 170 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1045 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1045 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1045 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1045 for instruction: %r243 = sitofp <16 x i8> undef to <16 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1045 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1045 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1045 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> +; 
CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1045 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1045 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1045 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; + ; -- scalars -- %r0 = sext i1 undef to i8 - ; CHECK: Found an estimated cost of 1 for instruction: %r1 = zext i1 undef to i8 %r1 = zext i1 undef to i8 - ; CHECK: Found an estimated cost of 1 for instruction: %r2 = sext i1 undef to i16 %r2 = sext i1 undef to i16 - ; CHECK: Found an estimated cost of 1 for instruction: %r3 = zext i1 undef to i16 %r3 = zext i1 undef to i16 - ; CHECK: Found an estimated cost of 1 for instruction: %r4 = sext i1 undef to i32 %r4 = sext i1 undef to i32 - ; CHECK: Found an estimated cost of 1 for instruction: %r5 = zext i1 undef to i32 %r5 = zext i1 undef to i32 - ; CHECK: Found an estimated cost of 1 for instruction: %r6 = sext i1 undef to i64 %r6 = sext i1 undef to i64 - ; CHECK: Found an estimated cost of 1 for instruction: %r7 = zext i1 undef to i64 %r7 = zext i1 undef to i64 - ; CHECK: Found an estimated cost of 0 for instruction: %r8 = trunc i8 undef to i1 %r8 = trunc i8 undef to i1 - ; CHECK: Found an estimated cost of 1 for instruction: %r9 = sext i8 undef to i16 %r9 = sext i8 undef to i16 - ; CHECK: Found an estimated cost of 1 for instruction: %r10 = zext i8 undef to i16 %r10 = zext i8 undef to i16 - ; CHECK: Found an estimated cost of 1 for instruction: %r11 = sext i8 undef to i32 %r11 = sext i8 undef to i32 - ; CHECK: Found an estimated cost of 1 for instruction: %r12 = zext i8 undef to i32 %r12 = zext i8 undef to i32 - ; CHECK: Found an estimated cost of 1 for instruction: %r13 = sext i8 undef to i64 %r13 = sext i8 undef to i64 - ; CHECK: Found an 
estimated cost of 1 for instruction: %r14 = zext i8 undef to i64 %r14 = zext i8 undef to i64 - ; CHECK: Found an estimated cost of 0 for instruction: %r15 = trunc i16 undef to i1 %r15 = trunc i16 undef to i1 - ; CHECK: Found an estimated cost of 0 for instruction: %r16 = trunc i16 undef to i8 %r16 = trunc i16 undef to i8 - ; CHECK: Found an estimated cost of 1 for instruction: %r17 = sext i16 undef to i32 %r17 = sext i16 undef to i32 - ; CHECK: Found an estimated cost of 1 for instruction: %r18 = zext i16 undef to i32 %r18 = zext i16 undef to i32 - ; CHECK: Found an estimated cost of 2 for instruction: %r19 = sext i16 undef to i64 %r19 = sext i16 undef to i64 - ; CHECK: Found an estimated cost of 1 for instruction: %r20 = zext i16 undef to i64 %r20 = zext i16 undef to i64 - ; CHECK: Found an estimated cost of 0 for instruction: %r21 = trunc i32 undef to i1 %r21 = trunc i32 undef to i1 - ; CHECK: Found an estimated cost of 0 for instruction: %r22 = trunc i32 undef to i8 %r22 = trunc i32 undef to i8 - ; CHECK: Found an estimated cost of 0 for instruction: %r23 = trunc i32 undef to i16 %r23 = trunc i32 undef to i16 - ; CHECK: Found an estimated cost of 1 for instruction: %r24 = sext i32 undef to i64 %r24 = sext i32 undef to i64 - ; CHECK: Found an estimated cost of 1 for instruction: %r25 = zext i32 undef to i64 %r25 = zext i32 undef to i64 - ; CHECK: Found an estimated cost of 0 for instruction: %r26 = trunc i64 undef to i1 %r26 = trunc i64 undef to i1 - ; CHECK: Found an estimated cost of 0 for instruction: %r27 = trunc i64 undef to i8 %r27 = trunc i64 undef to i8 - ; CHECK: Found an estimated cost of 0 for instruction: %r28 = trunc i64 undef to i16 %r28 = trunc i64 undef to i16 - ; CHECK: Found an estimated cost of 0 for instruction: %r29 = trunc i64 undef to i32 %r29 = trunc i64 undef to i32 - ; -- floating point conversions -- + ; -- floating point conversions -- ; Moves between scalar and NEON registers. 
- ; CHECK: Found an estimated cost of 2 for instruction: %r30 = fptoui float undef to i1 %r30 = fptoui float undef to i1 - ; CHECK: Found an estimated cost of 2 for instruction: %r31 = fptosi float undef to i1 %r31 = fptosi float undef to i1 - ; CHECK: Found an estimated cost of 2 for instruction: %r32 = fptoui float undef to i8 %r32 = fptoui float undef to i8 - ; CHECK: Found an estimated cost of 2 for instruction: %r33 = fptosi float undef to i8 %r33 = fptosi float undef to i8 - ; CHECK: Found an estimated cost of 2 for instruction: %r34 = fptoui float undef to i16 %r34 = fptoui float undef to i16 - ; CHECK: Found an estimated cost of 2 for instruction: %r35 = fptosi float undef to i16 %r35 = fptosi float undef to i16 - ; CHECK: Found an estimated cost of 2 for instruction: %r36 = fptoui float undef to i32 %r36 = fptoui float undef to i32 - ; CHECK: Found an estimated cost of 2 for instruction: %r37 = fptosi float undef to i32 %r37 = fptosi float undef to i32 - ; CHECK: Found an estimated cost of 10 for instruction: %r38 = fptoui float undef to i64 %r38 = fptoui float undef to i64 - ; CHECK: Found an estimated cost of 10 for instruction: %r39 = fptosi float undef to i64 %r39 = fptosi float undef to i64 - ; CHECK: Found an estimated cost of 2 for instruction: %r40 = fptoui double undef to i1 %r40 = fptoui double undef to i1 - ; CHECK: Found an estimated cost of 2 for instruction: %r41 = fptosi double undef to i1 %r41 = fptosi double undef to i1 - ; CHECK: Found an estimated cost of 2 for instruction: %r42 = fptoui double undef to i8 %r42 = fptoui double undef to i8 - ; CHECK: Found an estimated cost of 2 for instruction: %r43 = fptosi double undef to i8 %r43 = fptosi double undef to i8 - ; CHECK: Found an estimated cost of 2 for instruction: %r44 = fptoui double undef to i16 %r44 = fptoui double undef to i16 - ; CHECK: Found an estimated cost of 2 for instruction: %r45 = fptosi double undef to i16 %r45 = fptosi double undef to i16 - ; CHECK: Found an estimated 
cost of 2 for instruction: %r46 = fptoui double undef to i32 %r46 = fptoui double undef to i32 - ; CHECK: Found an estimated cost of 2 for instruction: %r47 = fptosi double undef to i32 %r47 = fptosi double undef to i32 ; Function call - ; CHECK: Found an estimated cost of 10 for instruction: %r48 = fptoui double undef to i64 %r48 = fptoui double undef to i64 - ; CHECK: Found an estimated cost of 10 for instruction: %r49 = fptosi double undef to i64 %r49 = fptosi double undef to i64 - ; CHECK: Found an estimated cost of 2 for instruction: %r50 = sitofp i1 undef to float %r50 = sitofp i1 undef to float - ; CHECK: Found an estimated cost of 2 for instruction: %r51 = uitofp i1 undef to float %r51 = uitofp i1 undef to float - ; CHECK: Found an estimated cost of 2 for instruction: %r52 = sitofp i1 undef to double %r52 = sitofp i1 undef to double - ; CHECK: Found an estimated cost of 2 for instruction: %r53 = uitofp i1 undef to double %r53 = uitofp i1 undef to double - ; CHECK: Found an estimated cost of 2 for instruction: %r54 = sitofp i8 undef to float %r54 = sitofp i8 undef to float - ; CHECK: Found an estimated cost of 2 for instruction: %r55 = uitofp i8 undef to float %r55 = uitofp i8 undef to float - ; CHECK: Found an estimated cost of 2 for instruction: %r56 = sitofp i8 undef to double %r56 = sitofp i8 undef to double - ; CHECK: Found an estimated cost of 2 for instruction: %r57 = uitofp i8 undef to double %r57 = uitofp i8 undef to double - ; CHECK: Found an estimated cost of 2 for instruction: %r58 = sitofp i16 undef to float %r58 = sitofp i16 undef to float - ; CHECK: Found an estimated cost of 2 for instruction: %r59 = uitofp i16 undef to float %r59 = uitofp i16 undef to float - ; CHECK: Found an estimated cost of 2 for instruction: %r60 = sitofp i16 undef to double %r60 = sitofp i16 undef to double - ; CHECK: Found an estimated cost of 2 for instruction: %r61 = uitofp i16 undef to double %r61 = uitofp i16 undef to double - ; CHECK: Found an estimated cost of 2 
for instruction: %r62 = sitofp i32 undef to float %r62 = sitofp i32 undef to float - ; CHECK: Found an estimated cost of 2 for instruction: %r63 = uitofp i32 undef to float %r63 = uitofp i32 undef to float - ; CHECK: Found an estimated cost of 2 for instruction: %r64 = sitofp i32 undef to double %r64 = sitofp i32 undef to double - ; CHECK: Found an estimated cost of 2 for instruction: %r65 = uitofp i32 undef to double %r65 = uitofp i32 undef to double ; Function call - ; CHECK: Found an estimated cost of 10 for instruction: %r66 = sitofp i64 undef to float %r66 = sitofp i64 undef to float - ; CHECK: Found an estimated cost of 10 for instruction: %r67 = uitofp i64 undef to float %r67 = uitofp i64 undef to float - ; CHECK: Found an estimated cost of 10 for instruction: %r68 = sitofp i64 undef to double %r68 = sitofp i64 undef to double - ; CHECK: Found an estimated cost of 10 for instruction: %r69 = uitofp i64 undef to double %r69 = uitofp i64 undef to double - ; CHECK: Found an estimated cost of 3 for instruction: %r70 = sext <8 x i8> undef to <8 x i32> + %q70 = sext <4 x i8> undef to <4 x i32> + %q71 = sext <8 x i8> undef to <8 x i16> + %s70 = sext <4 x i8> undef to <4 x i32> %r70 = sext <8 x i8> undef to <8 x i32> - ; CHECK: Found an estimated cost of 6 for instruction: %r71 = sext <16 x i8> undef to <16 x i32> %r71 = sext <16 x i8> undef to <16 x i32> - ; CHECK: Found an estimated cost of 3 for instruction: %r72 = zext <8 x i8> undef to <8 x i32> + %q72 = zext <4 x i8> undef to <4 x i32> + %q73 = zext <8 x i8> undef to <8 x i16> + %s72 = zext <4 x i8> undef to <4 x i32> %r72 = zext <8 x i8> undef to <8 x i32> - ; CHECK: Found an estimated cost of 6 for instruction: %r73 = zext <16 x i8> undef to <16 x i32> %r73 = zext <16 x i8> undef to <16 x i32> - ; CHECK: Found an estimated cost of 7 for instruction: %rext_0 = sext <8 x i8> undef to <8 x i64> %rext_0 = sext <8 x i8> undef to <8 x i64> - ; CHECK: Found an estimated cost of 7 for instruction: %rext_1 = zext <8 x 
i8> undef to <8 x i64> %rext_1 = zext <8 x i8> undef to <8 x i64> - ; CHECK: Found an estimated cost of 6 for instruction: %rext_2 = sext <8 x i16> undef to <8 x i64> %rext_2 = sext <8 x i16> undef to <8 x i64> - ; CHECK: Found an estimated cost of 6 for instruction: %rext_3 = zext <8 x i16> undef to <8 x i64> %rext_3 = zext <8 x i16> undef to <8 x i64> - ; CHECK: Found an estimated cost of 3 for instruction: %rext_4 = sext <4 x i16> undef to <4 x i64> %rext_4 = sext <4 x i16> undef to <4 x i64> - ; CHECK: Found an estimated cost of 3 for instruction: %rext_5 = zext <4 x i16> undef to <4 x i64> %rext_5 = zext <4 x i16> undef to <4 x i64> + %rext_6 = sext <2 x i8> undef to <2 x i64> + %rext_7 = zext <2 x i8> undef to <2 x i64> + %rext_8 = sext <2 x i16> undef to <2 x i64> + %rext_9 = zext <2 x i16> undef to <2 x i64> + %rext_a = sext <2 x i32> undef to <2 x i64> + %rext_b = zext <2 x i32> undef to <2 x i64> ; Vector cast cost of instructions lowering the cast to the stack. - ; CHECK: Found an estimated cost of 3 for instruction: %r74 = trunc <8 x i32> undef to <8 x i8> %r74 = trunc <8 x i32> undef to <8 x i8> - ; CHECK: Found an estimated cost of 6 for instruction: %r75 = trunc <16 x i32> undef to <16 x i8> %r75 = trunc <16 x i32> undef to <16 x i8> ; Floating point truncation costs. 
- ; CHECK: Found an estimated cost of 1 for instruction: %r80 = fptrunc double undef to float %r80 = fptrunc double undef to float - ; CHECK: Found an estimated cost of 2 for instruction: %r81 = fptrunc <2 x double> undef to <2 x float> %r81 = fptrunc <2 x double> undef to <2 x float> - ; CHECK: Found an estimated cost of 4 for instruction: %r82 = fptrunc <4 x double> undef to <4 x float> %r82 = fptrunc <4 x double> undef to <4 x float> - ; CHECK: Found an estimated cost of 8 for instruction: %r83 = fptrunc <8 x double> undef to <8 x float> %r83 = fptrunc <8 x double> undef to <8 x float> - ; CHECK: Found an estimated cost of 16 for instruction: %r84 = fptrunc <16 x double> undef to <16 x float> %r84 = fptrunc <16 x double> undef to <16 x float> ; Floating point extension costs. - ; CHECK: Found an estimated cost of 1 for instruction: %r85 = fpext float undef to double %r85 = fpext float undef to double - ; CHECK: Found an estimated cost of 2 for instruction: %r86 = fpext <2 x float> undef to <2 x double> %r86 = fpext <2 x float> undef to <2 x double> - ; CHECK: Found an estimated cost of 4 for instruction: %r87 = fpext <4 x float> undef to <4 x double> %r87 = fpext <4 x float> undef to <4 x double> - ; CHECK: Found an estimated cost of 8 for instruction: %r88 = fpext <8 x float> undef to <8 x double> %r88 = fpext <8 x float> undef to <8 x double> - ; CHECK: Found an estimated cost of 16 for instruction: %r89 = fpext <16 x float> undef to <16 x double> %r89 = fpext <16 x float> undef to <16 x double> ;; Floating point to integer vector casts. 
- ; CHECK: Found an estimated cost of 1 for instruction: %r90 = fptoui <2 x float> undef to <2 x i1> %r90 = fptoui <2 x float> undef to <2 x i1> - ; CHECK: Found an estimated cost of 1 for instruction: %r91 = fptosi <2 x float> undef to <2 x i1> %r91 = fptosi <2 x float> undef to <2 x i1> - ; CHECK: Found an estimated cost of 1 for instruction: %r92 = fptoui <2 x float> undef to <2 x i8> %r92 = fptoui <2 x float> undef to <2 x i8> - ; CHECK: Found an estimated cost of 1 for instruction: %r93 = fptosi <2 x float> undef to <2 x i8> %r93 = fptosi <2 x float> undef to <2 x i8> - ; CHECK: Found an estimated cost of 1 for instruction: %r94 = fptoui <2 x float> undef to <2 x i16> %r94 = fptoui <2 x float> undef to <2 x i16> - ; CHECK: Found an estimated cost of 1 for instruction: %r95 = fptosi <2 x float> undef to <2 x i16> %r95 = fptosi <2 x float> undef to <2 x i16> - ; CHECK: Found an estimated cost of 1 for instruction: %r96 = fptoui <2 x float> undef to <2 x i32> %r96 = fptoui <2 x float> undef to <2 x i32> - ; CHECK: Found an estimated cost of 1 for instruction: %r97 = fptosi <2 x float> undef to <2 x i32> %r97 = fptosi <2 x float> undef to <2 x i32> - ; CHECK: Found an estimated cost of 32 for instruction: %r98 = fptoui <2 x float> undef to <2 x i64> %r98 = fptoui <2 x float> undef to <2 x i64> - ; CHECK: Found an estimated cost of 32 for instruction: %r99 = fptosi <2 x float> undef to <2 x i64> %r99 = fptosi <2 x float> undef to <2 x i64> - ; CHECK: Found an estimated cost of 16 for instruction: %r100 = fptoui <2 x double> undef to <2 x i1> %r100 = fptoui <2 x double> undef to <2 x i1> - ; CHECK: Found an estimated cost of 16 for instruction: %r101 = fptosi <2 x double> undef to <2 x i1> %r101 = fptosi <2 x double> undef to <2 x i1> - ; CHECK: Found an estimated cost of 16 for instruction: %r102 = fptoui <2 x double> undef to <2 x i8> %r102 = fptoui <2 x double> undef to <2 x i8> - ; CHECK: Found an estimated cost of 16 for instruction: %r103 = fptosi <2 x double> 
undef to <2 x i8> %r103 = fptosi <2 x double> undef to <2 x i8> - ; CHECK: Found an estimated cost of 16 for instruction: %r104 = fptoui <2 x double> undef to <2 x i16> %r104 = fptoui <2 x double> undef to <2 x i16> - ; CHECK: Found an estimated cost of 16 for instruction: %r105 = fptosi <2 x double> undef to <2 x i16> %r105 = fptosi <2 x double> undef to <2 x i16> - ; CHECK: Found an estimated cost of 2 for instruction: %r106 = fptoui <2 x double> undef to <2 x i32> %r106 = fptoui <2 x double> undef to <2 x i32> - ; CHECK: Found an estimated cost of 2 for instruction: %r107 = fptosi <2 x double> undef to <2 x i32> %r107 = fptosi <2 x double> undef to <2 x i32> - ; CHECK: Found an estimated cost of 32 for instruction: %r108 = fptoui <2 x double> undef to <2 x i64> %r108 = fptoui <2 x double> undef to <2 x i64> - ; CHECK: Found an estimated cost of 32 for instruction: %r109 = fptosi <2 x double> undef to <2 x i64> %r109 = fptosi <2 x double> undef to <2 x i64> - ; CHECK: Found an estimated cost of 32 for instruction: %r110 = fptoui <4 x float> undef to <4 x i1> %r110 = fptoui <4 x float> undef to <4 x i1> - ; CHECK: Found an estimated cost of 32 for instruction: %r111 = fptosi <4 x float> undef to <4 x i1> %r111 = fptosi <4 x float> undef to <4 x i1> - ; CHECK: Found an estimated cost of 3 for instruction: %r112 = fptoui <4 x float> undef to <4 x i8> %r112 = fptoui <4 x float> undef to <4 x i8> - ; CHECK: Found an estimated cost of 3 for instruction: %r113 = fptosi <4 x float> undef to <4 x i8> %r113 = fptosi <4 x float> undef to <4 x i8> - ; CHECK: Found an estimated cost of 2 for instruction: %r114 = fptoui <4 x float> undef to <4 x i16> %r114 = fptoui <4 x float> undef to <4 x i16> - ; CHECK: Found an estimated cost of 2 for instruction: %r115 = fptosi <4 x float> undef to <4 x i16> %r115 = fptosi <4 x float> undef to <4 x i16> - ; CHECK: Found an estimated cost of 1 for instruction: %r116 = fptoui <4 x float> undef to <4 x i32> %r116 = fptoui <4 x float> undef 
to <4 x i32> - ; CHECK: Found an estimated cost of 1 for instruction: %r117 = fptosi <4 x float> undef to <4 x i32> %r117 = fptosi <4 x float> undef to <4 x i32> - ; CHECK: Found an estimated cost of 65 for instruction: %r118 = fptoui <4 x float> undef to <4 x i64> %r118 = fptoui <4 x float> undef to <4 x i64> - ; CHECK: Found an estimated cost of 65 for instruction: %r119 = fptosi <4 x float> undef to <4 x i64> %r119 = fptosi <4 x float> undef to <4 x i64> - ; CHECK: Found an estimated cost of 33 for instruction: %r120 = fptoui <4 x double> undef to <4 x i1> %r120 = fptoui <4 x double> undef to <4 x i1> - ; CHECK: Found an estimated cost of 33 for instruction: %r121 = fptosi <4 x double> undef to <4 x i1> %r121 = fptosi <4 x double> undef to <4 x i1> - ; CHECK: Found an estimated cost of 33 for instruction: %r122 = fptoui <4 x double> undef to <4 x i8> %r122 = fptoui <4 x double> undef to <4 x i8> - ; CHECK: Found an estimated cost of 33 for instruction: %r123 = fptosi <4 x double> undef to <4 x i8> %r123 = fptosi <4 x double> undef to <4 x i8> - ; CHECK: Found an estimated cost of 33 for instruction: %r124 = fptoui <4 x double> undef to <4 x i16> %r124 = fptoui <4 x double> undef to <4 x i16> - ; CHECK: Found an estimated cost of 33 for instruction: %r125 = fptosi <4 x double> undef to <4 x i16> %r125 = fptosi <4 x double> undef to <4 x i16> - ; CHECK: Found an estimated cost of 5 for instruction: %r126 = fptoui <4 x double> undef to <4 x i32> %r126 = fptoui <4 x double> undef to <4 x i32> - ; CHECK: Found an estimated cost of 5 for instruction: %r127 = fptosi <4 x double> undef to <4 x i32> %r127 = fptosi <4 x double> undef to <4 x i32> - ; CHECK: Found an estimated cost of 65 for instruction: %r128 = fptoui <4 x double> undef to <4 x i64> %r128 = fptoui <4 x double> undef to <4 x i64> - ; CHECK: Found an estimated cost of 65 for instruction: %r129 = fptosi <4 x double> undef to <4 x i64> %r129 = fptosi <4 x double> undef to <4 x i64> - ; CHECK: Found an 
estimated cost of 65 for instruction: %r130 = fptoui <8 x float> undef to <8 x i1> %r130 = fptoui <8 x float> undef to <8 x i1> - ; CHECK: Found an estimated cost of 65 for instruction: %r131 = fptosi <8 x float> undef to <8 x i1> %r131 = fptosi <8 x float> undef to <8 x i1> - ; CHECK: Found an estimated cost of 7 for instruction: %r132 = fptoui <8 x float> undef to <8 x i8> %r132 = fptoui <8 x float> undef to <8 x i8> - ; CHECK: Found an estimated cost of 7 for instruction: %r133 = fptosi <8 x float> undef to <8 x i8> %r133 = fptosi <8 x float> undef to <8 x i8> - ; CHECK: Found an estimated cost of 4 for instruction: %r134 = fptoui <8 x float> undef to <8 x i16> %r134 = fptoui <8 x float> undef to <8 x i16> - ; CHECK: Found an estimated cost of 4 for instruction: %r135 = fptosi <8 x float> undef to <8 x i16> %r135 = fptosi <8 x float> undef to <8 x i16> - ; CHECK: Found an estimated cost of 2 for instruction: %r136 = fptoui <8 x float> undef to <8 x i32> %r136 = fptoui <8 x float> undef to <8 x i32> - ; CHECK: Found an estimated cost of 2 for instruction: %r137 = fptosi <8 x float> undef to <8 x i32> %r137 = fptosi <8 x float> undef to <8 x i32> - ; CHECK: Found an estimated cost of 131 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> %r138 = fptoui <8 x float> undef to <8 x i64> - ; CHECK: Found an estimated cost of 131 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> %r139 = fptosi <8 x float> undef to <8 x i64> - ; CHECK: Found an estimated cost of 67 for instruction: %r140 = fptoui <8 x double> undef to <8 x i1> %r140 = fptoui <8 x double> undef to <8 x i1> - ; CHECK: Found an estimated cost of 67 for instruction: %r141 = fptosi <8 x double> undef to <8 x i1> %r141 = fptosi <8 x double> undef to <8 x i1> - ; CHECK: Found an estimated cost of 67 for instruction: %r142 = fptoui <8 x double> undef to <8 x i8> %r142 = fptoui <8 x double> undef to <8 x i8> - ; CHECK: Found an estimated cost of 67 for instruction: %r143 = fptosi <8 x 
double> undef to <8 x i8> %r143 = fptosi <8 x double> undef to <8 x i8> - ; CHECK: Found an estimated cost of 67 for instruction: %r144 = fptoui <8 x double> undef to <8 x i16> %r144 = fptoui <8 x double> undef to <8 x i16> - ; CHECK: Found an estimated cost of 67 for instruction: %r145 = fptosi <8 x double> undef to <8 x i16> %r145 = fptosi <8 x double> undef to <8 x i16> - ; CHECK: Found an estimated cost of 11 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32> %r146 = fptoui <8 x double> undef to <8 x i32> - ; CHECK: Found an estimated cost of 11 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> %r147 = fptosi <8 x double> undef to <8 x i32> - ; CHECK: Found an estimated cost of 131 for instruction: %r148 = fptoui <8 x double> undef to <8 x i64> %r148 = fptoui <8 x double> undef to <8 x i64> - ; CHECK: Found an estimated cost of 131 for instruction: %r149 = fptosi <8 x double> undef to <8 x i64> %r149 = fptosi <8 x double> undef to <8 x i64> - ; CHECK: Found an estimated cost of 131 for instruction: %r150 = fptoui <16 x float> undef to <16 x i1> %r150 = fptoui <16 x float> undef to <16 x i1> - ; CHECK: Found an estimated cost of 131 for instruction: %r151 = fptosi <16 x float> undef to <16 x i1> %r151 = fptosi <16 x float> undef to <16 x i1> - ; CHECK: Found an estimated cost of 15 for instruction: %r152 = fptoui <16 x float> undef to <16 x i8> %r152 = fptoui <16 x float> undef to <16 x i8> - ; CHECK: Found an estimated cost of 15 for instruction: %r153 = fptosi <16 x float> undef to <16 x i8> %r153 = fptosi <16 x float> undef to <16 x i8> - ; CHECK: Found an estimated cost of 8 for instruction: %r154 = fptoui <16 x float> undef to <16 x i16> %r154 = fptoui <16 x float> undef to <16 x i16> - ; CHECK: Found an estimated cost of 8 for instruction: %r155 = fptosi <16 x float> undef to <16 x i16> %r155 = fptosi <16 x float> undef to <16 x i16> - ; CHECK: Found an estimated cost of 4 for instruction: %r156 = fptoui <16 x float> undef to <16 
x i32> %r156 = fptoui <16 x float> undef to <16 x i32> - ; CHECK: Found an estimated cost of 4 for instruction: %r157 = fptosi <16 x float> undef to <16 x i32> %r157 = fptosi <16 x float> undef to <16 x i32> - ; CHECK: Found an estimated cost of 263 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> %r158 = fptoui <16 x float> undef to <16 x i64> - ; CHECK: Found an estimated cost of 263 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> %r159 = fptosi <16 x float> undef to <16 x i64> - ; CHECK: Found an estimated cost of 135 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1> %r160 = fptoui <16 x double> undef to <16 x i1> - ; CHECK: Found an estimated cost of 135 for instruction: %r161 = fptosi <16 x double> undef to <16 x i1> %r161 = fptosi <16 x double> undef to <16 x i1> - ; CHECK: Found an estimated cost of 135 for instruction: %r162 = fptoui <16 x double> undef to <16 x i8> %r162 = fptoui <16 x double> undef to <16 x i8> - ; CHECK: Found an estimated cost of 135 for instruction: %r163 = fptosi <16 x double> undef to <16 x i8> %r163 = fptosi <16 x double> undef to <16 x i8> - ; CHECK: Found an estimated cost of 135 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> %r164 = fptoui <16 x double> undef to <16 x i16> - ; CHECK: Found an estimated cost of 135 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16> %r165 = fptosi <16 x double> undef to <16 x i16> - ; CHECK: Found an estimated cost of 23 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> %r166 = fptoui <16 x double> undef to <16 x i32> - ; CHECK: Found an estimated cost of 23 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> %r167 = fptosi <16 x double> undef to <16 x i32> - ; CHECK: Found an estimated cost of 263 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64> %r168 = fptoui <16 x double> undef to <16 x i64> - ; CHECK: Found an estimated cost of 263 for instruction: %r169 = fptosi <16 
x double> undef to <16 x i64> %r169 = fptosi <16 x double> undef to <16 x i64> - ; CHECK: Found an estimated cost of 12 for instruction: %r170 = uitofp <2 x i1> undef to <2 x float> %r170 = uitofp <2 x i1> undef to <2 x float> - ; CHECK: Found an estimated cost of 12 for instruction: %r171 = sitofp <2 x i1> undef to <2 x float> %r171 = sitofp <2 x i1> undef to <2 x float> - ; CHECK: Found an estimated cost of 3 for instruction: %r172 = uitofp <2 x i8> undef to <2 x float> %r172 = uitofp <2 x i8> undef to <2 x float> - ; CHECK: Found an estimated cost of 3 for instruction: %r173 = sitofp <2 x i8> undef to <2 x float> %r173 = sitofp <2 x i8> undef to <2 x float> - ; CHECK: Found an estimated cost of 2 for instruction: %r174 = uitofp <2 x i16> undef to <2 x float> %r174 = uitofp <2 x i16> undef to <2 x float> - ; CHECK: Found an estimated cost of 2 for instruction: %r175 = sitofp <2 x i16> undef to <2 x float> %r175 = sitofp <2 x i16> undef to <2 x float> - ; CHECK: Found an estimated cost of 1 for instruction: %r176 = uitofp <2 x i32> undef to <2 x float> %r176 = uitofp <2 x i32> undef to <2 x float> - ; CHECK: Found an estimated cost of 1 for instruction: %r177 = sitofp <2 x i32> undef to <2 x float> %r177 = sitofp <2 x i32> undef to <2 x float> - ; CHECK: Found an estimated cost of 28 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float> %r178 = uitofp <2 x i64> undef to <2 x float> - ; CHECK: Found an estimated cost of 28 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float> %r179 = sitofp <2 x i64> undef to <2 x float> - ; CHECK: Found an estimated cost of 8 for instruction: %r180 = uitofp <2 x i1> undef to <2 x double> %r180 = uitofp <2 x i1> undef to <2 x double> - ; CHECK: Found an estimated cost of 8 for instruction: %r181 = sitofp <2 x i1> undef to <2 x double> %r181 = sitofp <2 x i1> undef to <2 x double> - ; CHECK: Found an estimated cost of 4 for instruction: %r182 = uitofp <2 x i8> undef to <2 x double> %r182 = uitofp <2 x i8> undef to 
<2 x double> - ; CHECK: Found an estimated cost of 4 for instruction: %r183 = sitofp <2 x i8> undef to <2 x double> %r183 = sitofp <2 x i8> undef to <2 x double> - ; CHECK: Found an estimated cost of 3 for instruction: %r184 = uitofp <2 x i16> undef to <2 x double> %r184 = uitofp <2 x i16> undef to <2 x double> - ; CHECK: Found an estimated cost of 3 for instruction: %r185 = sitofp <2 x i16> undef to <2 x double> %r185 = sitofp <2 x i16> undef to <2 x double> - ; CHECK: Found an estimated cost of 2 for instruction: %r186 = uitofp <2 x i32> undef to <2 x double> %r186 = uitofp <2 x i32> undef to <2 x double> - ; CHECK: Found an estimated cost of 2 for instruction: %r187 = sitofp <2 x i32> undef to <2 x double> %r187 = sitofp <2 x i32> undef to <2 x double> - ; CHECK: Found an estimated cost of 24 for instruction: %r188 = uitofp <2 x i64> undef to <2 x double> %r188 = uitofp <2 x i64> undef to <2 x double> - ; CHECK: Found an estimated cost of 24 for instruction: %r189 = sitofp <2 x i64> undef to <2 x double> %r189 = sitofp <2 x i64> undef to <2 x double> - ; CHECK: Found an estimated cost of 3 for instruction: %r190 = uitofp <4 x i1> undef to <4 x float> %r190 = uitofp <4 x i1> undef to <4 x float> - ; CHECK: Found an estimated cost of 3 for instruction: %r191 = sitofp <4 x i1> undef to <4 x float> %r191 = sitofp <4 x i1> undef to <4 x float> - ; CHECK: Found an estimated cost of 3 for instruction: %r192 = uitofp <4 x i8> undef to <4 x float> %r192 = uitofp <4 x i8> undef to <4 x float> - ; CHECK: Found an estimated cost of 3 for instruction: %r193 = sitofp <4 x i8> undef to <4 x float> %r193 = sitofp <4 x i8> undef to <4 x float> - ; CHECK: Found an estimated cost of 2 for instruction: %r194 = uitofp <4 x i16> undef to <4 x float> %r194 = uitofp <4 x i16> undef to <4 x float> - ; CHECK: Found an estimated cost of 2 for instruction: %r195 = sitofp <4 x i16> undef to <4 x float> %r195 = sitofp <4 x i16> undef to <4 x float> - ; CHECK: Found an estimated cost of 1 for 
instruction: %r196 = uitofp <4 x i32> undef to <4 x float> %r196 = uitofp <4 x i32> undef to <4 x float> - ; CHECK: Found an estimated cost of 1 for instruction: %r197 = sitofp <4 x i32> undef to <4 x float> %r197 = sitofp <4 x i32> undef to <4 x float> - ; CHECK: Found an estimated cost of 57 for instruction: %r198 = uitofp <4 x i64> undef to <4 x float> %r198 = uitofp <4 x i64> undef to <4 x float> - ; CHECK: Found an estimated cost of 57 for instruction: %r199 = sitofp <4 x i64> undef to <4 x float> %r199 = sitofp <4 x i64> undef to <4 x float> - ; CHECK: Found an estimated cost of 17 for instruction: %r200 = uitofp <4 x i1> undef to <4 x double> %r200 = uitofp <4 x i1> undef to <4 x double> - ; CHECK: Found an estimated cost of 17 for instruction: %r201 = sitofp <4 x i1> undef to <4 x double> %r201 = sitofp <4 x i1> undef to <4 x double> - ; CHECK: Found an estimated cost of 9 for instruction: %r202 = uitofp <4 x i8> undef to <4 x double> %r202 = uitofp <4 x i8> undef to <4 x double> - ; CHECK: Found an estimated cost of 9 for instruction: %r203 = sitofp <4 x i8> undef to <4 x double> %r203 = sitofp <4 x i8> undef to <4 x double> - ; CHECK: Found an estimated cost of 7 for instruction: %r204 = uitofp <4 x i16> undef to <4 x double> %r204 = uitofp <4 x i16> undef to <4 x double> - ; CHECK: Found an estimated cost of 7 for instruction: %r205 = sitofp <4 x i16> undef to <4 x double> %r205 = sitofp <4 x i16> undef to <4 x double> - ; CHECK: Found an estimated cost of 5 for instruction: %r206 = uitofp <4 x i32> undef to <4 x double> %r206 = uitofp <4 x i32> undef to <4 x double> - ; CHECK: Found an estimated cost of 5 for instruction: %r207 = sitofp <4 x i32> undef to <4 x double> %r207 = sitofp <4 x i32> undef to <4 x double> - ; CHECK: Found an estimated cost of 49 for instruction: %r208 = uitofp <4 x i64> undef to <4 x double> %r208 = uitofp <4 x i64> undef to <4 x double> - ; CHECK: Found an estimated cost of 49 for instruction: %r209 = sitofp <4 x i64> undef to 
<4 x double> %r209 = sitofp <4 x i64> undef to <4 x double> - ; CHECK: Found an estimated cost of 7 for instruction: %r210 = uitofp <8 x i1> undef to <8 x float> %r210 = uitofp <8 x i1> undef to <8 x float> - ; CHECK: Found an estimated cost of 7 for instruction: %r211 = sitofp <8 x i1> undef to <8 x float> %r211 = sitofp <8 x i1> undef to <8 x float> - ; CHECK: Found an estimated cost of 7 for instruction: %r212 = uitofp <8 x i8> undef to <8 x float> %r212 = uitofp <8 x i8> undef to <8 x float> - ; CHECK: Found an estimated cost of 7 for instruction: %r213 = sitofp <8 x i8> undef to <8 x float> %r213 = sitofp <8 x i8> undef to <8 x float> - ; CHECK: Found an estimated cost of 4 for instruction: %r214 = uitofp <8 x i16> undef to <8 x float> %r214 = uitofp <8 x i16> undef to <8 x float> - ; CHECK: Found an estimated cost of 4 for instruction: %r215 = sitofp <8 x i16> undef to <8 x float> %r215 = sitofp <8 x i16> undef to <8 x float> - ; CHECK: Found an estimated cost of 2 for instruction: %r216 = uitofp <8 x i32> undef to <8 x float> %r216 = uitofp <8 x i32> undef to <8 x float> - ; CHECK: Found an estimated cost of 2 for instruction: %r217 = sitofp <8 x i32> undef to <8 x float> %r217 = sitofp <8 x i32> undef to <8 x float> - ; CHECK: Found an estimated cost of 115 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> %r218 = uitofp <8 x i64> undef to <8 x float> - ; CHECK: Found an estimated cost of 115 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> %r219 = sitofp <8 x i64> undef to <8 x float> - ; CHECK: Found an estimated cost of 35 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double> %r220 = uitofp <8 x i1> undef to <8 x double> - ; CHECK: Found an estimated cost of 35 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double> %r221 = sitofp <8 x i1> undef to <8 x double> - ; CHECK: Found an estimated cost of 19 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double> %r222 = uitofp <8 x i8> undef to <8 x double> - ; 
CHECK: Found an estimated cost of 19 for instruction: %r223 = sitofp <8 x i8> undef to <8 x double> %r223 = sitofp <8 x i8> undef to <8 x double> - ; CHECK: Found an estimated cost of 15 for instruction: %r224 = uitofp <8 x i16> undef to <8 x double> %r224 = uitofp <8 x i16> undef to <8 x double> - ; CHECK: Found an estimated cost of 15 for instruction: %r225 = sitofp <8 x i16> undef to <8 x double> %r225 = sitofp <8 x i16> undef to <8 x double> - ; CHECK: Found an estimated cost of 15 for instruction: %r226 = uitofp <8 x i16> undef to <8 x double> %r226 = uitofp <8 x i16> undef to <8 x double> - ; CHECK: Found an estimated cost of 15 for instruction: %r227 = sitofp <8 x i16> undef to <8 x double> %r227 = sitofp <8 x i16> undef to <8 x double> - ; CHECK: Found an estimated cost of 99 for instruction: %r228 = uitofp <8 x i64> undef to <8 x double> %r228 = uitofp <8 x i64> undef to <8 x double> - ; CHECK: Found an estimated cost of 99 for instruction: %r229 = sitofp <8 x i64> undef to <8 x double> %r229 = sitofp <8 x i64> undef to <8 x double> - ; CHECK: Found an estimated cost of 15 for instruction: %r230 = uitofp <16 x i1> undef to <16 x float> %r230 = uitofp <16 x i1> undef to <16 x float> - ; CHECK: Found an estimated cost of 15 for instruction: %r231 = sitofp <16 x i1> undef to <16 x float> %r231 = sitofp <16 x i1> undef to <16 x float> - ; CHECK: Found an estimated cost of 15 for instruction: %r232 = uitofp <16 x i8> undef to <16 x float> %r232 = uitofp <16 x i8> undef to <16 x float> - ; CHECK: Found an estimated cost of 15 for instruction: %r233 = sitofp <16 x i8> undef to <16 x float> %r233 = sitofp <16 x i8> undef to <16 x float> - ; CHECK: Found an estimated cost of 8 for instruction: %r234 = uitofp <16 x i16> undef to <16 x float> %r234 = uitofp <16 x i16> undef to <16 x float> - ; CHECK: Found an estimated cost of 8 for instruction: %r235 = sitofp <16 x i16> undef to <16 x float> %r235 = sitofp <16 x i16> undef to <16 x float> - ; CHECK: Found an 
estimated cost of 4 for instruction: %r236 = uitofp <16 x i32> undef to <16 x float> %r236 = uitofp <16 x i32> undef to <16 x float> - ; CHECK: Found an estimated cost of 4 for instruction: %r237 = sitofp <16 x i32> undef to <16 x float> %r237 = sitofp <16 x i32> undef to <16 x float> - ; CHECK: Found an estimated cost of 231 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> %r238 = uitofp <16 x i64> undef to <16 x float> - ; CHECK: Found an estimated cost of 231 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> %r239 = sitofp <16 x i64> undef to <16 x float> - ; CHECK: Found an estimated cost of 71 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double> %r240 = uitofp <16 x i1> undef to <16 x double> - ; CHECK: Found an estimated cost of 71 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double> %r241 = sitofp <16 x i1> undef to <16 x double> - ; CHECK: Found an estimated cost of 39 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double> %r242 = uitofp <16 x i8> undef to <16 x double> - ; CHECK: Found an estimated cost of 39 for instruction: %r243 = sitofp <16 x i8> undef to <16 x double> %r243 = sitofp <16 x i8> undef to <16 x double> - ; CHECK: Found an estimated cost of 31 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> %r244 = uitofp <16 x i16> undef to <16 x double> - ; CHECK: Found an estimated cost of 31 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double> %r245 = sitofp <16 x i16> undef to <16 x double> - ; CHECK: Found an estimated cost of 31 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> %r246 = uitofp <16 x i16> undef to <16 x double> - ; CHECK: Found an estimated cost of 31 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> %r247 = sitofp <16 x i16> undef to <16 x double> - ; CHECK: Found an estimated cost of 199 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> %r248 = uitofp <16 x i64> undef to <16 x double> - ; 
CHECK: Found an estimated cost of 199 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> %r249 = sitofp <16 x i64> undef to <16 x double> - ; CHECK: Found an estimated cost of 0 for instruction: ret i32 undef + ret i32 undef +} + + +define i32 @load_extends() { +; CHECK-NEON-LABEL: 'load_extends' +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi16 = load i16, i16* undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi32 = load i32, i32* undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r0 = sext i8 %loadi8 to i16 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext i8 %loadi8 to i16 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext i8 %loadi8 to i32 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r3 = zext i8 %loadi8 to i32 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r4 = sext i8 %loadi8 to i64 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r5 = zext 
i8 %loadi8 to i64 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r6 = sext i16 %loadi16 to i32 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r7 = zext i16 %loadi16 to i32 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r8 = sext i16 %loadi16 to i64 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = zext i16 %loadi16 to i64 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = sext i32 %loadi32 to i64 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = zext i32 %loadi32 to i64 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = sext <8 x i8> %loadv8i8 to <8 x i16> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = zext <8 x i8> %loadv8i8 to <8 x i16> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2 = sext <4 x i8> %loadv4i8 to <4 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3 = zext <4 x i8> %loadv4i8 to <4 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4 = sext <2 x i8> %loadv2i8 to <2 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5 = zext <2 x i8> %loadv2i8 to <2 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v6 = sext <4 x i16> %loadv4i16 to <4 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v7 = zext <4 x i16> %loadv4i16 to <4 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8 = sext <2 x i16> %loadv2i16 to <2 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v9 = zext <2 x i16> %loadv2i16 to <2 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v10 = sext <2 x i32> 
%loadv2i32 to <2 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v11 = zext <2 x i32> %loadv2i32 to <2 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; CHECK-MVE-LABEL: 'load_extends' +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi16 = load i16, i16* undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi32 = load i32, i32* undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r0 = sext i8 %loadi8 to i16 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext i8 %loadi8 to i16 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext i8 %loadi8 to i32 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r3 = zext i8 %loadi8 to i32 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r4 = sext i8 %loadi8 to i64 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r5 = zext i8 %loadi8 to i64 +; CHECK-MVE-NEXT: Cost Model: 
Found an estimated cost of 0 for instruction: %r6 = sext i16 %loadi16 to i32 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r7 = zext i16 %loadi16 to i32 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r8 = sext i16 %loadi16 to i64 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = zext i16 %loadi16 to i64 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = sext i32 %loadi32 to i64 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = zext i32 %loadi32 to i64 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v0 = sext <8 x i8> %loadv8i8 to <8 x i16> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v1 = zext <8 x i8> %loadv8i8 to <8 x i16> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2 = sext <4 x i8> %loadv4i8 to <4 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3 = zext <4 x i8> %loadv4i8 to <4 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v4 = sext <2 x i8> %loadv2i8 to <2 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5 = zext <2 x i8> %loadv2i8 to <2 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v6 = sext <4 x i16> %loadv4i16 to <4 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v7 = zext <4 x i16> %loadv4i16 to <4 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v8 = sext <2 x i16> %loadv2i16 to <2 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v9 = zext <2 x i16> %loadv2i16 to <2 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v10 = sext <2 x i32> %loadv2i32 to <2 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an 
estimated cost of 4 for instruction: %v11 = zext <2 x i32> %loadv2i32 to <2 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; + + %loadi8 = load i8, i8* undef + %loadi16 = load i16, i16* undef + %loadi32 = load i32, i32* undef + %loadv2i8 = load <2 x i8>, <2 x i8>* undef + %loadv4i8 = load <4 x i8>, <4 x i8>* undef + %loadv8i8 = load <8 x i8>, <8 x i8>* undef + %loadv2i16 = load <2 x i16>, <2 x i16>* undef + %loadv4i16 = load <4 x i16>, <4 x i16>* undef + %loadv2i32 = load <2 x i32>, <2 x i32>* undef + + %r0 = sext i8 %loadi8 to i16 + %r1 = zext i8 %loadi8 to i16 + %r2 = sext i8 %loadi8 to i32 + %r3 = zext i8 %loadi8 to i32 + %r4 = sext i8 %loadi8 to i64 + %r5 = zext i8 %loadi8 to i64 + %r6 = sext i16 %loadi16 to i32 + %r7 = zext i16 %loadi16 to i32 + %r8 = sext i16 %loadi16 to i64 + %r9 = zext i16 %loadi16 to i64 + %r10 = sext i32 %loadi32 to i64 + %r11 = zext i32 %loadi32 to i64 + + %v0 = sext <8 x i8> %loadv8i8 to <8 x i16> + %v1 = zext <8 x i8> %loadv8i8 to <8 x i16> + %v2 = sext <4 x i8> %loadv4i8 to <4 x i32> + %v3 = zext <4 x i8> %loadv4i8 to <4 x i32> + %v4 = sext <2 x i8> %loadv2i8 to <2 x i64> + %v5 = zext <2 x i8> %loadv2i8 to <2 x i64> + %v6 = sext <4 x i16> %loadv4i16 to <4 x i32> + %v7 = zext <4 x i16> %loadv4i16 to <4 x i32> + %v8 = sext <2 x i16> %loadv2i16 to <2 x i64> + %v9 = zext <2 x i16> %loadv2i16 to <2 x i64> + %v10 = sext <2 x i32> %loadv2i32 to <2 x i64> + %v11 = zext <2 x i32> %loadv2i32 to <2 x i64> + ret i32 undef } diff --git a/llvm/test/Analysis/CostModel/ARM/divrem.ll b/llvm/test/Analysis/CostModel/ARM/divrem.ll index c4ac59b59..d20fcb522 100644 --- a/llvm/test/Analysis/CostModel/ARM/divrem.ll +++ b/llvm/test/Analysis/CostModel/ARM/divrem.ll @@ -1,450 +1,838 @@ -; RUN: opt < %s -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -mcpu=cortex-a9 | FileCheck %s - -define <2 x i8> @sdiv_v2_i8(<2 x i8> %a, <2 x i8> %b) { - ; CHECK: sdiv_v2_i8 - ; CHECK: cost of 40 {{.*}} sdiv - - %1 = 
sdiv <2 x i8> %a, %b - ret <2 x i8> %1 -} -define <2 x i16> @sdiv_v2_i16(<2 x i16> %a, <2 x i16> %b) { - ; CHECK: sdiv_v2_i16 - ; CHECK: cost of 40 {{.*}} sdiv - - %1 = sdiv <2 x i16> %a, %b - ret <2 x i16> %1 -} -define <2 x i32> @sdiv_v2_i32(<2 x i32> %a, <2 x i32> %b) { - ; CHECK: sdiv_v2_i32 - ; CHECK: cost of 40 {{.*}} sdiv - - %1 = sdiv <2 x i32> %a, %b - ret <2 x i32> %1 -} -define <2 x i64> @sdiv_v2_i64(<2 x i64> %a, <2 x i64> %b) { - ; CHECK: sdiv_v2_i64 - ; CHECK: cost of 40 {{.*}} sdiv - - %1 = sdiv <2 x i64> %a, %b - ret <2 x i64> %1 -} -define <4 x i8> @sdiv_v4_i8(<4 x i8> %a, <4 x i8> %b) { - ; CHECK: sdiv_v4_i8 - ; CHECK: cost of 10 {{.*}} sdiv - - %1 = sdiv <4 x i8> %a, %b - ret <4 x i8> %1 -} -define <4 x i16> @sdiv_v4_i16(<4 x i16> %a, <4 x i16> %b) { - ; CHECK: sdiv_v4_i16 - ; CHECK: cost of 10 {{.*}} sdiv - - %1 = sdiv <4 x i16> %a, %b - ret <4 x i16> %1 -} -define <4 x i32> @sdiv_v4_i32(<4 x i32> %a, <4 x i32> %b) { - ; CHECK: sdiv_v4_i32 - ; CHECK: cost of 80 {{.*}} sdiv - - %1 = sdiv <4 x i32> %a, %b - ret <4 x i32> %1 -} -define <4 x i64> @sdiv_v4_i64(<4 x i64> %a, <4 x i64> %b) { - ; CHECK: sdiv_v4_i64 - ; CHECK: cost of 80 {{.*}} sdiv - - %1 = sdiv <4 x i64> %a, %b - ret <4 x i64> %1 -} -define <8 x i8> @sdiv_v8_i8(<8 x i8> %a, <8 x i8> %b) { - ; CHECK: sdiv_v8_i8 - ; CHECK: cost of 10 {{.*}} sdiv - - %1 = sdiv <8 x i8> %a, %b - ret <8 x i8> %1 -} -define <8 x i16> @sdiv_v8_i16(<8 x i16> %a, <8 x i16> %b) { - ; CHECK: sdiv_v8_i16 - ; CHECK: cost of 160 {{.*}} sdiv - - %1 = sdiv <8 x i16> %a, %b - ret <8 x i16> %1 -} -define <8 x i32> @sdiv_v8_i32(<8 x i32> %a, <8 x i32> %b) { - ; CHECK: sdiv_v8_i32 - ; CHECK: cost of 160 {{.*}} sdiv - - %1 = sdiv <8 x i32> %a, %b - ret <8 x i32> %1 -} -define <8 x i64> @sdiv_v8_i64(<8 x i64> %a, <8 x i64> %b) { - ; CHECK: sdiv_v8_i64 - ; CHECK: cost of 160 {{.*}} sdiv - - %1 = sdiv <8 x i64> %a, %b - ret <8 x i64> %1 -} -define <16 x i8> @sdiv_v16_i8(<16 x i8> %a, <16 x i8> %b) { - ; CHECK: sdiv_v16_i8 - ; 
CHECK: cost of 320 {{.*}} sdiv - - %1 = sdiv <16 x i8> %a, %b - ret <16 x i8> %1 -} -define <16 x i16> @sdiv_v16_i16(<16 x i16> %a, <16 x i16> %b) { - ; CHECK: sdiv_v16_i16 - ; CHECK: cost of 320 {{.*}} sdiv - - %1 = sdiv <16 x i16> %a, %b - ret <16 x i16> %1 -} -define <16 x i32> @sdiv_v16_i32(<16 x i32> %a, <16 x i32> %b) { - ; CHECK: sdiv_v16_i32 - ; CHECK: cost of 320 {{.*}} sdiv - - %1 = sdiv <16 x i32> %a, %b - ret <16 x i32> %1 -} -define <16 x i64> @sdiv_v16_i64(<16 x i64> %a, <16 x i64> %b) { - ; CHECK: sdiv_v16_i64 - ; CHECK: cost of 320 {{.*}} sdiv - - %1 = sdiv <16 x i64> %a, %b - ret <16 x i64> %1 -} -define <2 x i8> @udiv_v2_i8(<2 x i8> %a, <2 x i8> %b) { - ; CHECK: udiv_v2_i8 - ; CHECK: cost of 40 {{.*}} udiv - - %1 = udiv <2 x i8> %a, %b - ret <2 x i8> %1 -} -define <2 x i16> @udiv_v2_i16(<2 x i16> %a, <2 x i16> %b) { - ; CHECK: udiv_v2_i16 - ; CHECK: cost of 40 {{.*}} udiv - - %1 = udiv <2 x i16> %a, %b - ret <2 x i16> %1 -} -define <2 x i32> @udiv_v2_i32(<2 x i32> %a, <2 x i32> %b) { - ; CHECK: udiv_v2_i32 - ; CHECK: cost of 40 {{.*}} udiv - - %1 = udiv <2 x i32> %a, %b - ret <2 x i32> %1 -} -define <2 x i64> @udiv_v2_i64(<2 x i64> %a, <2 x i64> %b) { - ; CHECK: udiv_v2_i64 - ; CHECK: cost of 40 {{.*}} udiv - - %1 = udiv <2 x i64> %a, %b - ret <2 x i64> %1 -} -define <4 x i8> @udiv_v4_i8(<4 x i8> %a, <4 x i8> %b) { - ; CHECK: udiv_v4_i8 - ; CHECK: cost of 10 {{.*}} udiv - - %1 = udiv <4 x i8> %a, %b - ret <4 x i8> %1 -} -define <4 x i16> @udiv_v4_i16(<4 x i16> %a, <4 x i16> %b) { - ; CHECK: udiv_v4_i16 - ; CHECK: cost of 10 {{.*}} udiv - - %1 = udiv <4 x i16> %a, %b - ret <4 x i16> %1 -} -define <4 x i32> @udiv_v4_i32(<4 x i32> %a, <4 x i32> %b) { - ; CHECK: udiv_v4_i32 - ; CHECK: cost of 80 {{.*}} udiv - - %1 = udiv <4 x i32> %a, %b - ret <4 x i32> %1 -} -define <4 x i64> @udiv_v4_i64(<4 x i64> %a, <4 x i64> %b) { - ; CHECK: udiv_v4_i64 - ; CHECK: cost of 80 {{.*}} udiv - - %1 = udiv <4 x i64> %a, %b - ret <4 x i64> %1 -} -define <8 x i8> 
@udiv_v8_i8(<8 x i8> %a, <8 x i8> %b) { - ; CHECK: udiv_v8_i8 - ; CHECK: cost of 10 {{.*}} udiv - - %1 = udiv <8 x i8> %a, %b - ret <8 x i8> %1 -} -define <8 x i16> @udiv_v8_i16(<8 x i16> %a, <8 x i16> %b) { - ; CHECK: udiv_v8_i16 - ; CHECK: cost of 160 {{.*}} udiv - - %1 = udiv <8 x i16> %a, %b - ret <8 x i16> %1 -} -define <8 x i32> @udiv_v8_i32(<8 x i32> %a, <8 x i32> %b) { - ; CHECK: udiv_v8_i32 - ; CHECK: cost of 160 {{.*}} udiv - - %1 = udiv <8 x i32> %a, %b - ret <8 x i32> %1 -} -define <8 x i64> @udiv_v8_i64(<8 x i64> %a, <8 x i64> %b) { - ; CHECK: udiv_v8_i64 - ; CHECK: cost of 160 {{.*}} udiv - - %1 = udiv <8 x i64> %a, %b - ret <8 x i64> %1 -} -define <16 x i8> @udiv_v16_i8(<16 x i8> %a, <16 x i8> %b) { - ; CHECK: udiv_v16_i8 - ; CHECK: cost of 320 {{.*}} udiv - - %1 = udiv <16 x i8> %a, %b - ret <16 x i8> %1 -} -define <16 x i16> @udiv_v16_i16(<16 x i16> %a, <16 x i16> %b) { - ; CHECK: udiv_v16_i16 - ; CHECK: cost of 320 {{.*}} udiv - - %1 = udiv <16 x i16> %a, %b - ret <16 x i16> %1 -} -define <16 x i32> @udiv_v16_i32(<16 x i32> %a, <16 x i32> %b) { - ; CHECK: udiv_v16_i32 - ; CHECK: cost of 320 {{.*}} udiv - - %1 = udiv <16 x i32> %a, %b - ret <16 x i32> %1 -} -define <16 x i64> @udiv_v16_i64(<16 x i64> %a, <16 x i64> %b) { - ; CHECK: udiv_v16_i64 - ; CHECK: cost of 320 {{.*}} udiv - - %1 = udiv <16 x i64> %a, %b - ret <16 x i64> %1 +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -mcpu=cortex-a9 | FileCheck %s --check-prefix=CHECK-NEON +; RUN: opt -cost-model -analyze -mtriple=thumbv8.1-m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefix=CHECK-MVE + +define void @i8() { +; CHECK-NEON-LABEL: 'i8' +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = sdiv i8 undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = udiv i8 undef, undef +; CHECK-NEON-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %3 = srem i8 undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = urem i8 undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = sdiv i8 undef, 2 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %6 = udiv i8 undef, 2 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i8 undef, 2 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i8 undef, 2 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVE-LABEL: 'i8' +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = sdiv i8 undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = udiv i8 undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = srem i8 undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = urem i8 undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = sdiv i8 undef, 2 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = udiv i8 undef, 2 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i8 undef, 2 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i8 undef, 2 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %1 = sdiv i8 undef, undef + %2 = udiv i8 undef, undef + %3 = srem i8 undef, undef + %4 = urem i8 undef, undef + %5 = sdiv i8 undef, 2 + %6 = udiv i8 undef, 2 + %7 = srem i8 undef, 2 + %8 = urem i8 undef, 2 + ret void +} + +define void @i16() { +; CHECK-NEON-LABEL: 'i16' +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = sdiv i16 undef, undef +; CHECK-NEON-NEXT: Cost Model: 
Found an estimated cost of 2 for instruction: %2 = udiv i16 undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = srem i16 undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = urem i16 undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = sdiv i16 undef, 2 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %6 = udiv i16 undef, 2 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i16 undef, 2 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i16 undef, 2 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVE-LABEL: 'i16' +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = sdiv i16 undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = udiv i16 undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = srem i16 undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = urem i16 undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = sdiv i16 undef, 2 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = udiv i16 undef, 2 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i16 undef, 2 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i16 undef, 2 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %1 = sdiv i16 undef, undef + %2 = udiv i16 undef, undef + %3 = srem i16 undef, undef + %4 = urem i16 undef, undef + %5 = sdiv i16 undef, 2 + %6 = udiv i16 undef, 2 + %7 = srem i16 undef, 2 + %8 = urem i16 undef, 2 + ret void +} + +define void @i32() { +; CHECK-NEON-LABEL: 'i32' +; 
CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = sdiv i32 undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = udiv i32 undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = srem i32 undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = urem i32 undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = sdiv i32 undef, 2 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %6 = udiv i32 undef, 2 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i32 undef, 2 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i32 undef, 2 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVE-LABEL: 'i32' +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = sdiv i32 undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = udiv i32 undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = srem i32 undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = urem i32 undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = sdiv i32 undef, 2 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = udiv i32 undef, 2 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i32 undef, 2 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i32 undef, 2 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %1 = sdiv i32 undef, undef + %2 = udiv i32 undef, undef + %3 = srem i32 undef, undef + %4 = urem i32 undef, undef + %5 = sdiv i32 undef, 2 + %6 = udiv i32 
undef, 2 + %7 = srem i32 undef, 2 + %8 = urem i32 undef, 2 + ret void +} + +define void @i64() { +; CHECK-NEON-LABEL: 'i64' +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = sdiv i64 undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = udiv i64 undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = srem i64 undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = urem i64 undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %5 = sdiv i64 undef, 2 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %6 = udiv i64 undef, 2 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i64 undef, 2 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i64 undef, 2 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVE-LABEL: 'i64' +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = sdiv i64 undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = udiv i64 undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = srem i64 undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = urem i64 undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = sdiv i64 undef, 2 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = udiv i64 undef, 2 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i64 undef, 2 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i64 undef, 2 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %1 = sdiv i64 undef, undef + 
%2 = udiv i64 undef, undef + %3 = srem i64 undef, undef + %4 = urem i64 undef, undef + %5 = sdiv i64 undef, 2 + %6 = udiv i64 undef, 2 + %7 = srem i64 undef, 2 + %8 = urem i64 undef, 2 + ret void +} + +define void @f16() { +; CHECK-NEON-LABEL: 'f16' +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv half undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = frem half undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %3 = fdiv half undef, 0xH4000 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = frem half undef, 0xH4000 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVE-LABEL: 'f16' +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = fdiv half undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = frem half undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = fdiv half undef, 0xH4000 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = frem half undef, 0xH4000 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %1 = fdiv half undef, undef + %2 = frem half undef, undef + %3 = fdiv half undef, 2. + %4 = frem half undef, 2. 
+ ret void +} + +define void @f32() { +; CHECK-NEON-LABEL: 'f32' +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv float undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = frem float undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %3 = fdiv float undef, 2.000000e+00 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = frem float undef, 2.000000e+00 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVE-LABEL: 'f32' +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = fdiv float undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = frem float undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = fdiv float undef, 2.000000e+00 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = frem float undef, 2.000000e+00 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %1 = fdiv float undef, undef + %2 = frem float undef, undef + %3 = fdiv float undef, 2. + %4 = frem float undef, 2. 
+ ret void +} + +define void @f64() { +; CHECK-NEON-LABEL: 'f64' +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv double undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = frem double undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %3 = fdiv double undef, 2.000000e+00 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = frem double undef, 2.000000e+00 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVE-LABEL: 'f64' +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = fdiv double undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = frem double undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = fdiv double undef, 2.000000e+00 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = frem double undef, 2.000000e+00 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %1 = fdiv double undef, undef + %2 = frem double undef, undef + %3 = fdiv double undef, 2. + %4 = frem double undef, 2. 
+ ret void +} + +define void @vi8() { +; CHECK-NEON-LABEL: 'vi8' +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i8> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i8> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i8> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i8> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f1 = sdiv <4 x i8> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f2 = udiv <4 x i8> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i8> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i8> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %e1 = sdiv <8 x i8> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %e2 = udiv <8 x i8> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i8> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i8> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i8> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i8> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i8> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i8> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVE-LABEL: 'vi8' +; 
CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t1 = sdiv <2 x i8> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t2 = udiv <2 x i8> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t3 = srem <2 x i8> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t4 = urem <2 x i8> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f1 = sdiv <4 x i8> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f2 = udiv <4 x i8> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f3 = srem <4 x i8> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f4 = urem <4 x i8> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e1 = sdiv <8 x i8> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e2 = udiv <8 x i8> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e3 = srem <8 x i8> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e4 = urem <8 x i8> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s1 = sdiv <16 x i8> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s2 = udiv <16 x i8> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s3 = srem <16 x i8> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s4 = urem <16 x i8> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %t1 = sdiv <2 x i8> undef, undef + %t2 = udiv <2 x i8> undef, undef + %t3 = srem <2 x i8> undef, undef + %t4 = urem <2 x i8> undef, 
undef + %f1 = sdiv <4 x i8> undef, undef + %f2 = udiv <4 x i8> undef, undef + %f3 = srem <4 x i8> undef, undef + %f4 = urem <4 x i8> undef, undef + %e1 = sdiv <8 x i8> undef, undef + %e2 = udiv <8 x i8> undef, undef + %e3 = srem <8 x i8> undef, undef + %e4 = urem <8 x i8> undef, undef + %s1 = sdiv <16 x i8> undef, undef + %s2 = udiv <16 x i8> undef, undef + %s3 = srem <16 x i8> undef, undef + %s4 = urem <16 x i8> undef, undef + ret void +} + +define void @vi16() { +; CHECK-NEON-LABEL: 'vi16' +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i16> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i16> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i16> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i16> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f1 = sdiv <4 x i16> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f2 = udiv <4 x i16> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i16> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i16> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i16> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i16> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i16> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i16> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i16> undef, undef +; CHECK-NEON-NEXT: Cost 
Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i16> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i16> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i16> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVE-LABEL: 'vi16' +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t1 = sdiv <2 x i16> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t2 = udiv <2 x i16> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t3 = srem <2 x i16> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t4 = urem <2 x i16> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f1 = sdiv <4 x i16> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f2 = udiv <4 x i16> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f3 = srem <4 x i16> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f4 = urem <4 x i16> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e1 = sdiv <8 x i16> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e2 = udiv <8 x i16> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e3 = srem <8 x i16> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e4 = urem <8 x i16> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s1 = sdiv <16 x i16> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s2 = udiv <16 x i16> undef, 
undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s3 = srem <16 x i16> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s4 = urem <16 x i16> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %t1 = sdiv <2 x i16> undef, undef + %t2 = udiv <2 x i16> undef, undef + %t3 = srem <2 x i16> undef, undef + %t4 = urem <2 x i16> undef, undef + %f1 = sdiv <4 x i16> undef, undef + %f2 = udiv <4 x i16> undef, undef + %f3 = srem <4 x i16> undef, undef + %f4 = urem <4 x i16> undef, undef + %e1 = sdiv <8 x i16> undef, undef + %e2 = udiv <8 x i16> undef, undef + %e3 = srem <8 x i16> undef, undef + %e4 = urem <8 x i16> undef, undef + %s1 = sdiv <16 x i16> undef, undef + %s2 = udiv <16 x i16> undef, undef + %s3 = srem <16 x i16> undef, undef + %s4 = urem <16 x i16> undef, undef + ret void +} + +define void @vi32() { +; CHECK-NEON-LABEL: 'vi32' +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i32> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i32> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i32> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i32> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f1 = sdiv <4 x i32> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f2 = udiv <4 x i32> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i32> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i32> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i32> undef, undef +; 
CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i32> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i32> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i32> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i32> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i32> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i32> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i32> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVE-LABEL: 'vi32' +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t1 = sdiv <2 x i32> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t2 = udiv <2 x i32> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t3 = srem <2 x i32> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t4 = urem <2 x i32> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f1 = sdiv <4 x i32> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f2 = udiv <4 x i32> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f3 = srem <4 x i32> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f4 = urem <4 x i32> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e1 = sdiv <8 x i32> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: 
%e2 = udiv <8 x i32> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e3 = srem <8 x i32> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e4 = urem <8 x i32> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s1 = sdiv <16 x i32> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s2 = udiv <16 x i32> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s3 = srem <16 x i32> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s4 = urem <16 x i32> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %t1 = sdiv <2 x i32> undef, undef + %t2 = udiv <2 x i32> undef, undef + %t3 = srem <2 x i32> undef, undef + %t4 = urem <2 x i32> undef, undef + %f1 = sdiv <4 x i32> undef, undef + %f2 = udiv <4 x i32> undef, undef + %f3 = srem <4 x i32> undef, undef + %f4 = urem <4 x i32> undef, undef + %e1 = sdiv <8 x i32> undef, undef + %e2 = udiv <8 x i32> undef, undef + %e3 = srem <8 x i32> undef, undef + %e4 = urem <8 x i32> undef, undef + %s1 = sdiv <16 x i32> undef, undef + %s2 = udiv <16 x i32> undef, undef + %s3 = srem <16 x i32> undef, undef + %s4 = urem <16 x i32> undef, undef + ret void +} + +define void @vi64() { +; CHECK-NEON-LABEL: 'vi64' +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i64> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i64> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i64> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i64> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f1 = sdiv <4 x 
i64> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f2 = udiv <4 x i64> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i64> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i64> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i64> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i64> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i64> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i64> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i64> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i64> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i64> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i64> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVE-LABEL: 'vi64' +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t1 = sdiv <2 x i64> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t2 = udiv <2 x i64> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t3 = srem <2 x i64> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t4 = urem <2 x i64> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f1 = sdiv <4 x i64> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated 
cost of 20 for instruction: %f2 = udiv <4 x i64> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f3 = srem <4 x i64> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f4 = urem <4 x i64> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e1 = sdiv <8 x i64> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e2 = udiv <8 x i64> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e3 = srem <8 x i64> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e4 = urem <8 x i64> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s1 = sdiv <16 x i64> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s2 = udiv <16 x i64> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s3 = srem <16 x i64> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s4 = urem <16 x i64> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %t1 = sdiv <2 x i64> undef, undef + %t2 = udiv <2 x i64> undef, undef + %t3 = srem <2 x i64> undef, undef + %t4 = urem <2 x i64> undef, undef + %f1 = sdiv <4 x i64> undef, undef + %f2 = udiv <4 x i64> undef, undef + %f3 = srem <4 x i64> undef, undef + %f4 = urem <4 x i64> undef, undef + %e1 = sdiv <8 x i64> undef, undef + %e2 = udiv <8 x i64> undef, undef + %e3 = srem <8 x i64> undef, undef + %e4 = urem <8 x i64> undef, undef + %s1 = sdiv <16 x i64> undef, undef + %s2 = udiv <16 x i64> undef, undef + %s3 = srem <16 x i64> undef, undef + %s4 = urem <16 x i64> undef, undef + ret void +} + +define void @vf16() { +; CHECK-NEON-LABEL: 'vf16' +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for 
instruction: %1 = fdiv <2 x half> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = fdiv <2 x half> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x half> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %4 = fdiv <4 x half> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x half> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %6 = fdiv <8 x half> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVE-LABEL: 'vf16' +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %1 = fdiv <2 x half> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %2 = fdiv <2 x half> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %3 = fdiv <4 x half> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %4 = fdiv <4 x half> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %5 = fdiv <8 x half> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %6 = fdiv <8 x half> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %1 = fdiv <2 x half> undef, undef + %2 = fdiv <2 x half> undef, undef + %3 = fdiv <4 x half> undef, undef + %4 = fdiv <4 x half> undef, undef + %5 = fdiv <8 x half> undef, undef + %6 = fdiv <8 x half> undef, undef + ret void +} + +define void @vf32() { +; CHECK-NEON-LABEL: 'vf32' +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %1 = fdiv <2 x float> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %2 = fdiv <2 x float> undef, undef +; 
CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %3 = fdiv <4 x float> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %4 = fdiv <4 x float> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %5 = fdiv <8 x float> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %6 = fdiv <8 x float> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVE-LABEL: 'vf32' +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %1 = fdiv <2 x float> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %2 = fdiv <2 x float> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %3 = fdiv <4 x float> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %4 = fdiv <4 x float> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %5 = fdiv <8 x float> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %6 = fdiv <8 x float> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %1 = fdiv <2 x float> undef, undef + %2 = fdiv <2 x float> undef, undef + %3 = fdiv <4 x float> undef, undef + %4 = fdiv <4 x float> undef, undef + %5 = fdiv <8 x float> undef, undef + %6 = fdiv <8 x float> undef, undef + ret void +} + +define void @vf64() { +; CHECK-NEON-LABEL: 'vf64' +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %1 = fdiv <2 x double> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %2 = fdiv <2 x double> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %3 = fdiv <4 x double> undef, undef +; CHECK-NEON-NEXT: Cost Model: 
Found an estimated cost of 16 for instruction: %4 = fdiv <4 x double> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %5 = fdiv <8 x double> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %6 = fdiv <8 x double> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVE-LABEL: 'vf64' +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %1 = fdiv <2 x double> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %2 = fdiv <2 x double> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %3 = fdiv <4 x double> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %4 = fdiv <4 x double> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %5 = fdiv <8 x double> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %6 = fdiv <8 x double> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %1 = fdiv <2 x double> undef, undef + %2 = fdiv <2 x double> undef, undef + %3 = fdiv <4 x double> undef, undef + %4 = fdiv <4 x double> undef, undef + %5 = fdiv <8 x double> undef, undef + %6 = fdiv <8 x double> undef, undef + ret void +} + +define void @vi8_2() { +; CHECK-NEON-LABEL: 'vi8_2' +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i8> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i8> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i8> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i8> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for 
instruction: %f1 = sdiv <4 x i8> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f2 = udiv <4 x i8> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i8> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i8> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %e1 = sdiv <8 x i8> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %e2 = udiv <8 x i8> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i8> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i8> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i8> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i8> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i8> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i8> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVE-LABEL: 'vi8_2' +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t1 = sdiv <2 x i8> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t2 = udiv <2 x i8> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t3 = srem <2 x i8> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t4 = urem <2 x i8> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f1 = sdiv <4 x i8> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f2 = udiv <4 x i8> undef, +; CHECK-MVE-NEXT: Cost Model: Found an 
estimated cost of 20 for instruction: %f3 = srem <4 x i8> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f4 = urem <4 x i8> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e1 = sdiv <8 x i8> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e2 = udiv <8 x i8> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e3 = srem <8 x i8> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e4 = urem <8 x i8> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s1 = sdiv <16 x i8> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s2 = udiv <16 x i8> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s3 = srem <16 x i8> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s4 = urem <16 x i8> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %t1 = sdiv <2 x i8> undef, + %t2 = udiv <2 x i8> undef, + %t3 = srem <2 x i8> undef, + %t4 = urem <2 x i8> undef, + %f1 = sdiv <4 x i8> undef, + %f2 = udiv <4 x i8> undef, + %f3 = srem <4 x i8> undef, + %f4 = urem <4 x i8> undef, + %e1 = sdiv <8 x i8> undef, + %e2 = udiv <8 x i8> undef, + %e3 = srem <8 x i8> undef, + %e4 = urem <8 x i8> undef, + %s1 = sdiv <16 x i8> undef, + %s2 = udiv <16 x i8> undef, + %s3 = srem <16 x i8> undef, + %s4 = urem <16 x i8> undef, + ret void +} + +define void @vi16_2() { +; CHECK-NEON-LABEL: 'vi16_2' +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i16> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i16> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i16> undef, +; CHECK-NEON-NEXT: Cost Model: 
Found an estimated cost of 40 for instruction: %t4 = urem <2 x i16> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f1 = sdiv <4 x i16> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f2 = udiv <4 x i16> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i16> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i16> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i16> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i16> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i16> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i16> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i16> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i16> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i16> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i16> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVE-LABEL: 'vi16_2' +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t1 = sdiv <2 x i16> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t2 = udiv <2 x i16> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t3 = srem <2 x i16> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t4 = urem <2 x i16> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f1 = sdiv 
<4 x i16> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f2 = udiv <4 x i16> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f3 = srem <4 x i16> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f4 = urem <4 x i16> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e1 = sdiv <8 x i16> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e2 = udiv <8 x i16> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e3 = srem <8 x i16> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e4 = urem <8 x i16> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s1 = sdiv <16 x i16> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s2 = udiv <16 x i16> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s3 = srem <16 x i16> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s4 = urem <16 x i16> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %t1 = sdiv <2 x i16> undef, + %t2 = udiv <2 x i16> undef, + %t3 = srem <2 x i16> undef, + %t4 = urem <2 x i16> undef, + %f1 = sdiv <4 x i16> undef, + %f2 = udiv <4 x i16> undef, + %f3 = srem <4 x i16> undef, + %f4 = urem <4 x i16> undef, + %e1 = sdiv <8 x i16> undef, + %e2 = udiv <8 x i16> undef, + %e3 = srem <8 x i16> undef, + %e4 = urem <8 x i16> undef, + %s1 = sdiv <16 x i16> undef, + %s2 = udiv <16 x i16> undef, + %s3 = srem <16 x i16> undef, + %s4 = urem <16 x i16> undef, + ret void +} + +define void @vi32_2() { +; CHECK-NEON-LABEL: 'vi32_2' +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i32> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost 
of 40 for instruction: %t2 = udiv <2 x i32> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i32> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i32> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f1 = sdiv <4 x i32> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f2 = udiv <4 x i32> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i32> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i32> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i32> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i32> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i32> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i32> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i32> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i32> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i32> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i32> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVE-LABEL: 'vi32_2' +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t1 = sdiv <2 x i32> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t2 = udiv <2 x i32> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t3 = srem <2 x i32> undef, +; 
CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t4 = urem <2 x i32> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f1 = sdiv <4 x i32> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f2 = udiv <4 x i32> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f3 = srem <4 x i32> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f4 = urem <4 x i32> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e1 = sdiv <8 x i32> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e2 = udiv <8 x i32> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e3 = srem <8 x i32> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e4 = urem <8 x i32> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s1 = sdiv <16 x i32> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s2 = udiv <16 x i32> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s3 = srem <16 x i32> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s4 = urem <16 x i32> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %t1 = sdiv <2 x i32> undef, + %t2 = udiv <2 x i32> undef, + %t3 = srem <2 x i32> undef, + %t4 = urem <2 x i32> undef, + %f1 = sdiv <4 x i32> undef, + %f2 = udiv <4 x i32> undef, + %f3 = srem <4 x i32> undef, + %f4 = urem <4 x i32> undef, + %e1 = sdiv <8 x i32> undef, + %e2 = udiv <8 x i32> undef, + %e3 = srem <8 x i32> undef, + %e4 = urem <8 x i32> undef, + %s1 = sdiv <16 x i32> undef, + %s2 = udiv <16 x i32> undef, + %s3 = srem <16 x i32> undef, + %s4 = urem <16 x i32> undef, + ret void +} + +define void @vi64_2() { +; 
CHECK-NEON-LABEL: 'vi64_2' +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i64> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i64> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i64> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i64> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f1 = sdiv <4 x i64> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f2 = udiv <4 x i64> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i64> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i64> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i64> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i64> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i64> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i64> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i64> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i64> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i64> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i64> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVE-LABEL: 'vi64_2' +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t1 = sdiv <2 x i64> undef, +; CHECK-MVE-NEXT: Cost 
Model: Found an estimated cost of 6 for instruction: %t2 = udiv <2 x i64> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t3 = srem <2 x i64> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t4 = urem <2 x i64> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f1 = sdiv <4 x i64> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f2 = udiv <4 x i64> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f3 = srem <4 x i64> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f4 = urem <4 x i64> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e1 = sdiv <8 x i64> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e2 = udiv <8 x i64> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e3 = srem <8 x i64> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e4 = urem <8 x i64> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s1 = sdiv <16 x i64> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s2 = udiv <16 x i64> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s3 = srem <16 x i64> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s4 = urem <16 x i64> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %t1 = sdiv <2 x i64> undef, + %t2 = udiv <2 x i64> undef, + %t3 = srem <2 x i64> undef, + %t4 = urem <2 x i64> undef, + %f1 = sdiv <4 x i64> undef, + %f2 = udiv <4 x i64> undef, + %f3 = srem <4 x i64> undef, + %f4 = urem <4 x i64> undef, + %e1 = sdiv <8 x i64> undef, + %e2 = udiv <8 x i64> undef, + %e3 = srem <8 x i64> undef, + %e4 = 
urem <8 x i64> undef, + %s1 = sdiv <16 x i64> undef, + %s2 = udiv <16 x i64> undef, + %s3 = srem <16 x i64> undef, + %s4 = urem <16 x i64> undef, + ret void +} + +define void @vf16_2() { +; CHECK-NEON-LABEL: 'vf16_2' +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x half> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = fdiv <2 x half> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x half> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %4 = fdiv <4 x half> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x half> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %6 = fdiv <8 x half> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVE-LABEL: 'vf16_2' +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %1 = fdiv <2 x half> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %2 = fdiv <2 x half> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %3 = fdiv <4 x half> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %4 = fdiv <4 x half> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %5 = fdiv <8 x half> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %6 = fdiv <8 x half> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %1 = fdiv <2 x half> undef, + %2 = fdiv <2 x half> undef, + %3 = fdiv <4 x half> undef, + %4 = fdiv <4 x half> undef, + %5 = fdiv <8 x half> undef, + %6 = fdiv <8 x half> undef, + ret void +} + +define void @vf32_2() { +; CHECK-NEON-LABEL: 'vf32_2' +; CHECK-NEON-NEXT: Cost Model: Found an estimated 
cost of 12 for instruction: %1 = fdiv <2 x float> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %2 = fdiv <2 x float> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %3 = fdiv <4 x float> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %4 = fdiv <4 x float> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %5 = fdiv <8 x float> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %6 = fdiv <8 x float> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVE-LABEL: 'vf32_2' +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %1 = fdiv <2 x float> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %2 = fdiv <2 x float> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %3 = fdiv <4 x float> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %4 = fdiv <4 x float> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %5 = fdiv <8 x float> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %6 = fdiv <8 x float> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %1 = fdiv <2 x float> undef, + %2 = fdiv <2 x float> undef, + %3 = fdiv <4 x float> undef, + %4 = fdiv <4 x float> undef, + %5 = fdiv <8 x float> undef, + %6 = fdiv <8 x float> undef, + ret void +} + +define void @vf64_2() { +; CHECK-NEON-LABEL: 'vf64_2' +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %1 = fdiv <2 x double> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %2 = fdiv <2 x double> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %3 
= fdiv <4 x double> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %4 = fdiv <4 x double> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %5 = fdiv <8 x double> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %6 = fdiv <8 x double> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVE-LABEL: 'vf64_2' +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %1 = fdiv <2 x double> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %2 = fdiv <2 x double> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %3 = fdiv <4 x double> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %4 = fdiv <4 x double> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %5 = fdiv <8 x double> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %6 = fdiv <8 x double> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %1 = fdiv <2 x double> undef, + %2 = fdiv <2 x double> undef, + %3 = fdiv <4 x double> undef, + %4 = fdiv <4 x double> undef, + %5 = fdiv <8 x double> undef, + %6 = fdiv <8 x double> undef, + ret void } -define <2 x i8> @srem_v2_i8(<2 x i8> %a, <2 x i8> %b) { - ; CHECK: srem_v2_i8 - ; CHECK: cost of 40 {{.*}} srem - %1 = srem <2 x i8> %a, %b - ret <2 x i8> %1 -} -define <2 x i16> @srem_v2_i16(<2 x i16> %a, <2 x i16> %b) { - ; CHECK: srem_v2_i16 - ; CHECK: cost of 40 {{.*}} srem - - %1 = srem <2 x i16> %a, %b - ret <2 x i16> %1 -} -define <2 x i32> @srem_v2_i32(<2 x i32> %a, <2 x i32> %b) { - ; CHECK: srem_v2_i32 - ; CHECK: cost of 40 {{.*}} srem - - %1 = srem <2 x i32> %a, %b - ret <2 x i32> %1 -} -define <2 x i64> @srem_v2_i64(<2 x i64> %a, <2 x i64> %b) { - ; CHECK: 
srem_v2_i64 - ; CHECK: cost of 40 {{.*}} srem - - %1 = srem <2 x i64> %a, %b - ret <2 x i64> %1 -} -define <4 x i8> @srem_v4_i8(<4 x i8> %a, <4 x i8> %b) { - ; CHECK: srem_v4_i8 - ; CHECK: cost of 80 {{.*}} srem - - %1 = srem <4 x i8> %a, %b - ret <4 x i8> %1 -} -define <4 x i16> @srem_v4_i16(<4 x i16> %a, <4 x i16> %b) { - ; CHECK: srem_v4_i16 - ; CHECK: cost of 80 {{.*}} srem - - %1 = srem <4 x i16> %a, %b - ret <4 x i16> %1 -} -define <4 x i32> @srem_v4_i32(<4 x i32> %a, <4 x i32> %b) { - ; CHECK: srem_v4_i32 - ; CHECK: cost of 80 {{.*}} srem - - %1 = srem <4 x i32> %a, %b - ret <4 x i32> %1 -} -define <4 x i64> @srem_v4_i64(<4 x i64> %a, <4 x i64> %b) { - ; CHECK: srem_v4_i64 - ; CHECK: cost of 80 {{.*}} srem - - %1 = srem <4 x i64> %a, %b - ret <4 x i64> %1 -} -define <8 x i8> @srem_v8_i8(<8 x i8> %a, <8 x i8> %b) { - ; CHECK: srem_v8_i8 - ; CHECK: cost of 160 {{.*}} srem - - %1 = srem <8 x i8> %a, %b - ret <8 x i8> %1 -} -define <8 x i16> @srem_v8_i16(<8 x i16> %a, <8 x i16> %b) { - ; CHECK: srem_v8_i16 - ; CHECK: cost of 160 {{.*}} srem - - %1 = srem <8 x i16> %a, %b - ret <8 x i16> %1 -} -define <8 x i32> @srem_v8_i32(<8 x i32> %a, <8 x i32> %b) { - ; CHECK: srem_v8_i32 - ; CHECK: cost of 160 {{.*}} srem - - %1 = srem <8 x i32> %a, %b - ret <8 x i32> %1 -} -define <8 x i64> @srem_v8_i64(<8 x i64> %a, <8 x i64> %b) { - ; CHECK: srem_v8_i64 - ; CHECK: cost of 160 {{.*}} srem - - %1 = srem <8 x i64> %a, %b - ret <8 x i64> %1 -} -define <16 x i8> @srem_v16_i8(<16 x i8> %a, <16 x i8> %b) { - ; CHECK: srem_v16_i8 - ; CHECK: cost of 320 {{.*}} srem - - %1 = srem <16 x i8> %a, %b - ret <16 x i8> %1 -} -define <16 x i16> @srem_v16_i16(<16 x i16> %a, <16 x i16> %b) { - ; CHECK: srem_v16_i16 - ; CHECK: cost of 320 {{.*}} srem - - %1 = srem <16 x i16> %a, %b - ret <16 x i16> %1 -} -define <16 x i32> @srem_v16_i32(<16 x i32> %a, <16 x i32> %b) { - ; CHECK: srem_v16_i32 - ; CHECK: cost of 320 {{.*}} srem - - %1 = srem <16 x i32> %a, %b - ret <16 x i32> %1 -} -define <16 
x i64> @srem_v16_i64(<16 x i64> %a, <16 x i64> %b) { - ; CHECK: srem_v16_i64 - ; CHECK: cost of 320 {{.*}} srem - - %1 = srem <16 x i64> %a, %b - ret <16 x i64> %1 -} -define <2 x i8> @urem_v2_i8(<2 x i8> %a, <2 x i8> %b) { - ; CHECK: urem_v2_i8 - ; CHECK: cost of 40 {{.*}} urem - - %1 = urem <2 x i8> %a, %b - ret <2 x i8> %1 -} -define <2 x i16> @urem_v2_i16(<2 x i16> %a, <2 x i16> %b) { - ; CHECK: urem_v2_i16 - ; CHECK: cost of 40 {{.*}} urem - - %1 = urem <2 x i16> %a, %b - ret <2 x i16> %1 -} -define <2 x i32> @urem_v2_i32(<2 x i32> %a, <2 x i32> %b) { - ; CHECK: urem_v2_i32 - ; CHECK: cost of 40 {{.*}} urem - - %1 = urem <2 x i32> %a, %b - ret <2 x i32> %1 -} -define <2 x i64> @urem_v2_i64(<2 x i64> %a, <2 x i64> %b) { - ; CHECK: urem_v2_i64 - ; CHECK: cost of 40 {{.*}} urem - - %1 = urem <2 x i64> %a, %b - ret <2 x i64> %1 -} -define <4 x i8> @urem_v4_i8(<4 x i8> %a, <4 x i8> %b) { - ; CHECK: urem_v4_i8 - ; CHECK: cost of 80 {{.*}} urem - - %1 = urem <4 x i8> %a, %b - ret <4 x i8> %1 -} -define <4 x i16> @urem_v4_i16(<4 x i16> %a, <4 x i16> %b) { - ; CHECK: urem_v4_i16 - ; CHECK: cost of 80 {{.*}} urem - - %1 = urem <4 x i16> %a, %b - ret <4 x i16> %1 -} -define <4 x i32> @urem_v4_i32(<4 x i32> %a, <4 x i32> %b) { - ; CHECK: urem_v4_i32 - ; CHECK: cost of 80 {{.*}} urem - - %1 = urem <4 x i32> %a, %b - ret <4 x i32> %1 -} -define <4 x i64> @urem_v4_i64(<4 x i64> %a, <4 x i64> %b) { - ; CHECK: urem_v4_i64 - ; CHECK: cost of 80 {{.*}} urem - - %1 = urem <4 x i64> %a, %b - ret <4 x i64> %1 -} -define <8 x i8> @urem_v8_i8(<8 x i8> %a, <8 x i8> %b) { - ; CHECK: urem_v8_i8 - ; CHECK: cost of 160 {{.*}} urem - - %1 = urem <8 x i8> %a, %b - ret <8 x i8> %1 -} -define <8 x i16> @urem_v8_i16(<8 x i16> %a, <8 x i16> %b) { - ; CHECK: urem_v8_i16 - ; CHECK: cost of 160 {{.*}} urem - - %1 = urem <8 x i16> %a, %b - ret <8 x i16> %1 -} -define <8 x i32> @urem_v8_i32(<8 x i32> %a, <8 x i32> %b) { - ; CHECK: urem_v8_i32 - ; CHECK: cost of 160 {{.*}} urem - - %1 = urem <8 x 
i32> %a, %b - ret <8 x i32> %1 -} -define <8 x i64> @urem_v8_i64(<8 x i64> %a, <8 x i64> %b) { - ; CHECK: urem_v8_i64 - ; CHECK: cost of 160 {{.*}} urem - - %1 = urem <8 x i64> %a, %b - ret <8 x i64> %1 -} -define <16 x i8> @urem_v16_i8(<16 x i8> %a, <16 x i8> %b) { - ; CHECK: urem_v16_i8 - ; CHECK: cost of 320 {{.*}} urem - - %1 = urem <16 x i8> %a, %b - ret <16 x i8> %1 -} -define <16 x i16> @urem_v16_i16(<16 x i16> %a, <16 x i16> %b) { - ; CHECK: urem_v16_i16 - ; CHECK: cost of 320 {{.*}} urem - - %1 = urem <16 x i16> %a, %b - ret <16 x i16> %1 -} -define <16 x i32> @urem_v16_i32(<16 x i32> %a, <16 x i32> %b) { - ; CHECK: urem_v16_i32 - ; CHECK: cost of 320 {{.*}} urem - - %1 = urem <16 x i32> %a, %b - ret <16 x i32> %1 -} -define <16 x i64> @urem_v16_i64(<16 x i64> %a, <16 x i64> %b) { - ; CHECK: urem_v16_i64 - ; CHECK: cost of 320 {{.*}} urem - - %1 = urem <16 x i64> %a, %b - ret <16 x i64> %1 -} diff --git a/llvm/test/Analysis/CostModel/ARM/fparith.ll b/llvm/test/Analysis/CostModel/ARM/fparith.ll new file mode 100644 index 000000000..bc2cd104a --- /dev/null +++ b/llvm/test/Analysis/CostModel/ARM/fparith.ll @@ -0,0 +1,172 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -cost-model -analyze -mtriple=thumbv8.1-m.main-none-eabi -mattr=+mve < %s | FileCheck %s --check-prefix=CHECK-MVE +; RUN: opt -cost-model -analyze -mtriple=thumbv8.1-m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefix=CHECK-MVEFP + +define void @f32() { +; CHECK-MVE-LABEL: 'f32' +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = fadd float undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = fsub float undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fmul float undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVEFP-LABEL: 'f32' +; CHECK-MVEFP-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %c = fadd float undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = fsub float undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fmul float undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %c = fadd float undef, undef + %d = fsub float undef, undef + %e = fmul float undef, undef + ret void +} + +define void @f16() { +; CHECK-MVE-LABEL: 'f16' +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = fadd half undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = fsub half undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fmul half undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVEFP-LABEL: 'f16' +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = fadd half undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = fsub half undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fmul half undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %c = fadd half undef, undef + %d = fsub half undef, undef + %e = fmul half undef, undef + ret void +} + +define void @f64() { +; CHECK-MVE-LABEL: 'f64' +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = fadd double undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = fsub double undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fmul double undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVEFP-LABEL: 'f64' +; 
CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = fadd double undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = fsub double undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fmul double undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %c = fadd double undef, undef + %d = fsub double undef, undef + %e = fmul double undef, undef + ret void +} + +define void @vf32() { +; CHECK-MVE-LABEL: 'vf32' +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %c2 = fadd <2 x float> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %d2 = fsub <2 x float> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %e2 = fmul <2 x float> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %c4 = fadd <4 x float> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %d4 = fsub <4 x float> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %e4 = fmul <4 x float> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %c8 = fadd <8 x float> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %d8 = fsub <8 x float> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e8 = fmul <8 x float> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVEFP-LABEL: 'vf32' +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c2 = fadd <2 x float> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %d2 = fsub <2 x float> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated 
cost of 2 for instruction: %e2 = fmul <2 x float> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c4 = fadd <4 x float> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %d4 = fsub <4 x float> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e4 = fmul <4 x float> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %c8 = fadd <8 x float> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %d8 = fsub <8 x float> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e8 = fmul <8 x float> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %c2 = fadd <2 x float> undef, undef + %d2 = fsub <2 x float> undef, undef + %e2 = fmul <2 x float> undef, undef + %c4 = fadd <4 x float> undef, undef + %d4 = fsub <4 x float> undef, undef + %e4 = fmul <4 x float> undef, undef + %c8 = fadd <8 x float> undef, undef + %d8 = fsub <8 x float> undef, undef + %e8 = fmul <8 x float> undef, undef + ret void +} + +define void @vf16() { +; CHECK-MVE-LABEL: 'vf16' +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %c2 = fadd <2 x half> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %d2 = fsub <2 x half> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %e2 = fmul <2 x half> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %c4 = fadd <4 x half> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %d4 = fsub <4 x half> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %e4 = fmul <4 x half> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for 
instruction: %c8 = fadd <8 x half> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %d8 = fsub <8 x half> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e8 = fmul <8 x half> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVEFP-LABEL: 'vf16' +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c2 = fadd <2 x half> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %d2 = fsub <2 x half> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e2 = fmul <2 x half> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c4 = fadd <4 x half> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %d4 = fsub <4 x half> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e4 = fmul <4 x half> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c8 = fadd <8 x half> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %d8 = fsub <8 x half> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e8 = fmul <8 x half> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %c2 = fadd <2 x half> undef, undef + %d2 = fsub <2 x half> undef, undef + %e2 = fmul <2 x half> undef, undef + %c4 = fadd <4 x half> undef, undef + %d4 = fsub <4 x half> undef, undef + %e4 = fmul <4 x half> undef, undef + %c8 = fadd <8 x half> undef, undef + %d8 = fsub <8 x half> undef, undef + %e8 = fmul <8 x half> undef, undef + ret void +} + +define void @vf64() { +; CHECK-MVE-LABEL: 'vf64' +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %c2 = fadd <2 
x double> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %d2 = fsub <2 x double> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %e2 = fmul <2 x double> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %c4 = fadd <4 x double> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %d4 = fsub <4 x double> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %e4 = fmul <4 x double> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %c8 = fadd <8 x double> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %d8 = fsub <8 x double> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e8 = fmul <8 x double> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVEFP-LABEL: 'vf64' +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %c2 = fadd <2 x double> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %d2 = fsub <2 x double> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %e2 = fmul <2 x double> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %c4 = fadd <4 x double> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %d4 = fsub <4 x double> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %e4 = fmul <4 x double> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %c8 = fadd <8 x double> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %d8 = fsub <8 x double> undef, undef +; 
CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e8 = fmul <8 x double> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %c2 = fadd <2 x double> undef, undef + %d2 = fsub <2 x double> undef, undef + %e2 = fmul <2 x double> undef, undef + %c4 = fadd <4 x double> undef, undef + %d4 = fsub <4 x double> undef, undef + %e4 = fmul <4 x double> undef, undef + %c8 = fadd <8 x double> undef, undef + %d8 = fsub <8 x double> undef, undef + %e8 = fmul <8 x double> undef, undef + ret void +} + diff --git a/llvm/test/Analysis/CostModel/ARM/freeshift.ll b/llvm/test/Analysis/CostModel/ARM/freeshift.ll new file mode 100644 index 000000000..464b55225 --- /dev/null +++ b/llvm/test/Analysis/CostModel/ARM/freeshift.ll @@ -0,0 +1,96 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -cost-model -analyze -mtriple=thumbv8.1m.main-none-eabi < %s | FileCheck %s + +define void @shl(i32 %a, i32 %b) { +; CHECK-LABEL: 'shl' +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %as = shl i32 %a, 3 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ac = add i32 %b, %as +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ss = shl i32 %a, 3 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sc = sub i32 %b, %ss +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %xs = shl i32 %a, 3 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xc = xor i32 %b, %xs +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ns = shl i32 %a, 3 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nc = and i32 %b, %ns +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %os = shl i32 %a, 3 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %oc = or i32 %b, %os +; CHECK-NEXT: Cost 
Model: Found an estimated cost of 0 for instruction: %is = shl i32 %a, 3 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ic = icmp eq i32 %b, %is +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %as = shl i32 %a, 3 + %ac = add i32 %b, %as + %ss = shl i32 %a, 3 + %sc = sub i32 %b, %ss + %xs = shl i32 %a, 3 + %xc = xor i32 %b, %xs + %ns = shl i32 %a, 3 + %nc = and i32 %b, %ns + %os = shl i32 %a, 3 + %oc = or i32 %b, %os + %is = shl i32 %a, 3 + %ic = icmp eq i32 %b, %is + ret void +} + +define void @ashr(i32 %a, i32 %b) { +; CHECK-LABEL: 'ashr' +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %as = ashr i32 %a, 3 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ac = add i32 %b, %as +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ss = ashr i32 %a, 3 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sc = sub i32 %b, %ss +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %xs = ashr i32 %a, 3 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xc = xor i32 %b, %xs +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ns = ashr i32 %a, 3 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nc = and i32 %b, %ns +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %os = ashr i32 %a, 3 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %oc = or i32 %b, %os +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %is = ashr i32 %a, 3 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ic = icmp eq i32 %b, %is +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %as = ashr i32 %a, 3 + %ac = add i32 %b, %as + %ss = ashr i32 %a, 3 + %sc = sub i32 %b, %ss + %xs = ashr i32 %a, 3 + %xc = xor i32 %b, %xs + %ns = ashr i32 %a, 3 + 
%nc = and i32 %b, %ns + %os = ashr i32 %a, 3 + %oc = or i32 %b, %os + %is = ashr i32 %a, 3 + %ic = icmp eq i32 %b, %is + ret void +} + +define void @lshr(i32 %a, i32 %b) { +; CHECK-LABEL: 'lshr' +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %as = lshr i32 %a, 3 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ac = add i32 %b, %as +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ss = lshr i32 %a, 3 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sc = sub i32 %b, %ss +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %xs = lshr i32 %a, 3 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xc = xor i32 %b, %xs +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ns = lshr i32 %a, 3 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nc = and i32 %b, %ns +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %os = lshr i32 %a, 3 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %oc = or i32 %b, %os +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %is = lshr i32 %a, 3 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ic = icmp eq i32 %b, %is +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %as = lshr i32 %a, 3 + %ac = add i32 %b, %as + %ss = lshr i32 %a, 3 + %sc = sub i32 %b, %ss + %xs = lshr i32 %a, 3 + %xc = xor i32 %b, %xs + %ns = lshr i32 %a, 3 + %nc = and i32 %b, %ns + %os = lshr i32 %a, 3 + %oc = or i32 %b, %os + %is = lshr i32 %a, 3 + %ic = icmp eq i32 %b, %is + ret void +} + diff --git a/llvm/test/Analysis/CostModel/ARM/load_store.ll b/llvm/test/Analysis/CostModel/ARM/load_store.ll new file mode 100644 index 000000000..89b18d90f --- /dev/null +++ b/llvm/test/Analysis/CostModel/ARM/load_store.ll @@ -0,0 +1,278 @@ +; NOTE: Assertions have been 
autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -cost-model -analyze -mtriple=thumbv6m-none-eabi < %s | FileCheck %s --check-prefix=CHECK-NOVEC +; RUN: opt -cost-model -analyze -mtriple=thumbv7m-none-eabi -mcpu=cortex-m3 < %s | FileCheck %s --check-prefix=CHECK-NOVEC +; RUN: opt -cost-model -analyze -mtriple=thumbv7m-none-eabi -mcpu=cortex-m4 < %s | FileCheck %s --check-prefix=CHECK-FP +; RUN: opt -cost-model -analyze -mtriple=thumbv8.1-m.main-none-eabi -mattr=+mve < %s | FileCheck %s --check-prefix=CHECK-MVE +; RUN: opt -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift < %s | FileCheck %s --check-prefix=CHECK-NEON +; RUN: opt -cost-model -analyze -mtriple=arm-none-eabi -mcpu=cortex-a53 < %s | FileCheck %s --check-prefix=CHECK-NEON + +define void @stores() { +; CHECK-NOVEC-LABEL: 'stores' +; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, i8* undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, i16* undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, i32* undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i64 undef, i64* undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i128 undef, i128* undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float undef, float* undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store double undef, double* undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i8> undef, <2 x i8>* undef, align 1 +; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i16> undef, <2 x i16>* undef, align 2 +; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x 
i32> undef, <2 x i32>* undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 2 +; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <16 x i8> undef, <16 x i8>* undef, align 1 +; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x float> undef, <4 x float>* undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <4 x double> undef, <4 x double>* undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x float> undef, <2 x float>* undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x double> undef, <2 x double>* undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 1 +; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 1 +; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 1 +; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x float> undef, <4 x float>* undef, align 1 +; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x double> undef, <2 x double>* undef, align 1 +; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-FP-LABEL: 'stores' +; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, i8* undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: store i16 undef, i16* undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, i32* undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i64 undef, i64* undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i128 undef, i128* undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float undef, float* undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store double undef, double* undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i8> undef, <2 x i8>* undef, align 1 +; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i16> undef, <2 x i16>* undef, align 2 +; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i32> undef, <2 x i32>* undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 2 +; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <16 x i8> undef, <16 x i8>* undef, align 1 +; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x float> undef, <4 x float>* undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x double> undef, <4 x double>* undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x float> undef, <2 x float>* undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store 
<2 x double> undef, <2 x double>* undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 1 +; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 1 +; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 1 +; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x float> undef, <4 x float>* undef, align 1 +; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x double> undef, <2 x double>* undef, align 1 +; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVE-LABEL: 'stores' +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, i8* undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, i16* undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, i32* undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i64 undef, i64* undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i128 undef, i128* undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float undef, float* undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store double undef, double* undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i8> undef, <2 x i8>* undef, align 1 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i16> undef, <2 x i16>* undef, align 2 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i32> undef, <2 x i32>* undef, align 4 +; 
CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 2 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i8> undef, <16 x i8>* undef, align 1 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x float> undef, <4 x float>* undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x double> undef, <4 x double>* undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x float> undef, <2 x float>* undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x double> undef, <2 x double>* undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 1 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 1 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 1 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x float> undef, <4 x float>* undef, align 1 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x double> undef, <2 x double>* undef, align 1 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-NEON-LABEL: 'stores' +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, i8* undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, i16* 
undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, i32* undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i64 undef, i64* undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i128 undef, i128* undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float undef, float* undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store double undef, double* undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, <2 x i8>* undef, align 1 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, <2 x i16>* undef, align 2 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, <2 x i32>* undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 2 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, <16 x i8>* undef, align 1 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, <4 x float>* undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <4 x double> undef, <4 x double>* undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, <2 x float>* undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x double> undef, <2 x 
double>* undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 1 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 1 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 1 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, <4 x float>* undef, align 1 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x double> undef, <2 x double>* undef, align 1 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + store i8 undef, i8* undef, align 4 + store i16 undef, i16* undef, align 4 + store i32 undef, i32* undef, align 4 + store i64 undef, i64* undef, align 4 + store i128 undef, i128* undef, align 4 + store float undef, float* undef, align 4 + store double undef, double* undef, align 4 + + store <2 x i8> undef, <2 x i8>* undef, align 1 + store <2 x i16> undef, <2 x i16>* undef, align 2 + store <2 x i32> undef, <2 x i32>* undef, align 4 + store <2 x i64> undef, <2 x i64>* undef, align 4 + store <4 x i32> undef, <4 x i32>* undef, align 4 + store <8 x i16> undef, <8 x i16>* undef, align 2 + store <16 x i8> undef, <16 x i8>* undef, align 1 + + store <4 x float> undef, <4 x float>* undef, align 4 + store <4 x double> undef, <4 x double>* undef, align 4 + store <2 x float> undef, <2 x float>* undef, align 4 + store <2 x double> undef, <2 x double>* undef, align 4 + + store <2 x i64> undef, <2 x i64>* undef, align 1 + store <4 x i32> undef, <4 x i32>* undef, align 1 + store <8 x i16> undef, <8 x i16>* undef, align 1 + store <4 x float> undef, <4 x float>* undef, align 1 + store <2 x double> undef, <2 x double>* undef, align 1 + + ret void +} + +define void @loads() { +; CHECK-NOVEC-LABEL: 'loads' +; CHECK-NOVEC-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %1 = load i8, i8* undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, i16* undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, i32* undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = load i64, i64* undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %5 = load i128, i128* undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load float, float* undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %7 = load double, double* undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = load <2 x i8>, <2 x i8>* undef, align 1 +; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = load <2 x i16>, <2 x i16>* undef, align 2 +; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = load <2 x i32>, <2 x i32>* undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %11 = load <2 x i64>, <2 x i64>* undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %12 = load <4 x i32>, <4 x i32>* undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %13 = load <8 x i16>, <8 x i16>* undef, align 2 +; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %14 = load <16 x i8>, <16 x i8>* undef, align 1 +; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %15 = load <4 x float>, <4 x float>* undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %16 = load <4 x double>, <4 x double>* undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %17 = load <2 x float>, <2 x float>* undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %18 = load <2 x double>, <2 x double>* undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %19 = load <2 x i64>, <2 x i64>* undef, align 1 +; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %20 = load <4 x i32>, <4 x i32>* undef, align 1 +; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %21 = load <8 x i16>, <8 x i16>* undef, align 1 +; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %22 = load <4 x float>, <4 x float>* undef, align 1 +; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %23 = load <2 x double>, <2 x double>* undef, align 1 +; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-FP-LABEL: 'loads' +; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, i8* undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, i16* undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, i32* undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = load i64, i64* undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %5 = load i128, i128* undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load float, float* undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = load double, double* undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = load <2 x i8>, <2 x i8>* undef, align 1 +; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = load <2 x i16>, <2 x i16>* undef, align 2 +; 
CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = load <2 x i32>, <2 x i32>* undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %11 = load <2 x i64>, <2 x i64>* undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %12 = load <4 x i32>, <4 x i32>* undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %13 = load <8 x i16>, <8 x i16>* undef, align 2 +; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %14 = load <16 x i8>, <16 x i8>* undef, align 1 +; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %15 = load <4 x float>, <4 x float>* undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %16 = load <4 x double>, <4 x double>* undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = load <2 x float>, <2 x float>* undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %18 = load <2 x double>, <2 x double>* undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %19 = load <2 x i64>, <2 x i64>* undef, align 1 +; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %20 = load <4 x i32>, <4 x i32>* undef, align 1 +; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %21 = load <8 x i16>, <8 x i16>* undef, align 1 +; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %22 = load <4 x float>, <4 x float>* undef, align 1 +; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %23 = load <2 x double>, <2 x double>* undef, align 1 +; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVE-LABEL: 'loads' +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, i8* undef, align 4 +; 
CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, i16* undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, i32* undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = load i64, i64* undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %5 = load i128, i128* undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load float, float* undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = load double, double* undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = load <2 x i8>, <2 x i8>* undef, align 1 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = load <2 x i16>, <2 x i16>* undef, align 2 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = load <2 x i32>, <2 x i32>* undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %11 = load <2 x i64>, <2 x i64>* undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %12 = load <4 x i32>, <4 x i32>* undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %13 = load <8 x i16>, <8 x i16>* undef, align 2 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %14 = load <16 x i8>, <16 x i8>* undef, align 1 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = load <4 x float>, <4 x float>* undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %16 = load <4 x double>, <4 x double>* undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = load <2 x float>, <2 x float>* undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 
2 for instruction: %18 = load <2 x double>, <2 x double>* undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = load <2 x i64>, <2 x i64>* undef, align 1 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %20 = load <4 x i32>, <4 x i32>* undef, align 1 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %21 = load <8 x i16>, <8 x i16>* undef, align 1 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %22 = load <4 x float>, <4 x float>* undef, align 1 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %23 = load <2 x double>, <2 x double>* undef, align 1 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-NEON-LABEL: 'loads' +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, i8* undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, i16* undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, i32* undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = load i64, i64* undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %5 = load i128, i128* undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load float, float* undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = load double, double* undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = load <2 x i8>, <2 x i8>* undef, align 1 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load <2 x i16>, <2 x i16>* undef, align 2 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load <2 x i32>, <2 x i32>* undef, align 
4 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load <2 x i64>, <2 x i64>* undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <4 x i32>, <4 x i32>* undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <8 x i16>, <8 x i16>* undef, align 2 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <16 x i8>, <16 x i8>* undef, align 1 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <4 x float>, <4 x float>* undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %16 = load <4 x double>, <4 x double>* undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = load <2 x float>, <2 x float>* undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %18 = load <2 x double>, <2 x double>* undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = load <2 x i64>, <2 x i64>* undef, align 1 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = load <4 x i32>, <4 x i32>* undef, align 1 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = load <8 x i16>, <8 x i16>* undef, align 1 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = load <4 x float>, <4 x float>* undef, align 1 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %23 = load <2 x double>, <2 x double>* undef, align 1 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + load i8, i8* undef, align 4 + load i16, i16* undef, align 4 + load i32, i32* undef, align 4 + load i64, i64* undef, align 4 + load i128, i128* undef, align 4 + load float, float* undef, align 4 + load double, double* undef, align 4 + + 
load <2 x i8>, <2 x i8>* undef, align 1 + load <2 x i16>, <2 x i16>* undef, align 2 + load <2 x i32>, <2 x i32>* undef, align 4 + load <2 x i64>, <2 x i64>* undef, align 4 + load <4 x i32>, <4 x i32>* undef, align 4 + load <8 x i16>, <8 x i16>* undef, align 2 + load <16 x i8>, <16 x i8>* undef, align 1 + + load <4 x float>, <4 x float>* undef, align 4 + load <4 x double>, <4 x double>* undef, align 4 + load <2 x float>, <2 x float>* undef, align 4 + load <2 x double>, <2 x double>* undef, align 4 + + load <2 x i64>, <2 x i64>* undef, align 1 + load <4 x i32>, <4 x i32>* undef, align 1 + load <8 x i16>, <8 x i16>* undef, align 1 + load <4 x float>, <4 x float>* undef, align 1 + load <2 x double>, <2 x double>* undef, align 1 + + ret void +} + diff --git a/llvm/test/Analysis/CostModel/ARM/select.ll b/llvm/test/Analysis/CostModel/ARM/select.ll index 57e1418a3..4821438e8 100644 --- a/llvm/test/Analysis/CostModel/ARM/select.ll +++ b/llvm/test/Analysis/CostModel/ARM/select.ll @@ -1,74 +1,107 @@ -; RUN: opt < %s -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift | FileCheck %s -target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" -target triple = "thumbv7-apple-ios6.0.0" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -cost-model -analyze -mtriple=thumbv8.1-m.main-none-eabi -mattr=+mve.fp | FileCheck %s --check-prefix=CHECK-MVE +; RUN: opt < %s -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift | FileCheck %s --check-prefix=CHECK-NEON -; CHECK: casts define void @casts() { - ; Scalar values - ; CHECK: cost of 1 {{.*}} select +; CHECK-MVE-LABEL: 'casts' +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = select i1 undef, i8 undef, i8 undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2 = select i1 undef, i16 undef, i16 undef +; CHECK-MVE-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %v3 = select i1 undef, i32 undef, i32 undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4 = select i1 undef, i64 undef, i64 undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5 = select i1 undef, float undef, float undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v6 = select i1 undef, double undef, double undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8 = select <4 x i1> undef, <4 x i8> undef, <4 x i8> undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v9 = select <8 x i1> undef, <8 x i8> undef, <8 x i8> undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v10 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, 
<8 x i32> undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-NEON-LABEL: 'casts' +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = select i1 undef, i8 undef, i8 undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2 = select i1 undef, i16 undef, i16 undef +; CHECK-NEON-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %v3 = select i1 undef, i32 undef, i32 undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4 = select i1 undef, i64 undef, i64 undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5 = select i1 undef, float undef, float undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v6 = select i1 undef, double undef, double undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8 = select <4 x i1> undef, <4 x i8> undef, <4 x i8> undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v9 = select <8 x i1> undef, <8 x i8> undef, <8 x i8> undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v10 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> 
undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + ; Scalar values %v1 = select i1 undef, i8 undef, i8 undef - ; CHECK: cost of 1 {{.*}} select %v2 = select i1 undef, i16 undef, i16 undef - ; CHECK: cost of 1 {{.*}} select %v3 = select i1 undef, i32 undef, i32 undef - ; CHECK: cost of 2 {{.*}} select %v4 = select i1 undef, i64 undef, i64 undef - ; CHECK: cost 
of 1 {{.*}} select %v5 = select i1 undef, float undef, float undef - ; CHECK: cost of 1 {{.*}} select %v6 = select i1 undef, double undef, double undef - ; Vector values - ; CHECK: cost of 1 {{.*}} select + ; Vector values %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef - ; CHECK: cost of 1 {{.*}} select %v8 = select <4 x i1> undef, <4 x i8> undef, <4 x i8> undef - ; CHECK: cost of 1 {{.*}} select %v9 = select <8 x i1> undef, <8 x i8> undef, <8 x i8> undef - ; CHECK: cost of 1 {{.*}} select %v10 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef - ; CHECK: cost of 1 {{.*}} select %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef - ; CHECK: cost of 1 {{.*}} select %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef - ; CHECK: cost of 1 {{.*}} select %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef - ; CHECK: cost of 2 {{.*}} select %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef - ; CHECK: cost of 1 {{.*}} select %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef - ; CHECK: cost of 1 {{.*}} select %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef - ; CHECK: cost of 2 {{.*}} select %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef - ; CHECK: cost of 4 {{.*}} select %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef - ; CHECK: cost of 1 {{.*}} select %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef - ; CHECK: cost of 19 {{.*}} select %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef - ; CHECK: cost of 50 {{.*}} select %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef - ; CHECK: cost of 100 {{.*}} select %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef - ; CHECK: cost of 1 {{.*}} select %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef - ; CHECK: cost of 1 {{.*}} select %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef - ; CHECK: cost of 1 {{.*}} 
select %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef ; odd vectors get legalized and should have similar costs - ; CHECK: cost of 1 {{.*}} select %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef - ; CHECK: cost of 1 {{.*}} select %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef - ; CHECK: cost of 4 {{.*}} select %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef ret void diff --git a/llvm/test/Analysis/CostModel/ARM/shuffle.ll b/llvm/test/Analysis/CostModel/ARM/shuffle.ll index a6a423525..ce2098653 100644 --- a/llvm/test/Analysis/CostModel/ARM/shuffle.ll +++ b/llvm/test/Analysis/CostModel/ARM/shuffle.ll @@ -1,71 +1,104 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift | FileCheck %s -target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" -target triple = "thumbv7-apple-ios6.0.0" +; RUN: opt < %s -cost-model -analyze -mtriple=thumbv8.1-m.main-none-eabi -mattr=+mve.fp | FileCheck %s --check-prefix=CHECK-MVE +; RUN: opt < %s -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift | FileCheck %s --check-prefix=CHECK-NEON define void @broadcast() { -; CHECK-LABEL: 'broadcast' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v7 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v9 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> zeroinitializer -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v10 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> zeroinitializer -; CHECK-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %v11 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v12 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v13 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> zeroinitializer -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v14 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v15 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v17 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVE-LABEL: 'broadcast' +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v7 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> zeroinitializer +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> zeroinitializer +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v9 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> zeroinitializer +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v10 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> zeroinitializer +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v11 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> zeroinitializer +; CHECK-MVE-NEXT: Cost Model: 
Found an estimated cost of 2 for instruction: %v12 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> zeroinitializer +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v13 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> zeroinitializer +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v14 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> zeroinitializer +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v15 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> zeroinitializer +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v17 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> zeroinitializer +; CHECK-MVE-NEXT: Cost Model: Unknown cost for instruction: %v18 = shufflevector <8 x half> undef, <8 x half> undef, <4 x i32> zeroinitializer +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; - %v7 = shufflevector <2 x i8> undef, <2 x i8>undef, <2 x i32> zeroinitializer - %v8 = shufflevector <4 x i8> undef, <4 x i8>undef, <4 x i32> zeroinitializer - %v9 = shufflevector <8 x i8> undef, <8 x i8>undef, <8 x i32> zeroinitializer - %v10 = shufflevector <16 x i8> undef, <16 x i8>undef, <16 x i32> zeroinitializer +; CHECK-NEON-LABEL: 'broadcast' +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v7 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> zeroinitializer +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> zeroinitializer +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v9 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> zeroinitializer +; CHECK-NEON-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %v10 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> zeroinitializer +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v11 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> zeroinitializer +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v12 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> zeroinitializer +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v13 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> zeroinitializer +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v14 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> zeroinitializer +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v15 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> zeroinitializer +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v17 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> zeroinitializer +; CHECK-NEON-NEXT: Cost Model: Unknown cost for instruction: %v18 = shufflevector <8 x half> undef, <8 x half> undef, <4 x i32> zeroinitializer +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %v7 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> zeroinitializer + %v8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> zeroinitializer + %v9 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> zeroinitializer + %v10 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> zeroinitializer - %v11 = shufflevector <2 x i16> undef, <2 x i16>undef, <2 x i32> zeroinitializer - %v12 = shufflevector <4 x i16> undef, <4 x i16>undef, <4 x i32> zeroinitializer - %v13 = 
shufflevector <8 x i16> undef, <8 x i16>undef, <8 x i32> zeroinitializer + %v11 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> zeroinitializer + %v12 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> zeroinitializer + %v13 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> zeroinitializer - %v14 = shufflevector <2 x i32> undef, <2 x i32>undef, <2 x i32> zeroinitializer - %v15 = shufflevector <4 x i32> undef, <4 x i32>undef, <4 x i32> zeroinitializer + %v14 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> zeroinitializer + %v15 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> zeroinitializer - %v16 = shufflevector <2 x float> undef, <2 x float>undef, <2 x i32> zeroinitializer - %v17 = shufflevector <4 x float> undef, <4 x float>undef, <4 x i32> zeroinitializer + %v16 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> zeroinitializer + %v17 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> zeroinitializer + %v18 = shufflevector <8 x half> undef, <8 x half> undef, <4 x i32> zeroinitializer ret void } -;; Reverse shuffles should be lowered to vrev and possibly a vext (for quadwords) +;; Reverse shuffles should be lowered to vrev and possibly a vext (for quadwords, on neon) define void @reverse() { -; CHECK-LABEL: 'reverse' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v7 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v9 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v10 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v11 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x 
i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v12 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v13 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v14 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v15 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v17 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVE-LABEL: 'reverse' +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v7 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v9 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v10 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v11 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v12 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v13 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated 
cost of 16 for instruction: %v14 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v15 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v17 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v18 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-NEON-LABEL: 'reverse' +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v7 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v9 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v10 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v11 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v12 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v13 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v14 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %v15 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v17 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v18 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; - %v7 = shufflevector <2 x i8> undef, <2 x i8>undef, <2 x i32> - %v8 = shufflevector <4 x i8> undef, <4 x i8>undef, <4 x i32> - %v9 = shufflevector <8 x i8> undef, <8 x i8>undef, <8 x i32> - %v10 = shufflevector <16 x i8> undef, <16 x i8>undef, <16 x i32> + %v7 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> + %v8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> + %v9 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> + %v10 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> - %v11 = shufflevector <2 x i16> undef, <2 x i16>undef, <2 x i32> - %v12 = shufflevector <4 x i16> undef, <4 x i16>undef, <4 x i32> - %v13 = shufflevector <8 x i16> undef, <8 x i16>undef, <8 x i32> + %v11 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> + %v12 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> + %v13 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> - %v14 = shufflevector <2 x i32> undef, <2 x i32>undef, <2 x i32> - %v15 = shufflevector <4 x i32> undef, <4 x i32>undef, <4 x i32> + %v14 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> + %v15 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> - %v16 = shufflevector <2 x float> undef, <2 x float>undef, <2 x i32> - %v17 = shufflevector <4 x float> undef, <4 x float>undef, <4 x i32> + %v16 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> 
+ %v17 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> + %v18 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> ret void } diff --git a/llvm/test/Analysis/CostModel/PowerPC/future-cost-model.ll b/llvm/test/Analysis/CostModel/PowerPC/future-cost-model.ll new file mode 100644 index 000000000..3e4fb82e6 --- /dev/null +++ b/llvm/test/Analysis/CostModel/PowerPC/future-cost-model.ll @@ -0,0 +1,16 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=future | FileCheck %s --check-prefix=FUTURE +; RUN: opt < %s -cost-model -analyze -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr9 | FileCheck %s --check-prefix=PWR9 + +define void @test(i16 %p1, i16 %p2, <4 x i16> %p3, <4 x i16> %p4) { + %i1 = add i16 %p1, %p2 + %v1 = add <4 x i16> %p3, %p4 + ret void + ; FUTURE: cost of 1 {{.*}} add + ; FUTURE: cost of 1 {{.*}} add + + ; PWR9: cost of 1 {{.*}} add + ; PWR9: cost of 2 {{.*}} add +} + diff --git a/llvm/test/Analysis/CostModel/PowerPC/insert_extract.ll b/llvm/test/Analysis/CostModel/PowerPC/insert_extract.ll index 030af67d6..59dbd9411 100644 --- a/llvm/test/Analysis/CostModel/PowerPC/insert_extract.ll +++ b/llvm/test/Analysis/CostModel/PowerPC/insert_extract.ll @@ -14,15 +14,15 @@ define i32 @insert(i32 %arg) { ; CHECK-P7-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; CHECK-P8LE-LABEL: 'insert' -; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %x = insertelement <4 x i32> undef, i32 %arg, i32 0 +; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %x = insertelement <4 x i32> undef, i32 %arg, i32 0 ; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; CHECK-P9BE-LABEL: 'insert' -; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %x = insertelement <4 x i32> undef, i32 %arg, i32 0 +; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 4 for 
instruction: %x = insertelement <4 x i32> undef, i32 %arg, i32 0 ; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; CHECK-P9LE-LABEL: 'insert' -; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %x = insertelement <4 x i32> undef, i32 %arg, i32 0 +; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %x = insertelement <4 x i32> undef, i32 %arg, i32 0 ; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %x = insertelement <4 x i32> undef, i32 %arg, i32 0 @@ -40,11 +40,11 @@ define i32 @extract(<4 x i32> %arg) { ; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %x ; ; CHECK-P9BE-LABEL: 'extract' -; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %x = extractelement <4 x i32> %arg, i32 0 +; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %x = extractelement <4 x i32> %arg, i32 0 ; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %x ; ; CHECK-P9LE-LABEL: 'extract' -; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %x = extractelement <4 x i32> %arg, i32 0 +; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %x = extractelement <4 x i32> %arg, i32 0 ; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %x ; %x = extractelement <4 x i32> %arg, i32 0 @@ -83,15 +83,15 @@ define void @test4xi32(<4 x i32> %v1, i32 %x1) { ; CHECK-P7-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-P8LE-LABEL: 'test4xi32' -; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v2 = insertelement <4 x i32> %v1, i32 %x1, i32 2 +; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2 = insertelement <4 x i32> %v1, i32 %x1, i32 2 ; CHECK-P8LE-NEXT: Cost Model: Found an estimated 
cost of 0 for instruction: ret void ; ; CHECK-P9BE-LABEL: 'test4xi32' -; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2 = insertelement <4 x i32> %v1, i32 %x1, i32 2 +; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2 = insertelement <4 x i32> %v1, i32 %x1, i32 2 ; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-P9LE-LABEL: 'test4xi32' -; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2 = insertelement <4 x i32> %v1, i32 %x1, i32 2 +; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2 = insertelement <4 x i32> %v1, i32 %x1, i32 2 ; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %v2 = insertelement <4 x i32> %v1, i32 %x1, i32 2 @@ -114,17 +114,17 @@ define void @vexti32(<4 x i32> %p1) { ; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-P9BE-LABEL: 'vexti32' -; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i1 = extractelement <4 x i32> %p1, i32 0 -; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i2 = extractelement <4 x i32> %p1, i32 1 -; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i3 = extractelement <4 x i32> %p1, i32 2 -; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i4 = extractelement <4 x i32> %p1, i32 3 +; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i1 = extractelement <4 x i32> %p1, i32 0 +; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i2 = extractelement <4 x i32> %p1, i32 1 +; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i3 = extractelement <4 x i32> %p1, i32 2 +; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i4 = extractelement <4 x i32> %p1, i32 3 ; CHECK-P9BE-NEXT: 
Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-P9LE-LABEL: 'vexti32' -; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i1 = extractelement <4 x i32> %p1, i32 0 -; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i2 = extractelement <4 x i32> %p1, i32 1 -; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i3 = extractelement <4 x i32> %p1, i32 2 -; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i4 = extractelement <4 x i32> %p1, i32 3 +; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i1 = extractelement <4 x i32> %p1, i32 0 +; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i2 = extractelement <4 x i32> %p1, i32 1 +; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i3 = extractelement <4 x i32> %p1, i32 2 +; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i4 = extractelement <4 x i32> %p1, i32 3 ; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %i1 = extractelement <4 x i32> %p1, i32 0 @@ -146,13 +146,13 @@ define void @vexti64(<2 x i64> %p1) { ; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-P9BE-LABEL: 'vexti64' -; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i1 = extractelement <2 x i64> %p1, i32 0 -; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i2 = extractelement <2 x i64> %p1, i32 1 +; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i1 = extractelement <2 x i64> %p1, i32 0 +; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i2 = extractelement <2 x i64> %p1, i32 1 ; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-P9LE-LABEL: 'vexti64' -; CHECK-P9LE-NEXT: Cost Model: 
Found an estimated cost of 4 for instruction: %i1 = extractelement <2 x i64> %p1, i32 0 -; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i2 = extractelement <2 x i64> %p1, i32 1 +; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i1 = extractelement <2 x i64> %p1, i32 0 +; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i2 = extractelement <2 x i64> %p1, i32 1 ; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %i1 = extractelement <2 x i64> %p1, i32 0 @@ -172,13 +172,13 @@ define void @vext(<8 x i16> %p1, <16 x i8> %p2) { ; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-P9BE-LABEL: 'vext' -; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i1 = extractelement <8 x i16> %p1, i32 0 -; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i2 = extractelement <16 x i8> %p2, i32 0 +; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i1 = extractelement <8 x i16> %p1, i32 0 +; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i2 = extractelement <16 x i8> %p2, i32 0 ; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-P9LE-LABEL: 'vext' -; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i1 = extractelement <8 x i16> %p1, i32 0 -; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i2 = extractelement <16 x i8> %p2, i32 0 +; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i1 = extractelement <8 x i16> %p1, i32 0 +; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i2 = extractelement <16 x i8> %p2, i32 0 ; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %i1 = extractelement <8 x i16> %p1, i32 0 diff --git 
a/llvm/test/Analysis/CostModel/SystemZ/fp-cast.ll b/llvm/test/Analysis/CostModel/SystemZ/fp-cast.ll index 9a43ca318..0c95b5373 100644 --- a/llvm/test/Analysis/CostModel/SystemZ/fp-cast.ll +++ b/llvm/test/Analysis/CostModel/SystemZ/fp-cast.ll @@ -1,7 +1,7 @@ ; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 \ ; RUN: | FileCheck %s -check-prefixes=CHECK,Z13 -; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=arch13 \ -; RUN: | FileCheck %s -check-prefixes=CHECK,AR13 +; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z15 \ +; RUN: | FileCheck %s -check-prefixes=CHECK,Z15 ; ; Note: The scalarized vector instructions costs are not including any ; extracts, due to the undef operands. @@ -118,7 +118,7 @@ define void @fptosi() { ; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v19 = fptosi <2 x double> undef to <2 x i8> ; CHECK: Cost Model: Found an estimated cost of 5 for instruction: %v20 = fptosi <2 x float> undef to <2 x i64> ; Z13: Cost Model: Found an estimated cost of 12 for instruction: %v21 = fptosi <2 x float> undef to <2 x i32> -; AR13: Cost Model: Found an estimated cost of 1 for instruction: %v21 = fptosi <2 x float> undef to <2 x i32> +; Z15: Cost Model: Found an estimated cost of 1 for instruction: %v21 = fptosi <2 x float> undef to <2 x i32> ; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v22 = fptosi <2 x float> undef to <2 x i16> ; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v23 = fptosi <2 x float> undef to <2 x i8> ; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v24 = fptosi <4 x fp128> undef to <4 x i64> @@ -131,7 +131,7 @@ define void @fptosi() { ; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v31 = fptosi <4 x double> undef to <4 x i8> ; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %v32 = fptosi <4 x float> undef to <4 x i64> ; Z13: Cost Model: Found an estimated cost of 12 
for instruction: %v33 = fptosi <4 x float> undef to <4 x i32> -; AR13: Cost Model: Found an estimated cost of 1 for instruction: %v33 = fptosi <4 x float> undef to <4 x i32> +; Z15: Cost Model: Found an estimated cost of 1 for instruction: %v33 = fptosi <4 x float> undef to <4 x i32> ; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v34 = fptosi <4 x float> undef to <4 x i16> ; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v35 = fptosi <4 x float> undef to <4 x i8> ; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v36 = fptosi <8 x fp128> undef to <8 x i64> @@ -144,7 +144,7 @@ define void @fptosi() { ; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v43 = fptosi <8 x double> undef to <8 x i8> ; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %v44 = fptosi <8 x float> undef to <8 x i64> ; Z13: Cost Model: Found an estimated cost of 24 for instruction: %v45 = fptosi <8 x float> undef to <8 x i32> -; AR13: Cost Model: Found an estimated cost of 2 for instruction: %v45 = fptosi <8 x float> undef to <8 x i32> +; Z15: Cost Model: Found an estimated cost of 2 for instruction: %v45 = fptosi <8 x float> undef to <8 x i32> ; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v46 = fptosi <8 x float> undef to <8 x i16> ; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v47 = fptosi <8 x float> undef to <8 x i8> ; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %v48 = fptosi <16 x double> undef to <16 x i64> @@ -153,7 +153,7 @@ define void @fptosi() { ; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v51 = fptosi <16 x double> undef to <16 x i8> ; CHECK: Cost Model: Found an estimated cost of 40 for instruction: %v52 = fptosi <16 x float> undef to <16 x i64> ; Z13: Cost Model: Found an estimated cost of 48 for instruction: %v53 = fptosi <16 x float> undef to <16 x i32> -; AR13: Cost Model: Found an estimated cost 
of 4 for instruction: %v53 = fptosi <16 x float> undef to <16 x i32> +; Z15: Cost Model: Found an estimated cost of 4 for instruction: %v53 = fptosi <16 x float> undef to <16 x i32> ; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v54 = fptosi <16 x float> undef to <16 x i16> ; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v55 = fptosi <16 x float> undef to <16 x i8> @@ -241,7 +241,7 @@ define void @fptoui() { ; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v19 = fptoui <2 x double> undef to <2 x i8> ; CHECK: Cost Model: Found an estimated cost of 5 for instruction: %v20 = fptoui <2 x float> undef to <2 x i64> ; Z13: Cost Model: Found an estimated cost of 12 for instruction: %v21 = fptoui <2 x float> undef to <2 x i32> -; AR13: Cost Model: Found an estimated cost of 1 for instruction: %v21 = fptoui <2 x float> undef to <2 x i32> +; Z15: Cost Model: Found an estimated cost of 1 for instruction: %v21 = fptoui <2 x float> undef to <2 x i32> ; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v22 = fptoui <2 x float> undef to <2 x i16> ; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v23 = fptoui <2 x float> undef to <2 x i8> ; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v24 = fptoui <4 x fp128> undef to <4 x i64> @@ -254,7 +254,7 @@ define void @fptoui() { ; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v31 = fptoui <4 x double> undef to <4 x i8> ; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %v32 = fptoui <4 x float> undef to <4 x i64> ; Z13: Cost Model: Found an estimated cost of 12 for instruction: %v33 = fptoui <4 x float> undef to <4 x i32> -; AR13: Cost Model: Found an estimated cost of 1 for instruction: %v33 = fptoui <4 x float> undef to <4 x i32> +; Z15: Cost Model: Found an estimated cost of 1 for instruction: %v33 = fptoui <4 x float> undef to <4 x i32> ; CHECK: Cost Model: Found an estimated cost of 
12 for instruction: %v34 = fptoui <4 x float> undef to <4 x i16> ; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v35 = fptoui <4 x float> undef to <4 x i8> ; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v36 = fptoui <8 x fp128> undef to <8 x i64> @@ -267,7 +267,7 @@ define void @fptoui() { ; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v43 = fptoui <8 x double> undef to <8 x i8> ; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %v44 = fptoui <8 x float> undef to <8 x i64> ; Z13: Cost Model: Found an estimated cost of 24 for instruction: %v45 = fptoui <8 x float> undef to <8 x i32> -; AR13: Cost Model: Found an estimated cost of 2 for instruction: %v45 = fptoui <8 x float> undef to <8 x i32> +; Z15: Cost Model: Found an estimated cost of 2 for instruction: %v45 = fptoui <8 x float> undef to <8 x i32> ; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v46 = fptoui <8 x float> undef to <8 x i16> ; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v47 = fptoui <8 x float> undef to <8 x i8> ; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %v48 = fptoui <16 x double> undef to <16 x i64> @@ -276,7 +276,7 @@ define void @fptoui() { ; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v51 = fptoui <16 x double> undef to <16 x i8> ; CHECK: Cost Model: Found an estimated cost of 40 for instruction: %v52 = fptoui <16 x float> undef to <16 x i64> ; Z13: Cost Model: Found an estimated cost of 48 for instruction: %v53 = fptoui <16 x float> undef to <16 x i32> -; AR13: Cost Model: Found an estimated cost of 4 for instruction: %v53 = fptoui <16 x float> undef to <16 x i32> +; Z15: Cost Model: Found an estimated cost of 4 for instruction: %v53 = fptoui <16 x float> undef to <16 x i32> ; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v54 = fptoui <16 x float> undef to <16 x i16> ; CHECK: Cost Model: Found an 
estimated cost of 48 for instruction: %v55 = fptoui <16 x float> undef to <16 x i8> @@ -391,7 +391,7 @@ define void @sitofp() { ; CHECK: Cost Model: Found an estimated cost of 5 for instruction: %v15 = sitofp <2 x i32> undef to <2 x fp128> ; CHECK: Cost Model: Found an estimated cost of 7 for instruction: %v16 = sitofp <2 x i32> undef to <2 x double> ; Z13: Cost Model: Found an estimated cost of 14 for instruction: %v17 = sitofp <2 x i32> undef to <2 x float> -; AR13: Cost Model: Found an estimated cost of 1 for instruction: %v17 = sitofp <2 x i32> undef to <2 x float> +; Z15: Cost Model: Found an estimated cost of 1 for instruction: %v17 = sitofp <2 x i32> undef to <2 x float> ; CHECK: Cost Model: Found an estimated cost of 7 for instruction: %v18 = sitofp <2 x i16> undef to <2 x fp128> ; CHECK: Cost Model: Found an estimated cost of 9 for instruction: %v19 = sitofp <2 x i16> undef to <2 x double> ; CHECK: Cost Model: Found an estimated cost of 9 for instruction: %v20 = sitofp <2 x i16> undef to <2 x float> @@ -404,7 +404,7 @@ define void @sitofp() { ; CHECK: Cost Model: Found an estimated cost of 9 for instruction: %v27 = sitofp <4 x i32> undef to <4 x fp128> ; CHECK: Cost Model: Found an estimated cost of 13 for instruction: %v28 = sitofp <4 x i32> undef to <4 x double> ; Z13: Cost Model: Found an estimated cost of 13 for instruction: %v29 = sitofp <4 x i32> undef to <4 x float> -; AR13: Cost Model: Found an estimated cost of 1 for instruction: %v29 = sitofp <4 x i32> undef to <4 x float> +; Z15: Cost Model: Found an estimated cost of 1 for instruction: %v29 = sitofp <4 x i32> undef to <4 x float> ; CHECK: Cost Model: Found an estimated cost of 13 for instruction: %v30 = sitofp <4 x i16> undef to <4 x fp128> ; CHECK: Cost Model: Found an estimated cost of 17 for instruction: %v31 = sitofp <4 x i16> undef to <4 x double> ; CHECK: Cost Model: Found an estimated cost of 17 for instruction: %v32 = sitofp <4 x i16> undef to <4 x float> @@ -417,7 +417,7 @@ define void 
@sitofp() { ; CHECK: Cost Model: Found an estimated cost of 17 for instruction: %v39 = sitofp <8 x i32> undef to <8 x fp128> ; CHECK: Cost Model: Found an estimated cost of 25 for instruction: %v40 = sitofp <8 x i32> undef to <8 x double> ; Z13: Cost Model: Found an estimated cost of 25 for instruction: %v41 = sitofp <8 x i32> undef to <8 x float> -; AR13: Cost Model: Found an estimated cost of 2 for instruction: %v41 = sitofp <8 x i32> undef to <8 x float> +; Z15: Cost Model: Found an estimated cost of 2 for instruction: %v41 = sitofp <8 x i32> undef to <8 x float> ; CHECK: Cost Model: Found an estimated cost of 25 for instruction: %v42 = sitofp <8 x i16> undef to <8 x fp128> ; CHECK: Cost Model: Found an estimated cost of 33 for instruction: %v43 = sitofp <8 x i16> undef to <8 x double> ; CHECK: Cost Model: Found an estimated cost of 33 for instruction: %v44 = sitofp <8 x i16> undef to <8 x float> @@ -428,7 +428,7 @@ define void @sitofp() { ; CHECK: Cost Model: Found an estimated cost of 49 for instruction: %v49 = sitofp <16 x i64> undef to <16 x float> ; CHECK: Cost Model: Found an estimated cost of 49 for instruction: %v50 = sitofp <16 x i32> undef to <16 x double> ; Z13: Cost Model: Found an estimated cost of 49 for instruction: %v51 = sitofp <16 x i32> undef to <16 x float> -; AR13: Cost Model: Found an estimated cost of 4 for instruction: %v51 = sitofp <16 x i32> undef to <16 x float> +; Z15: Cost Model: Found an estimated cost of 4 for instruction: %v51 = sitofp <16 x i32> undef to <16 x float> ; CHECK: Cost Model: Found an estimated cost of 65 for instruction: %v52 = sitofp <16 x i16> undef to <16 x double> ; CHECK: Cost Model: Found an estimated cost of 65 for instruction: %v53 = sitofp <16 x i16> undef to <16 x float> ; CHECK: Cost Model: Found an estimated cost of 65 for instruction: %v54 = sitofp <16 x i8> undef to <16 x double> @@ -513,7 +513,7 @@ define void @uitofp() { ; CHECK: Cost Model: Found an estimated cost of 5 for instruction: %v15 = uitofp 
<2 x i32> undef to <2 x fp128> ; CHECK: Cost Model: Found an estimated cost of 7 for instruction: %v16 = uitofp <2 x i32> undef to <2 x double> ; Z13: Cost Model: Found an estimated cost of 14 for instruction: %v17 = uitofp <2 x i32> undef to <2 x float> -; AR13: Cost Model: Found an estimated cost of 1 for instruction: %v17 = uitofp <2 x i32> undef to <2 x float> +; Z15: Cost Model: Found an estimated cost of 1 for instruction: %v17 = uitofp <2 x i32> undef to <2 x float> ; CHECK: Cost Model: Found an estimated cost of 7 for instruction: %v18 = uitofp <2 x i16> undef to <2 x fp128> ; CHECK: Cost Model: Found an estimated cost of 9 for instruction: %v19 = uitofp <2 x i16> undef to <2 x double> ; CHECK: Cost Model: Found an estimated cost of 9 for instruction: %v20 = uitofp <2 x i16> undef to <2 x float> @@ -526,7 +526,7 @@ define void @uitofp() { ; CHECK: Cost Model: Found an estimated cost of 9 for instruction: %v27 = uitofp <4 x i32> undef to <4 x fp128> ; CHECK: Cost Model: Found an estimated cost of 13 for instruction: %v28 = uitofp <4 x i32> undef to <4 x double> ; Z13: Cost Model: Found an estimated cost of 13 for instruction: %v29 = uitofp <4 x i32> undef to <4 x float> -; AR13: Cost Model: Found an estimated cost of 1 for instruction: %v29 = uitofp <4 x i32> undef to <4 x float> +; Z15: Cost Model: Found an estimated cost of 1 for instruction: %v29 = uitofp <4 x i32> undef to <4 x float> ; CHECK: Cost Model: Found an estimated cost of 13 for instruction: %v30 = uitofp <4 x i16> undef to <4 x fp128> ; CHECK: Cost Model: Found an estimated cost of 17 for instruction: %v31 = uitofp <4 x i16> undef to <4 x double> ; CHECK: Cost Model: Found an estimated cost of 17 for instruction: %v32 = uitofp <4 x i16> undef to <4 x float> @@ -539,7 +539,7 @@ define void @uitofp() { ; CHECK: Cost Model: Found an estimated cost of 17 for instruction: %v39 = uitofp <8 x i32> undef to <8 x fp128> ; CHECK: Cost Model: Found an estimated cost of 25 for instruction: %v40 = uitofp 
<8 x i32> undef to <8 x double> ; Z13: Cost Model: Found an estimated cost of 25 for instruction: %v41 = uitofp <8 x i32> undef to <8 x float> -; AR13: Cost Model: Found an estimated cost of 2 for instruction: %v41 = uitofp <8 x i32> undef to <8 x float> +; Z15: Cost Model: Found an estimated cost of 2 for instruction: %v41 = uitofp <8 x i32> undef to <8 x float> ; CHECK: Cost Model: Found an estimated cost of 25 for instruction: %v42 = uitofp <8 x i16> undef to <8 x fp128> ; CHECK: Cost Model: Found an estimated cost of 33 for instruction: %v43 = uitofp <8 x i16> undef to <8 x double> ; CHECK: Cost Model: Found an estimated cost of 33 for instruction: %v44 = uitofp <8 x i16> undef to <8 x float> @@ -550,7 +550,7 @@ define void @uitofp() { ; CHECK: Cost Model: Found an estimated cost of 49 for instruction: %v49 = uitofp <16 x i64> undef to <16 x float> ; CHECK: Cost Model: Found an estimated cost of 49 for instruction: %v50 = uitofp <16 x i32> undef to <16 x double> ; Z13: Cost Model: Found an estimated cost of 49 for instruction: %v51 = uitofp <16 x i32> undef to <16 x float> -; AR13: Cost Model: Found an estimated cost of 4 for instruction: %v51 = uitofp <16 x i32> undef to <16 x float> +; Z15: Cost Model: Found an estimated cost of 4 for instruction: %v51 = uitofp <16 x i32> undef to <16 x float> ; CHECK: Cost Model: Found an estimated cost of 65 for instruction: %v52 = uitofp <16 x i16> undef to <16 x double> ; CHECK: Cost Model: Found an estimated cost of 65 for instruction: %v53 = uitofp <16 x i16> undef to <16 x float> ; CHECK: Cost Model: Found an estimated cost of 65 for instruction: %v54 = uitofp <16 x i8> undef to <16 x double> diff --git a/llvm/test/Analysis/CostModel/SystemZ/intrinsic-cost-crash.ll b/llvm/test/Analysis/CostModel/SystemZ/intrinsic-cost-crash.ll index ff5b2a205..866bd9e4b 100644 --- a/llvm/test/Analysis/CostModel/SystemZ/intrinsic-cost-crash.ll +++ b/llvm/test/Analysis/CostModel/SystemZ/intrinsic-cost-crash.ll @@ -49,7 +49,7 @@ for.body: 
; preds = %for.body, %for.body } attributes #0 = { argmemonly nounwind } -attributes #1 = { nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } !llvm.ident = !{!0} diff --git a/llvm/test/Analysis/CostModel/SystemZ/intrinsics.ll b/llvm/test/Analysis/CostModel/SystemZ/intrinsics.ll index bbde62783..4bc050830 100644 --- a/llvm/test/Analysis/CostModel/SystemZ/intrinsics.ll +++ b/llvm/test/Analysis/CostModel/SystemZ/intrinsics.ll @@ -1,7 +1,7 @@ ; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 \ ; RUN: | FileCheck %s -check-prefixes=CHECK,Z13 -; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=arch13 \ -; RUN: | FileCheck %s -check-prefixes=CHECK,AR13 +; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z15 \ +; RUN: | FileCheck %s -check-prefixes=CHECK,Z15 define void @bswap_i64(i64 %arg, <2 x i64> %arg2) { ; CHECK: Printing analysis 'Cost Model Analysis' for function 'bswap_i64': @@ -69,15 +69,15 @@ define void @bswap_i64_mem(i64* %src, i64 %arg, i64* %dst) { define void 
@bswap_v2i64_mem(<2 x i64>* %src, <2 x i64> %arg, <2 x i64>* %dst) { ; CHECK:Printing analysis 'Cost Model Analysis' for function 'bswap_v2i64_mem': ; Z13: Cost Model: Found an estimated cost of 1 for instruction: %Ld1 = load <2 x i64>, <2 x i64>* %src -; AR13: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load <2 x i64>, <2 x i64>* %src +; Z15: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load <2 x i64>, <2 x i64>* %src ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp1 = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %Ld1) ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp2 = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %arg) ; Z13: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %swp2, <2 x i64>* %dst -; AR13: Cost Model: Found an estimated cost of 0 for instruction: store <2 x i64> %swp2, <2 x i64>* %dst +; Z15: Cost Model: Found an estimated cost of 0 for instruction: store <2 x i64> %swp2, <2 x i64>* %dst ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Ld2 = load <2 x i64>, <2 x i64>* %src ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp3 = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %Ld2) ; Z13: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %swp3, <2 x i64>* %dst -; AR13: Cost Model: Found an estimated cost of 0 for instruction: store <2 x i64> %swp3, <2 x i64>* %dst +; Z15: Cost Model: Found an estimated cost of 0 for instruction: store <2 x i64> %swp3, <2 x i64>* %dst %Ld1 = load <2 x i64>, <2 x i64>* %src %swp1 = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %Ld1) @@ -117,15 +117,15 @@ define void @bswap_i32_mem(i32* %src, i32 %arg, i32* %dst) { define void @bswap_v4i32_mem(<4 x i32>* %src, <4 x i32> %arg, <4 x i32>* %dst) { ; CHECK: Printing analysis 'Cost Model Analysis' for function 'bswap_v4i32_mem': ; Z13: Cost Model: Found an estimated cost of 1 for instruction: 
%Ld1 = load <4 x i32>, <4 x i32>* %src -; AR13: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load <4 x i32>, <4 x i32>* %src +; Z15: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load <4 x i32>, <4 x i32>* %src ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp1 = tail call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %Ld1) ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp2 = tail call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %arg) ; Z13: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %swp2, <4 x i32>* %dst -; AR13: Cost Model: Found an estimated cost of 0 for instruction: store <4 x i32> %swp2, <4 x i32>* %dst +; Z15: Cost Model: Found an estimated cost of 0 for instruction: store <4 x i32> %swp2, <4 x i32>* %dst ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Ld2 = load <4 x i32>, <4 x i32>* %src ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp3 = tail call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %Ld2) ; Z13: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %swp3, <4 x i32>* %dst -; AR13: Cost Model: Found an estimated cost of 0 for instruction: store <4 x i32> %swp3, <4 x i32>* %dst +; Z15: Cost Model: Found an estimated cost of 0 for instruction: store <4 x i32> %swp3, <4 x i32>* %dst %Ld1 = load <4 x i32>, <4 x i32>* %src %swp1 = tail call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %Ld1) @@ -164,15 +164,15 @@ define void @bswap_i16_mem(i16* %src, i16 %arg, i16* %dst) { define void @bswap_v8i16_mem(<8 x i16>* %src, <8 x i16> %arg, <8 x i16>* %dst) { ; CHECK: Printing analysis 'Cost Model Analysis' for function 'bswap_v8i16_mem': ; Z13: Cost Model: Found an estimated cost of 1 for instruction: %Ld1 = load <8 x i16>, <8 x i16>* %src -; AR13: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load <8 x i16>, <8 x i16>* %src +; Z15: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 
= load <8 x i16>, <8 x i16>* %src ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp1 = tail call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %Ld1) ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp2 = tail call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %arg) ; Z13: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %swp2, <8 x i16>* %dst -; AR13: Cost Model: Found an estimated cost of 0 for instruction: store <8 x i16> %swp2, <8 x i16>* %dst +; Z15: Cost Model: Found an estimated cost of 0 for instruction: store <8 x i16> %swp2, <8 x i16>* %dst ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Ld2 = load <8 x i16>, <8 x i16>* %src ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp3 = tail call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %Ld2) ; Z13: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %swp3, <8 x i16>* %dst -; AR13: Cost Model: Found an estimated cost of 0 for instruction: store <8 x i16> %swp3, <8 x i16>* %dst +; Z15: Cost Model: Found an estimated cost of 0 for instruction: store <8 x i16> %swp3, <8 x i16>* %dst %Ld1 = load <8 x i16>, <8 x i16>* %src %swp1 = tail call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %Ld1) diff --git a/llvm/test/Analysis/CostModel/SystemZ/logic-miscext3.ll b/llvm/test/Analysis/CostModel/SystemZ/logic-miscext3.ll index 86706309e..f3b915d0a 100644 --- a/llvm/test/Analysis/CostModel/SystemZ/logic-miscext3.ll +++ b/llvm/test/Analysis/CostModel/SystemZ/logic-miscext3.ll @@ -1,25 +1,25 @@ ; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 \ ; RUN: | FileCheck %s -check-prefixes=CHECK,Z13 -; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=arch13 \ -; RUN: | FileCheck %s -check-prefixes=CHECK,AR13 +; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z15 \ +; RUN: | FileCheck %s -check-prefixes=CHECK,Z15 define void @fun0(i32 %a) { ; CHECK-LABEL: Printing analysis 'Cost 
Model Analysis' for function 'fun0': ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c0 = xor i32 %l0, -1 ; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res0 = or i32 %a, %c0 -; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res0 = or i32 %a, %c0 +; Z15: Cost Model: Found an estimated cost of 0 for instruction: %res0 = or i32 %a, %c0 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c1 = xor i32 %l1, -1 ; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res1 = and i32 %a, %c1 -; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res1 = and i32 %a, %c1 +; Z15: Cost Model: Found an estimated cost of 0 for instruction: %res1 = and i32 %a, %c1 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c2 = and i32 %l2, %a ; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res2 = xor i32 %c2, -1 -; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res2 = xor i32 %c2, -1 +; Z15: Cost Model: Found an estimated cost of 0 for instruction: %res2 = xor i32 %c2, -1 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c3 = or i32 %l3, %a ; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res3 = xor i32 %c3, -1 -; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res3 = xor i32 %c3, -1 +; Z15: Cost Model: Found an estimated cost of 0 for instruction: %res3 = xor i32 %c3, -1 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c4 = xor i32 %l4, %a ; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res4 = xor i32 %c4, -1 -; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res4 = xor i32 %c4, -1 +; Z15: Cost Model: Found an estimated cost of 0 for instruction: %res4 = xor i32 %c4, -1 entry: %l0 = load i32, i32* undef @@ -54,19 +54,19 @@ define void @fun1(i64 %a) { ; CHECK-LABEL: Printing analysis 'Cost Model Analysis' for function 'fun1': ; CHECK: Cost 
Model: Found an estimated cost of 1 for instruction: %c0 = xor i64 %l0, -1 ; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res0 = or i64 %a, %c0 -; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res0 = or i64 %a, %c0 +; Z15: Cost Model: Found an estimated cost of 0 for instruction: %res0 = or i64 %a, %c0 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c1 = xor i64 %l1, -1 ; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res1 = and i64 %a, %c1 -; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res1 = and i64 %a, %c1 +; Z15: Cost Model: Found an estimated cost of 0 for instruction: %res1 = and i64 %a, %c1 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c2 = and i64 %l2, %a ; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res2 = xor i64 %c2, -1 -; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res2 = xor i64 %c2, -1 +; Z15: Cost Model: Found an estimated cost of 0 for instruction: %res2 = xor i64 %c2, -1 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c3 = or i64 %l3, %a ; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res3 = xor i64 %c3, -1 -; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res3 = xor i64 %c3, -1 +; Z15: Cost Model: Found an estimated cost of 0 for instruction: %res3 = xor i64 %c3, -1 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c4 = xor i64 %l4, %a ; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res4 = xor i64 %c4, -1 -; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res4 = xor i64 %c4, -1 +; Z15: Cost Model: Found an estimated cost of 0 for instruction: %res4 = xor i64 %c4, -1 entry: %l0 = load i64, i64* undef %c0 = xor i64 %l0, -1 diff --git a/llvm/test/Analysis/CostModel/X86/aggregates.ll b/llvm/test/Analysis/CostModel/X86/aggregates.ll new file mode 100644 index 000000000..3fd97d8bf --- 
/dev/null +++ b/llvm/test/Analysis/CostModel/X86/aggregates.ll @@ -0,0 +1,142 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -cost-kind=throughput -analyze | FileCheck %s --check-prefixes=ALL,THROUGHPUT +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -cost-kind=latency -analyze | FileCheck %s --check-prefixes=ALL,LATENCY +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -cost-kind=code-size -analyze | FileCheck %s --check-prefixes=ALL,CODESIZE + +define i32 @extract_first_i32({i32, i32} %agg) { +; THROUGHPUT-LABEL: 'extract_first_i32' +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, i32 } %agg, 0 +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r +; +; LATENCY-LABEL: 'extract_first_i32' +; LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, i32 } %agg, 0 +; LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %r +; +; CODESIZE-LABEL: 'extract_first_i32' +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, i32 } %agg, 0 +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %r +; + %r = extractvalue {i32, i32} %agg, 0 + ret i32 %r +} + +define i32 @extract_second_i32({i32, i32} %agg) { +; THROUGHPUT-LABEL: 'extract_second_i32' +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, i32 } %agg, 1 +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r +; +; LATENCY-LABEL: 'extract_second_i32' +; LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, i32 } %agg, 1 +; LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %r +; +; CODESIZE-LABEL: 
'extract_second_i32' +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, i32 } %agg, 1 +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %r +; + %r = extractvalue {i32, i32} %agg, 1 + ret i32 %r +} + +define i32 @extract_i32({i32, i1} %agg) { +; THROUGHPUT-LABEL: 'extract_i32' +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, i1 } %agg, 0 +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r +; +; LATENCY-LABEL: 'extract_i32' +; LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, i1 } %agg, 0 +; LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %r +; +; CODESIZE-LABEL: 'extract_i32' +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, i1 } %agg, 0 +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %r +; + %r = extractvalue {i32, i1} %agg, 0 + ret i32 %r +} + +define i1 @extract_i1({i32, i1} %agg) { +; THROUGHPUT-LABEL: 'extract_i1' +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, i1 } %agg, 1 +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %r +; +; LATENCY-LABEL: 'extract_i1' +; LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, i1 } %agg, 1 +; LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i1 %r +; +; CODESIZE-LABEL: 'extract_i1' +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, i1 } %agg, 1 +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i1 %r +; + %r = extractvalue {i32, i1} %agg, 1 + ret i1 %r +} + +define float @extract_float({i32, float} %agg) { +; THROUGHPUT-LABEL: 'extract_float' +; 
THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, float } %agg, 1 +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r +; +; LATENCY-LABEL: 'extract_float' +; LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, float } %agg, 1 +; LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %r +; +; CODESIZE-LABEL: 'extract_float' +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, float } %agg, 1 +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %r +; + %r = extractvalue {i32, float} %agg, 1 + ret float %r +} + +define [42 x i42] @extract_array({i32, [42 x i42]} %agg) { +; THROUGHPUT-LABEL: 'extract_array' +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, [42 x i42] } %agg, 1 +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret [42 x i42] %r +; +; LATENCY-LABEL: 'extract_array' +; LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, [42 x i42] } %agg, 1 +; LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret [42 x i42] %r +; +; CODESIZE-LABEL: 'extract_array' +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, [42 x i42] } %agg, 1 +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret [42 x i42] %r +; + %r = extractvalue {i32, [42 x i42]} %agg, 1 + ret [42 x i42] %r +} + +define <42 x i42> @extract_vector({i32, <42 x i42>} %agg) { +; THROUGHPUT-LABEL: 'extract_vector' +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, <42 x i42> } %agg, 1 +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <42 x i42> %r +; +; LATENCY-LABEL: 
'extract_vector' +; LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, <42 x i42> } %agg, 1 +; LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <42 x i42> %r +; +; CODESIZE-LABEL: 'extract_vector' +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, <42 x i42> } %agg, 1 +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <42 x i42> %r +; + %r = extractvalue {i32, <42 x i42>} %agg, 1 + ret <42 x i42> %r +} + +%T1 = type { i32, float, <4 x i1> } + +define %T1 @extract_struct({i32, %T1} %agg) { +; THROUGHPUT-LABEL: 'extract_struct' +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, %T1 } %agg, 1 +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret %T1 %r +; +; LATENCY-LABEL: 'extract_struct' +; LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, %T1 } %agg, 1 +; LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret %T1 %r +; +; CODESIZE-LABEL: 'extract_struct' +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, %T1 } %agg, 1 +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret %T1 %r +; + %r = extractvalue {i32, %T1} %agg, 1 + ret %T1 %r +} diff --git a/llvm/test/Analysis/CostModel/X86/alternate-shuffle-cost.ll b/llvm/test/Analysis/CostModel/X86/alternate-shuffle-cost.ll index 42204e1ee..9a8fc259e 100644 --- a/llvm/test/Analysis/CostModel/X86/alternate-shuffle-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/alternate-shuffle-cost.ll @@ -18,9 +18,21 @@ ; 64-bit packed float vectors (v2f32) are widened to type v4f32. 
define <2 x i32> @test_v2i32(<2 x i32> %a, <2 x i32> %b) { -; CHECK-LABEL: 'test_v2i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %1 +; SSE2-LABEL: 'test_v2i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %1 +; +; SSSE3-LABEL: 'test_v2i32' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %1 +; +; SSE42-LABEL: 'test_v2i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %1 +; +; AVX-LABEL: 'test_v2i32' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %1 ; ; BTVER2-LABEL: 'test_v2i32' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> @@ -56,9 +68,21 @@ define <2 x float> @test_v2f32(<2 x float> %a, <2 x float> %b) { } define <2 x i32> @test_v2i32_2(<2 x i32> %a, <2 x i32> %b) { -; CHECK-LABEL: 'test_v2i32_2' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %1 +; SSE2-LABEL: 'test_v2i32_2' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <2 x i32> %a, 
<2 x i32> %b, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %1 +; +; SSSE3-LABEL: 'test_v2i32_2' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %1 +; +; SSE42-LABEL: 'test_v2i32_2' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %1 +; +; AVX-LABEL: 'test_v2i32_2' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %1 ; ; BTVER2-LABEL: 'test_v2i32_2' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/arith-fix.ll b/llvm/test/Analysis/CostModel/X86/arith-fix.ll index b65ddb630..b72ca1c1e 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-fix.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-fix.ll @@ -38,9 +38,9 @@ define i32 @smul(i32 %arg) { ; SSSE3-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 66 for 
instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.smul.fix.i16(i16 undef, i16 undef, i32 3) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I16 = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) @@ -57,9 +57,9 @@ define i32 @smul(i32 %arg) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3) -; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) -; SSE42-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> 
undef, <8 x i32> undef, i32 3) -; SSE42-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) +; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) +; SSE42-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) +; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.smul.fix.i16(i16 undef, i16 undef, i32 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) @@ -171,9 +171,9 @@ define i32 @smul(i32 %arg) { ; SLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) ; SLM-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) -; SLM-NEXT: Cost Model: Found an estimated 
cost of 184 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) +; SLM-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) +; SLM-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) +; SLM-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.smul.fix.i16(i16 undef, i16 undef, i32 3) ; SLM-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I16 = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3) ; SLM-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) @@ -190,9 +190,9 @@ define i32 @smul(i32 %arg) { ; GLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) ; GLM-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) ; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> 
undef, <16 x i32> undef, i32 3) +; GLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) +; GLM-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) +; GLM-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) ; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.smul.fix.i16(i16 undef, i16 undef, i32 3) ; GLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3) ; GLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) @@ -272,9 +272,9 @@ define i32 @umul(i32 %arg) { ; SSSE3-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) +; SSSE3-NEXT: Cost Model: Found an estimated 
cost of 31 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.umul.fix.i16(i16 undef, i16 undef, i32 3) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I16 = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) @@ -291,9 +291,9 @@ define i32 @umul(i32 %arg) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3) -; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) -; SSE42-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) -; SSE42-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) +; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4I32 = call <4 x i32> 
@llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) +; SSE42-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) +; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.umul.fix.i16(i16 undef, i16 undef, i32 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) @@ -405,9 +405,9 @@ define i32 @umul(i32 %arg) { ; SLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) ; SLM-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) +; SLM-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) +; SLM-NEXT: Cost 
Model: Found an estimated cost of 98 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) +; SLM-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.umul.fix.i16(i16 undef, i16 undef, i32 3) ; SLM-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I16 = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3) ; SLM-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) @@ -424,9 +424,9 @@ define i32 @umul(i32 %arg) { ; GLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) ; GLM-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) +; GLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) +; GLM-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V8I32 = call <8 x i32> 
@llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) +; GLM-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) ; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.umul.fix.i16(i16 undef, i16 undef, i32 3) ; GLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3) ; GLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) diff --git a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll index b705ed901..5ab9b845e 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll @@ -171,9 +171,9 @@ define i32 @sadd(i32 %arg) { ; ; SLM-LABEL: 'sadd' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } 
@llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) @@ -409,9 +409,9 @@ define i32 @uadd(i32 %arg) { ; ; SLM-LABEL: 'uadd' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; 
SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) @@ -647,9 +647,9 @@ define i32 @ssub(i32 %arg) { ; ; SLM-LABEL: 'ssub' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call { <4 x i32>, <4 
x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) @@ -885,9 +885,9 @@ define i32 @usub(i32 %arg) { ; ; SLM-LABEL: 'usub' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> 
undef) @@ -994,9 +994,9 @@ define i32 @smul(i32 %arg) { ; SSSE3-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated 
cost of 62 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) @@ -1013,9 +1013,9 @@ define i32 @smul(i32 %arg) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call { 
<8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) @@ -1123,13 +1123,13 @@ define i32 @smul(i32 %arg) { ; ; SLM-LABEL: 'smul' ; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } 
@llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) @@ -1146,9 +1146,9 @@ define i32 @smul(i32 %arg) { ; GLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) 
-; GLM-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) @@ -1232,9 +1232,9 @@ define i32 @umul(i32 %arg) { ; SSSE3-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 26 for 
instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) @@ -1251,9 +1251,9 @@ define i32 @umul(i32 %arg) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 
= call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) @@ -1361,13 +1361,13 @@ define i32 @umul(i32 %arg) { ; ; SLM-LABEL: 'umul' ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } 
@llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 180 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; SLM-NEXT: Cost Model: Found 
an estimated cost of 196 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) @@ -1384,9 +1384,9 @@ define i32 @umul(i32 %arg) { ; GLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V8I32 = call { <8 x i32>, 
<8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) diff --git a/llvm/test/Analysis/CostModel/X86/arith-ssat.ll b/llvm/test/Analysis/CostModel/X86/arith-ssat.ll index d99bba326..c951330bc 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-ssat.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-ssat.ll @@ -170,9 +170,9 @@ define i32 @add(i32 %arg) { ; ; SLM-LABEL: 'add' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SLM-NEXT: Cost Model: Found an 
estimated cost of 59 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) @@ -404,9 +404,9 @@ define i32 @sub(i32 %arg) { ; ; SLM-LABEL: 'sub' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: 
%V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) diff --git a/llvm/test/Analysis/CostModel/X86/arith-usat.ll b/llvm/test/Analysis/CostModel/X86/arith-usat.ll index e7aaba8c0..a83c16e84 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-usat.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-usat.ll @@ -170,9 +170,9 @@ define i32 @add(i32 %arg) { ; ; SLM-LABEL: 'add' ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) @@ -404,9 +404,9 @@ define i32 @sub(i32 %arg) { ; ; SLM-LABEL: 'sub' ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = 
call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) diff --git a/llvm/test/Analysis/CostModel/X86/arith.ll b/llvm/test/Analysis/CostModel/X86/arith.ll index f52cbf88b..a2ab5e847 100644 --- a/llvm/test/Analysis/CostModel/X86/arith.ll +++ b/llvm/test/Analysis/CostModel/X86/arith.ll @@ -1342,36 +1342,32 @@ define i32 @mul(i32 %arg) { ; A <2 x i64> vector multiply is implemented using ; 3 PMULUDQ and 2 PADDS and 4 shifts. 
define void @mul_2i32() { -; SSE-LABEL: 'mul_2i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A0 = mul <2 x i32> undef, undef -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; SSSE3-LABEL: 'mul_2i32' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %A0 = mul <2 x i32> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE42-LABEL: 'mul_2i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %A0 = mul <2 x i32> undef, undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'mul_2i32' -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A0 = mul <2 x i32> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %A0 = mul <2 x i32> undef, undef ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; AVX512F-LABEL: 'mul_2i32' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A0 = mul <2 x i32> undef, undef -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; AVX512BW-LABEL: 'mul_2i32' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A0 = mul <2 x i32> undef, undef -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; AVX512DQ-LABEL: 'mul_2i32' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A0 = mul <2 x i32> undef, undef -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; AVX512-LABEL: 'mul_2i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A0 = mul <2 x i32> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'mul_2i32' -; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %A0 = mul <2 x i32> 
undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %A0 = mul <2 x i32> undef, undef ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'mul_2i32' -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A0 = mul <2 x i32> undef, undef +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %A0 = mul <2 x i32> undef, undef ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; BTVER2-LABEL: 'mul_2i32' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A0 = mul <2 x i32> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %A0 = mul <2 x i32> undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %A0 = mul <2 x i32> undef, undef diff --git a/llvm/test/Analysis/CostModel/X86/cast.ll b/llvm/test/Analysis/CostModel/X86/cast.ll index 7072313f5..b43e871dd 100644 --- a/llvm/test/Analysis/CostModel/X86/cast.ll +++ b/llvm/test/Analysis/CostModel/X86/cast.ll @@ -90,12 +90,12 @@ define i32 @zext_sext(<8 x i1> %in) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %D3 = zext <16 x i16> undef to <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %D4 = zext <16 x i8> undef to <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %D5 = zext <16 x i1> undef to <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %E = trunc <4 x i64> undef to <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %E = trunc <4 x i64> undef to <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %F = trunc <8 x i32> undef to <8 x i16> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F1 = trunc <16 x i16> undef to <16 x i8> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for 
instruction: %F2 = trunc <8 x i32> undef to <8 x i8> -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F3 = trunc <4 x i64> undef to <4 x i8> -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %G = trunc <8 x i64> undef to <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %F3 = trunc <4 x i64> undef to <4 x i8> +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %G = trunc <8 x i64> undef to <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %G1 = trunc <16 x i32> undef to <16 x i16> ; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %G2 = trunc <16 x i32> undef to <16 x i8> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef @@ -120,12 +120,12 @@ define i32 @zext_sext(<8 x i1> %in) { ; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D3 = zext <16 x i16> undef to <16 x i32> ; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D4 = zext <16 x i8> undef to <16 x i32> ; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %D5 = zext <16 x i1> undef to <16 x i32> -; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %E = trunc <4 x i64> undef to <4 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %E = trunc <4 x i64> undef to <4 x i32> ; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F = trunc <8 x i32> undef to <8 x i16> ; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F1 = trunc <16 x i16> undef to <16 x i8> ; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F2 = trunc <8 x i32> undef to <8 x i8> -; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F3 = trunc <4 x i64> undef to <4 x i8> -; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %G = trunc <8 x i64> undef to <8 x 
i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F3 = trunc <4 x i64> undef to <4 x i8> +; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %G = trunc <8 x i64> undef to <8 x i32> ; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %G1 = trunc <16 x i32> undef to <16 x i16> ; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %G2 = trunc <16 x i32> undef to <16 x i8> ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef @@ -139,11 +139,11 @@ define i32 @zext_sext(<8 x i1> %in) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B = zext <8 x i16> undef to <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C = sext <4 x i32> undef to <4 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C.v8i8.z = zext <8 x i8> undef to <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %C.v8i8.s = sext <8 x i8> undef to <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C.v8i8.s = sext <8 x i8> undef to <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C.v4i16.z = zext <4 x i16> undef to <4 x i64> -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %C.v4i16.s = sext <4 x i16> undef to <4 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C.v4i16.s = sext <4 x i16> undef to <4 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C.v4i8.z = zext <4 x i8> undef to <4 x i64> -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %C.v4i8.s = sext <4 x i8> undef to <4 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C.v4i8.s = sext <4 x i8> undef to <4 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D = zext <4 x i32> undef to <4 x i64> ; 
AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %D1 = zext <8 x i32> undef to <8 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %D2 = sext <8 x i32> undef to <8 x i64> @@ -168,12 +168,12 @@ define i32 @zext_sext(<8 x i1> %in) { ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A = sext <8 x i16> undef to <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %B = zext <8 x i16> undef to <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C = sext <4 x i32> undef to <4 x i64> -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C.v8i8.z = zext <8 x i8> undef to <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C.v8i8.s = sext <8 x i8> undef to <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C.v4i16.z = zext <4 x i16> undef to <4 x i64> -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C.v4i16.s = sext <4 x i16> undef to <4 x i64> -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C.v4i8.z = zext <4 x i8> undef to <4 x i64> -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C.v4i8.s = sext <4 x i8> undef to <4 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C.v8i8.z = zext <8 x i8> undef to <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C.v8i8.s = sext <8 x i8> undef to <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C.v4i16.z = zext <4 x i16> undef to <4 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C.v4i16.s = sext <4 x i16> undef to <4 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C.v4i8.z = zext <4 x i8> undef to <4 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C.v4i8.s = sext <4 x 
i8> undef to <4 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D = zext <4 x i32> undef to <4 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %D1 = zext <8 x i32> undef to <8 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %D2 = sext <8 x i32> undef to <8 x i64> @@ -198,12 +198,12 @@ define i32 @zext_sext(<8 x i1> %in) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A = sext <8 x i16> undef to <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %B = zext <8 x i16> undef to <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C = sext <4 x i32> undef to <4 x i64> -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C.v8i8.z = zext <8 x i8> undef to <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C.v8i8.s = sext <8 x i8> undef to <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C.v4i16.z = zext <4 x i16> undef to <4 x i64> -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C.v4i16.s = sext <4 x i16> undef to <4 x i64> -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C.v4i8.z = zext <4 x i8> undef to <4 x i64> -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C.v4i8.s = sext <4 x i8> undef to <4 x i64> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C.v8i8.z = zext <8 x i8> undef to <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C.v8i8.s = sext <8 x i8> undef to <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C.v4i16.z = zext <4 x i16> undef to <4 x i64> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C.v4i16.s = sext <4 x i16> undef to <4 x i64> +; AVX512-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %C.v4i8.z = zext <4 x i8> undef to <4 x i64> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C.v4i8.s = sext <4 x i8> undef to <4 x i64> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D = zext <4 x i32> undef to <4 x i64> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D1 = zext <8 x i32> undef to <8 x i64> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D2 = sext <8 x i32> undef to <8 x i64> @@ -314,13 +314,13 @@ define i32 @masks4(<4 x i1> %in) { define void @sitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) { ; SSE-LABEL: 'sitofp4' ; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %A1 = sitofp <4 x i1> %a to <4 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %A2 = sitofp <4 x i1> %a to <4 x double> -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %B1 = sitofp <4 x i8> %b to <4 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %B2 = sitofp <4 x i8> %b to <4 x double> -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %C1 = sitofp <4 x i16> %c to <4 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %C2 = sitofp <4 x i16> %c to <4 x double> +; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %A2 = sitofp <4 x i1> %a to <4 x double> +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = sitofp <4 x i8> %b to <4 x float> +; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %B2 = sitofp <4 x i8> %b to <4 x double> +; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %C1 = sitofp <4 x i16> %c to <4 x float> +; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %C2 = sitofp <4 x i16> %c to <4 x double> ; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %D1 = 
sitofp <4 x i32> %d to <4 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %D2 = sitofp <4 x i32> %d to <4 x double> +; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %D2 = sitofp <4 x i32> %d to <4 x double> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'sitofp4' @@ -359,7 +359,7 @@ define void @sitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) { define void @sitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) { ; SSE-LABEL: 'sitofp8' ; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %A1 = sitofp <8 x i1> %a to <8 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float> +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float> ; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float> ; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %D1 = sitofp <8 x i32> %d to <8 x float> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void @@ -390,9 +390,9 @@ define void @uitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) { ; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A1 = uitofp <4 x i1> %a to <4 x float> ; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %A2 = uitofp <4 x i1> %a to <4 x double> ; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = uitofp <4 x i8> %b to <4 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %B2 = uitofp <4 x i8> %b to <4 x double> -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %C1 = uitofp <4 x i16> %c to <4 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %C2 = uitofp <4 x i16> %c to <4 x double> +; SSE-NEXT: Cost Model: Found 
an estimated cost of 160 for instruction: %B2 = uitofp <4 x i8> %b to <4 x double> +; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %C1 = uitofp <4 x i16> %c to <4 x float> +; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %C2 = uitofp <4 x i16> %c to <4 x double> ; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %D1 = uitofp <4 x i32> %d to <4 x float> ; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %D2 = uitofp <4 x i32> %d to <4 x double> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void @@ -433,7 +433,7 @@ define void @uitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) { define void @uitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) { ; SSE-LABEL: 'uitofp8' ; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %A1 = uitofp <8 x i1> %a to <8 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %B1 = uitofp <8 x i8> %b to <8 x float> +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = uitofp <8 x i8> %b to <8 x float> ; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %C1 = uitofp <8 x i16> %c to <8 x float> ; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %D1 = uitofp <8 x i32> %d to <8 x float> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void diff --git a/llvm/test/Analysis/CostModel/X86/ctlz.ll b/llvm/test/Analysis/CostModel/X86/ctlz.ll index e2a6bc521..6da6cb5f9 100644 --- a/llvm/test/Analysis/CostModel/X86/ctlz.ll +++ b/llvm/test/Analysis/CostModel/X86/ctlz.ll @@ -1,11 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+sse2 | FileCheck %s -check-prefixes=CHECK,SSE,SSE2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+sse4.2 | 
FileCheck %s -check-prefixes=CHECK,SSE,SSE42 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx | FileCheck %s -check-prefixes=CHECK,AVX,AVX1 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx2 | FileCheck %s -check-prefixes=CHECK,AVX,AVX2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512f | FileCheck %s -check-prefixes=CHECK,AVX512,AVX512F -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512vl,+avx512bw,+avx512dq | FileCheck %s -check-prefixes=CHECK,AVX512,AVX512BW -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512vl,+avx512bw,+avx512dq,+avx512cd | FileCheck %s -check-prefixes=CHECK,AVX512CD +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=-lzcnt,+sse2 | FileCheck %s -check-prefixes=CHECK,SSE,SSE2,NOLZCNT +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+lzcnt,+sse2 | FileCheck %s -check-prefixes=CHECK,SSE,SSE2,LZCNT +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+lzcnt,+sse4.2 | FileCheck %s -check-prefixes=CHECK,LZCNT,SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+lzcnt,+avx | FileCheck %s -check-prefixes=CHECK,LZCNT,AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+lzcnt,+avx2 | FileCheck %s -check-prefixes=CHECK,LZCNT,AVX,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+lzcnt,+avx512f | FileCheck %s -check-prefixes=CHECK,LZCNT,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+lzcnt,+avx512vl,+avx512bw,+avx512dq | FileCheck %s -check-prefixes=CHECK,LZCNT,AVX512,AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+lzcnt,+avx512vl,+avx512bw,+avx512dq,+avx512cd | FileCheck %s 
-check-prefixes=CHECK,LZCNT,AVX512CD ; Verify the cost of scalar leading zero count instructions. @@ -15,72 +16,104 @@ declare i16 @llvm.ctlz.i16(i16, i1) declare i8 @llvm.ctlz.i8(i8, i1) define i64 @var_ctlz_i64(i64 %a) { -; CHECK-LABEL: 'var_ctlz_i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %ctlz +; NOLZCNT-LABEL: 'var_ctlz_i64' +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 false) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %ctlz +; +; LZCNT-LABEL: 'var_ctlz_i64' +; LZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 false) +; LZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %ctlz ; %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 0) ret i64 %ctlz } define i64 @var_ctlz_i64u(i64 %a) { -; CHECK-LABEL: 'var_ctlz_i64u' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 true) -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %ctlz +; NOLZCNT-LABEL: 'var_ctlz_i64u' +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 true) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %ctlz +; +; LZCNT-LABEL: 'var_ctlz_i64u' +; LZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 true) +; LZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %ctlz ; %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 1) ret i64 %ctlz } define i32 @var_ctlz_i32(i32 %a) { -; CHECK-LABEL: 'var_ctlz_i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ctlz = 
call i32 @llvm.ctlz.i32(i32 %a, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %ctlz +; NOLZCNT-LABEL: 'var_ctlz_i32' +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 false) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %ctlz +; +; LZCNT-LABEL: 'var_ctlz_i32' +; LZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 false) +; LZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %ctlz ; %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 0) ret i32 %ctlz } define i32 @var_ctlz_i32u(i32 %a) { -; CHECK-LABEL: 'var_ctlz_i32u' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 true) -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %ctlz +; NOLZCNT-LABEL: 'var_ctlz_i32u' +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 true) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %ctlz +; +; LZCNT-LABEL: 'var_ctlz_i32u' +; LZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 true) +; LZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %ctlz ; %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 1) ret i32 %ctlz } define i16 @var_ctlz_i16(i16 %a) { -; CHECK-LABEL: 'var_ctlz_i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %ctlz +; NOLZCNT-LABEL: 'var_ctlz_i16' +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 false) +; NOLZCNT-NEXT: Cost Model: Found an estimated 
cost of 0 for instruction: ret i16 %ctlz +; +; LZCNT-LABEL: 'var_ctlz_i16' +; LZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 false) +; LZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %ctlz ; %ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 0) ret i16 %ctlz } define i16 @var_ctlz_i16u(i16 %a) { -; CHECK-LABEL: 'var_ctlz_i16u' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 true) -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %ctlz +; NOLZCNT-LABEL: 'var_ctlz_i16u' +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 true) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %ctlz +; +; LZCNT-LABEL: 'var_ctlz_i16u' +; LZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 true) +; LZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %ctlz ; %ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 1) ret i16 %ctlz } define i8 @var_ctlz_i8(i8 %a) { -; CHECK-LABEL: 'var_ctlz_i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %ctlz +; NOLZCNT-LABEL: 'var_ctlz_i8' +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 false) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %ctlz +; +; LZCNT-LABEL: 'var_ctlz_i8' +; LZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 false) +; LZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %ctlz ; %ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 0) ret i8 %ctlz } define i8 
@var_ctlz_i8u(i8 %a) { -; CHECK-LABEL: 'var_ctlz_i8u' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 true) -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %ctlz +; NOLZCNT-LABEL: 'var_ctlz_i8u' +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 true) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %ctlz +; +; LZCNT-LABEL: 'var_ctlz_i8u' +; LZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 true) +; LZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %ctlz ; %ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 1) ret i8 %ctlz diff --git a/llvm/test/Analysis/CostModel/X86/ctpop.ll b/llvm/test/Analysis/CostModel/X86/ctpop.ll index bb59558e4..e9cf666af 100644 --- a/llvm/test/Analysis/CostModel/X86/ctpop.ll +++ b/llvm/test/Analysis/CostModel/X86/ctpop.ll @@ -16,7 +16,7 @@ declare i8 @llvm.ctpop.i8(i8) define i64 @var_ctpop_i64(i64 %a) { ; NOPOPCNT-LABEL: 'var_ctpop_i64' -; NOPOPCNT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call i64 @llvm.ctpop.i64(i64 %a) +; NOPOPCNT-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %ctpop = call i64 @llvm.ctpop.i64(i64 %a) ; NOPOPCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %ctpop ; ; POPCNT-LABEL: 'var_ctpop_i64' @@ -29,7 +29,7 @@ define i64 @var_ctpop_i64(i64 %a) { define i32 @var_ctpop_i32(i32 %a) { ; NOPOPCNT-LABEL: 'var_ctpop_i32' -; NOPOPCNT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call i32 @llvm.ctpop.i32(i32 %a) +; NOPOPCNT-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call i32 @llvm.ctpop.i32(i32 %a) ; NOPOPCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %ctpop ; ; POPCNT-LABEL: 'var_ctpop_i32' @@ -42,7 
+42,7 @@ define i32 @var_ctpop_i32(i32 %a) { define i16 @var_ctpop_i16(i16 %a) { ; NOPOPCNT-LABEL: 'var_ctpop_i16' -; NOPOPCNT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call i16 @llvm.ctpop.i16(i16 %a) +; NOPOPCNT-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %ctpop = call i16 @llvm.ctpop.i16(i16 %a) ; NOPOPCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %ctpop ; ; POPCNT-LABEL: 'var_ctpop_i16' @@ -55,7 +55,7 @@ define i16 @var_ctpop_i16(i16 %a) { define i8 @var_ctpop_i8(i8 %a) { ; NOPOPCNT-LABEL: 'var_ctpop_i8' -; NOPOPCNT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call i8 @llvm.ctpop.i8(i8 %a) +; NOPOPCNT-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %ctpop = call i8 @llvm.ctpop.i8(i8 %a) ; NOPOPCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %ctpop ; ; POPCNT-LABEL: 'var_ctpop_i8' diff --git a/llvm/test/Analysis/CostModel/X86/extend.ll b/llvm/test/Analysis/CostModel/X86/extend.ll index 1de0b0b20..93831165e 100644 --- a/llvm/test/Analysis/CostModel/X86/extend.ll +++ b/llvm/test/Analysis/CostModel/X86/extend.ll @@ -13,47 +13,55 @@ define i32 @zext_vXi32() { ; SSE2-LABEL: 'zext_vXi32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %I64 = zext i32 undef to i64 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = zext <2 x i32> undef to <2 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = zext <4 x i32> undef to <4 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i64 = zext <8 x i32> undef to <8 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'zext_vXi32' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %I64 = zext i32 undef to i64 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = zext 
<2 x i32> undef to <2 x i64> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = zext <4 x i32> undef to <4 x i64> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i64 = zext <8 x i32> undef to <8 x i64> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'zext_vXi32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %I64 = zext i32 undef to i64 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = zext <2 x i32> undef to <2 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = zext <4 x i32> undef to <4 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i64 = zext <8 x i32> undef to <8 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'zext_vXi32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %I64 = zext i32 undef to i64 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = zext <2 x i32> undef to <2 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = zext <4 x i32> undef to <4 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i64 = zext <8 x i32> undef to <8 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'zext_vXi32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %I64 = zext i32 undef to i64 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = zext <2 x i32> undef to <2 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = zext <4 x i32> undef to <4 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i64 = zext <8 x i32> undef to <8 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: ret i32 undef ; ; AVX512-LABEL: 'zext_vXi32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %I64 = zext i32 undef to i64 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = zext <2 x i32> undef to <2 x i64> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = zext <4 x i32> undef to <4 x i64> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = zext <8 x i32> undef to <8 x i64> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'zext_vXi32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %I64 = zext i32 undef to i64 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = zext <2 x i32> undef to <2 x i64> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = zext <4 x i32> undef to <4 x i64> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i64 = zext <8 x i32> undef to <8 x i64> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; + %I64 = zext i32 undef to i64 %V2i64 = zext <2 x i32> undef to <2 x i64> %V4i64 = zext <4 x i32> undef to <4 x i64> %V8i64 = zext <8 x i32> undef to <8 x i64> @@ -63,9 +71,11 @@ define i32 @zext_vXi32() { define i32 @zext_vXi16() { ; SSE2-LABEL: 'zext_vXi16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = zext i16 undef to i64 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = zext <2 x i16> undef to <2 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = zext <4 x i16> undef to <4 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i64 = zext <8 x i16> undef to <8 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = zext i16 undef to i32 ; SSE2-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %V2i32 = zext <2 x i16> undef to <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = zext <4 x i16> undef to <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = zext <8 x i16> undef to <8 x i32> @@ -73,9 +83,11 @@ define i32 @zext_vXi16() { ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'zext_vXi16' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = zext i16 undef to i64 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = zext <2 x i16> undef to <2 x i64> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = zext <4 x i16> undef to <4 x i64> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i64 = zext <8 x i16> undef to <8 x i64> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = zext i16 undef to i32 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = zext <2 x i16> undef to <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = zext <4 x i16> undef to <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = zext <8 x i16> undef to <8 x i32> @@ -83,9 +95,11 @@ define i32 @zext_vXi16() { ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'zext_vXi16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = zext i16 undef to i64 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = zext <2 x i16> undef to <2 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = zext <4 x i16> undef to <4 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i64 = zext <8 x i16> undef to <8 x i64> +; SSE42-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %I32 = zext i16 undef to i32 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = zext <2 x i16> undef to <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = zext <4 x i16> undef to <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = zext <8 x i16> undef to <8 x i32> @@ -93,9 +107,11 @@ define i32 @zext_vXi16() { ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'zext_vXi16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = zext i16 undef to i64 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = zext <2 x i16> undef to <2 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = zext <4 x i16> undef to <4 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i64 = zext <8 x i16> undef to <8 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = zext i16 undef to i32 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = zext <2 x i16> undef to <2 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = zext <4 x i16> undef to <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = zext <8 x i16> undef to <8 x i32> @@ -103,9 +119,11 @@ define i32 @zext_vXi16() { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'zext_vXi16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = zext i16 undef to i64 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = zext <2 x i16> undef to <2 x i64> -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = zext <4 x i16> undef to <4 x i64> -; AVX2-NEXT: Cost Model: 
Found an estimated cost of 7 for instruction: %V8i64 = zext <8 x i16> undef to <8 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = zext <4 x i16> undef to <4 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i64 = zext <8 x i16> undef to <8 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = zext i16 undef to i32 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = zext <2 x i16> undef to <2 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = zext <4 x i16> undef to <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = zext <8 x i16> undef to <8 x i32> @@ -113,9 +131,11 @@ define i32 @zext_vXi16() { ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'zext_vXi16' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = zext i16 undef to i64 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = zext <2 x i16> undef to <2 x i64> -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = zext <4 x i16> undef to <4 x i64> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = zext <4 x i16> undef to <4 x i64> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = zext <8 x i16> undef to <8 x i64> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = zext i16 undef to i32 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = zext <2 x i16> undef to <2 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = zext <4 x i16> undef to <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = zext <8 x i16> undef to <8 x i32> @@ -123,19 +143,23 @@ define i32 @zext_vXi16() { ; 
AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'zext_vXi16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = zext i16 undef to i64 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = zext <2 x i16> undef to <2 x i64> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = zext <4 x i16> undef to <4 x i64> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i64 = zext <8 x i16> undef to <8 x i64> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = zext i16 undef to i32 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = zext <2 x i16> undef to <2 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = zext <4 x i16> undef to <4 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = zext <8 x i16> undef to <8 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = zext <16 x i16> undef to <16 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; + %I64 = zext i16 undef to i64 %V2i64 = zext <2 x i16> undef to <2 x i64> %V4i64 = zext <4 x i16> undef to <4 x i64> %V8i64 = zext <8 x i16> undef to <8 x i64> + %I32 = zext i16 undef to i32 %V2i32 = zext <2 x i16> undef to <2 x i32> %V4i32 = zext <4 x i16> undef to <4 x i32> %V8i32 = zext <8 x i16> undef to <8 x i32> @@ -146,13 +170,16 @@ define i32 @zext_vXi16() { define i32 @zext_vXi8() { ; SSE2-LABEL: 'zext_vXi8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = zext i8 undef to i64 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = zext <2 x i8> undef to <2 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = zext <4 x i8> undef to <4 x i64> ; SSE2-NEXT: Cost Model: 
Found an estimated cost of 9 for instruction: %V8i64 = zext <8 x i8> undef to <8 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = zext i8 undef to i32 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = zext <2 x i8> undef to <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = zext <4 x i8> undef to <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = zext <8 x i8> undef to <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i32 = zext <16 x i8> undef to <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = zext i8 undef to i16 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = zext <2 x i8> undef to <2 x i16> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = zext <4 x i8> undef to <4 x i16> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = zext <8 x i8> undef to <8 x i16> @@ -161,13 +188,16 @@ define i32 @zext_vXi8() { ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'zext_vXi8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = zext i8 undef to i64 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = zext <2 x i8> undef to <2 x i64> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = zext <4 x i8> undef to <4 x i64> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i64 = zext <8 x i8> undef to <8 x i64> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = zext i8 undef to i32 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = zext <2 x i8> undef to <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = zext <4 x i8> 
undef to <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = zext <8 x i8> undef to <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i32 = zext <16 x i8> undef to <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = zext i8 undef to i16 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = zext <2 x i8> undef to <2 x i16> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = zext <4 x i8> undef to <4 x i16> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = zext <8 x i8> undef to <8 x i16> @@ -176,13 +206,16 @@ define i32 @zext_vXi8() { ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'zext_vXi8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = zext i8 undef to i64 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = zext <2 x i8> undef to <2 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = zext <4 x i8> undef to <4 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i64 = zext <8 x i8> undef to <8 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = zext i8 undef to i32 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = zext <2 x i8> undef to <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = zext <4 x i8> undef to <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = zext <8 x i8> undef to <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = zext <16 x i8> undef to <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = zext i8 undef to i16 ; SSE42-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V2i16 = zext <2 x i8> undef to <2 x i16> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = zext <4 x i8> undef to <4 x i16> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = zext <8 x i8> undef to <8 x i16> @@ -191,13 +224,16 @@ define i32 @zext_vXi8() { ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'zext_vXi8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = zext i8 undef to i64 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = zext <2 x i8> undef to <2 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = zext <4 x i8> undef to <4 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i64 = zext <8 x i8> undef to <8 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = zext i8 undef to i32 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = zext <2 x i8> undef to <2 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = zext <4 x i8> undef to <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = zext <8 x i8> undef to <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = zext <16 x i8> undef to <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = zext i8 undef to i16 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = zext <2 x i8> undef to <2 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = zext <4 x i8> undef to <4 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = zext <8 x i8> undef to <8 x i16> @@ -206,13 +242,16 @@ define i32 @zext_vXi8() { ; AVX1-NEXT: Cost Model: Found an estimated cost 
of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'zext_vXi8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = zext i8 undef to i64 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = zext <2 x i8> undef to <2 x i64> -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = zext <4 x i8> undef to <4 x i64> -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i64 = zext <8 x i8> undef to <8 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = zext <4 x i8> undef to <4 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i64 = zext <8 x i8> undef to <8 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = zext i8 undef to i32 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = zext <2 x i8> undef to <2 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = zext <4 x i8> undef to <4 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = zext <8 x i8> undef to <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = zext <8 x i8> undef to <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = zext <16 x i8> undef to <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = zext i8 undef to i16 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = zext <2 x i8> undef to <2 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = zext <4 x i8> undef to <4 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = zext <8 x i8> undef to <8 x i16> @@ -221,13 +260,16 @@ define i32 @zext_vXi8() { ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 
'zext_vXi8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = zext i8 undef to i64 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = zext <2 x i8> undef to <2 x i64> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = zext <4 x i8> undef to <4 x i64> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = zext <8 x i8> undef to <8 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = zext <4 x i8> undef to <4 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = zext <8 x i8> undef to <8 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = zext i8 undef to i32 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = zext <2 x i8> undef to <2 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = zext <4 x i8> undef to <4 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = zext <8 x i8> undef to <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = zext <8 x i8> undef to <8 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = zext <16 x i8> undef to <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = zext i8 undef to i16 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = zext <2 x i8> undef to <2 x i16> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = zext <4 x i8> undef to <4 x i16> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = zext <8 x i8> undef to <8 x i16> @@ -236,13 +278,16 @@ define i32 @zext_vXi8() { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 
'zext_vXi8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = zext i8 undef to i64 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = zext <2 x i8> undef to <2 x i64> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = zext <4 x i8> undef to <4 x i64> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = zext <8 x i8> undef to <8 x i64> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = zext <4 x i8> undef to <4 x i64> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = zext <8 x i8> undef to <8 x i64> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = zext i8 undef to i32 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = zext <2 x i8> undef to <2 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = zext <4 x i8> undef to <4 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = zext <8 x i8> undef to <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = zext <8 x i8> undef to <8 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = zext <16 x i8> undef to <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = zext i8 undef to i16 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = zext <2 x i8> undef to <2 x i16> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = zext <4 x i8> undef to <4 x i16> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = zext <8 x i8> undef to <8 x i16> @@ -251,13 +296,16 @@ define i32 @zext_vXi8() { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; 
BTVER2-LABEL: 'zext_vXi8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = zext i8 undef to i64 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = zext <2 x i8> undef to <2 x i64> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = zext <4 x i8> undef to <4 x i64> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i64 = zext <8 x i8> undef to <8 x i64> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = zext i8 undef to i32 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = zext <2 x i8> undef to <2 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = zext <4 x i8> undef to <4 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = zext <8 x i8> undef to <8 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = zext <16 x i8> undef to <16 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = zext i8 undef to i16 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = zext <2 x i8> undef to <2 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = zext <4 x i8> undef to <4 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = zext <8 x i8> undef to <8 x i16> @@ -265,15 +313,18 @@ define i32 @zext_vXi8() { ; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = zext <32 x i8> undef to <32 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; + %I64 = zext i8 undef to i64 %V2i64 = zext <2 x i8> undef to <2 x i64> %V4i64 = zext <4 x i8> undef to <4 x i64> %V8i64 = zext <8 x i8> undef to <8 x i64> + %I32 = zext i8 undef to i32 %V2i32 = zext <2 x i8> undef to <2 x i32> %V4i32 = zext <4 x i8> undef to 
<4 x i32> %V8i32 = zext <8 x i8> undef to <8 x i32> %V16i32 = zext <16 x i8> undef to <16 x i32> + %I16 = zext i8 undef to i16 %V2i16 = zext <2 x i8> undef to <2 x i16> %V4i16 = zext <4 x i8> undef to <4 x i16> %V8i16 = zext <8 x i8> undef to <8 x i16> @@ -285,18 +336,22 @@ define i32 @zext_vXi8() { define i32 @zext_vXi1() { ; SSE-LABEL: 'zext_vXi1' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = zext i1 undef to i64 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = zext <2 x i1> undef to <2 x i64> ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = zext <4 x i1> undef to <4 x i64> ; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i64 = zext <8 x i1> undef to <8 x i64> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = zext i1 undef to i32 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = zext <2 x i1> undef to <2 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = zext <4 x i1> undef to <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = zext <8 x i1> undef to <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = zext <16 x i1> undef to <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = zext i1 undef to i16 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = zext <2 x i1> undef to <2 x i16> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = zext <4 x i1> undef to <4 x i16> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = zext <8 x i1> undef to <8 x i16> ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = zext <16 x i1> undef to <16 x i16> ; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i16 = zext <32 x i1> undef to <32 x 
i16> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = zext i1 undef to i8 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = zext <2 x i1> undef to <2 x i8> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = zext <4 x i1> undef to <4 x i8> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = zext <8 x i1> undef to <8 x i8> @@ -306,18 +361,22 @@ define i32 @zext_vXi1() { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'zext_vXi1' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = zext i1 undef to i64 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = zext <2 x i1> undef to <2 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = zext <4 x i1> undef to <4 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i64 = zext <8 x i1> undef to <8 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = zext i1 undef to i32 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = zext <2 x i1> undef to <2 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = zext <4 x i1> undef to <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = zext <8 x i1> undef to <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i32 = zext <16 x i1> undef to <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = zext i1 undef to i16 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = zext <2 x i1> undef to <2 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = zext <4 x i1> undef to <4 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = zext <8 
x i1> undef to <8 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i16 = zext <16 x i1> undef to <16 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V32i16 = zext <32 x i1> undef to <32 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = zext i1 undef to i8 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = zext <2 x i1> undef to <2 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = zext <4 x i1> undef to <4 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = zext <8 x i1> undef to <8 x i8> @@ -327,18 +386,22 @@ define i32 @zext_vXi1() { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'zext_vXi1' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = zext i1 undef to i64 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = zext <2 x i1> undef to <2 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = zext <4 x i1> undef to <4 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i64 = zext <8 x i1> undef to <8 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = zext i1 undef to i32 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = zext <2 x i1> undef to <2 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = zext <4 x i1> undef to <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = zext <8 x i1> undef to <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = zext <16 x i1> undef to <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = zext i1 undef to i16 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %V2i16 = zext <2 x i1> undef to <2 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = zext <4 x i1> undef to <4 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = zext <8 x i1> undef to <8 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i16 = zext <16 x i1> undef to <16 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V32i16 = zext <32 x i1> undef to <32 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = zext i1 undef to i8 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = zext <2 x i1> undef to <2 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = zext <4 x i1> undef to <4 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = zext <8 x i1> undef to <8 x i8> @@ -348,18 +411,22 @@ define i32 @zext_vXi1() { ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'zext_vXi1' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = zext i1 undef to i64 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = zext <2 x i1> undef to <2 x i64> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = zext <4 x i1> undef to <4 x i64> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = zext <8 x i1> undef to <8 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = zext i1 undef to i32 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i32 = zext <2 x i1> undef to <2 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = zext <4 x i1> undef to <4 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = zext <8 x i1> 
undef to <8 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = zext <16 x i1> undef to <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = zext i1 undef to i16 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i16 = zext <2 x i1> undef to <2 x i16> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i16 = zext <4 x i1> undef to <4 x i16> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i16 = zext <8 x i1> undef to <8 x i16> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i16 = zext <16 x i1> undef to <16 x i16> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V32i16 = zext <32 x i1> undef to <32 x i16> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = zext i1 undef to i8 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i8 = zext <2 x i1> undef to <2 x i8> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i8 = zext <4 x i1> undef to <4 x i8> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i8 = zext <8 x i1> undef to <8 x i8> @@ -369,18 +436,22 @@ define i32 @zext_vXi1() { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'zext_vXi1' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = zext i1 undef to i64 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = zext <2 x i1> undef to <2 x i64> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = zext <4 x i1> undef to <4 x i64> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = zext <8 x i1> undef to <8 x i64> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = zext i1 undef to 
i32 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i32 = zext <2 x i1> undef to <2 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = zext <4 x i1> undef to <4 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = zext <8 x i1> undef to <8 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = zext <16 x i1> undef to <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = zext i1 undef to i16 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i16 = zext <2 x i1> undef to <2 x i16> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i16 = zext <4 x i1> undef to <4 x i16> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = zext <8 x i1> undef to <8 x i16> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = zext <16 x i1> undef to <16 x i16> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = zext <32 x i1> undef to <32 x i16> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = zext i1 undef to i8 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i8 = zext <2 x i1> undef to <2 x i8> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i8 = zext <4 x i1> undef to <4 x i8> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i8 = zext <8 x i1> undef to <8 x i8> @@ -390,18 +461,22 @@ define i32 @zext_vXi1() { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'zext_vXi1' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = zext i1 undef to i64 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = zext <2 x i1> undef to <2 x 
i64> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = zext <4 x i1> undef to <4 x i64> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i64 = zext <8 x i1> undef to <8 x i64> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = zext i1 undef to i32 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = zext <2 x i1> undef to <2 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = zext <4 x i1> undef to <4 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = zext <8 x i1> undef to <8 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i32 = zext <16 x i1> undef to <16 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = zext i1 undef to i16 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = zext <2 x i1> undef to <2 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = zext <4 x i1> undef to <4 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = zext <8 x i1> undef to <8 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i16 = zext <16 x i1> undef to <16 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V32i16 = zext <32 x i1> undef to <32 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = zext i1 undef to i8 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = zext <2 x i1> undef to <2 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = zext <4 x i1> undef to <4 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = zext <8 x i1> undef to <8 x i8> @@ -410,21 +485,25 @@ define i32 @zext_vXi1() { ; BTVER2-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %V64i8 = zext <64 x i1> undef to <64 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; + %I64 = zext i1 undef to i64 %V2i64 = zext <2 x i1> undef to <2 x i64> %V4i64 = zext <4 x i1> undef to <4 x i64> %V8i64 = zext <8 x i1> undef to <8 x i64> + %I32 = zext i1 undef to i32 %V2i32 = zext <2 x i1> undef to <2 x i32> %V4i32 = zext <4 x i1> undef to <4 x i32> %V8i32 = zext <8 x i1> undef to <8 x i32> %V16i32 = zext <16 x i1> undef to <16 x i32> + %I16 = zext i1 undef to i16 %V2i16 = zext <2 x i1> undef to <2 x i16> %V4i16 = zext <4 x i1> undef to <4 x i16> %V8i16 = zext <8 x i1> undef to <8 x i16> %V16i16 = zext <16 x i1> undef to <16 x i16> %V32i16 = zext <32 x i1> undef to <32 x i16> + %I8 = zext i1 undef to i8 %V2i8 = zext <2 x i1> undef to <2 x i8> %V4i8 = zext <4 x i1> undef to <4 x i8> %V8i8 = zext <8 x i1> undef to <8 x i8> @@ -437,47 +516,55 @@ define i32 @zext_vXi1() { define i32 @sext_vXi32() { ; SSE2-LABEL: 'sext_vXi32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sext i32 undef to i64 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i32> undef to <2 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i64 = sext <4 x i32> undef to <4 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = sext <8 x i32> undef to <8 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'sext_vXi32' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sext i32 undef to i64 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i32> undef to <2 x i64> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i64 = sext <4 x i32> undef to <4 x i64> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: 
%V8i64 = sext <8 x i32> undef to <8 x i64> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'sext_vXi32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sext i32 undef to i64 ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i32> undef to <2 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = sext <4 x i32> undef to <4 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i64 = sext <8 x i32> undef to <8 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'sext_vXi32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sext i32 undef to i64 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i32> undef to <2 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sext <4 x i32> undef to <4 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i64 = sext <8 x i32> undef to <8 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'sext_vXi32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sext i32 undef to i64 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i32> undef to <2 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = sext <4 x i32> undef to <4 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i64 = sext <8 x i32> undef to <8 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'sext_vXi32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sext i32 undef to i64 ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %V2i64 = sext <2 x i32> undef to <2 x i64> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = sext <4 x i32> undef to <4 x i64> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = sext <8 x i32> undef to <8 x i64> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'sext_vXi32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sext i32 undef to i64 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i32> undef to <2 x i64> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sext <4 x i32> undef to <4 x i64> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i64 = sext <8 x i32> undef to <8 x i64> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; + %I64 = sext i32 undef to i64 %V2i64 = sext <2 x i32> undef to <2 x i64> %V4i64 = sext <4 x i32> undef to <4 x i64> %V8i64 = sext <8 x i32> undef to <8 x i64> @@ -487,9 +574,11 @@ define i32 @sext_vXi32() { define i32 @sext_vXi16() { ; SSE2-LABEL: 'sext_vXi16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sext i16 undef to i64 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i16> undef to <2 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4i64 = sext <4 x i16> undef to <4 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8i64 = sext <8 x i16> undef to <8 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sext i16 undef to i32 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i16> undef to <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = sext <4 x i16> undef to <4 x i32> ; SSE2-NEXT: Cost 
Model: Found an estimated cost of 4 for instruction: %V8i32 = sext <8 x i16> undef to <8 x i32> @@ -497,9 +586,11 @@ define i32 @sext_vXi16() { ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'sext_vXi16' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sext i16 undef to i64 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i16> undef to <2 x i64> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4i64 = sext <4 x i16> undef to <4 x i64> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8i64 = sext <8 x i16> undef to <8 x i64> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sext i16 undef to i32 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i16> undef to <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = sext <4 x i16> undef to <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sext <8 x i16> undef to <8 x i32> @@ -507,9 +598,11 @@ define i32 @sext_vXi16() { ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'sext_vXi16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sext i16 undef to i64 ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i16> undef to <2 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = sext <4 x i16> undef to <4 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i64 = sext <8 x i16> undef to <8 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sext i16 undef to i32 ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i16> undef to <2 x i32> ; SSE42-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = sext <4 x i16> undef to <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = sext <8 x i16> undef to <8 x i32> @@ -517,9 +610,11 @@ define i32 @sext_vXi16() { ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'sext_vXi16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sext i16 undef to i64 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i16> undef to <2 x i64> -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = sext <4 x i16> undef to <4 x i64> -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8i64 = sext <8 x i16> undef to <8 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sext <4 x i16> undef to <4 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i64 = sext <8 x i16> undef to <8 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sext i16 undef to i32 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i16> undef to <2 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = sext <4 x i16> undef to <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sext <8 x i16> undef to <8 x i32> @@ -527,9 +622,11 @@ define i32 @sext_vXi16() { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'sext_vXi16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sext i16 undef to i64 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i16> undef to <2 x i64> -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = sext <4 x i16> undef to <4 x i64> -; 
AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i64 = sext <8 x i16> undef to <8 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = sext <4 x i16> undef to <4 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i64 = sext <8 x i16> undef to <8 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sext i16 undef to i32 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i16> undef to <2 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = sext <4 x i16> undef to <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = sext <8 x i16> undef to <8 x i32> @@ -537,9 +634,11 @@ define i32 @sext_vXi16() { ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'sext_vXi16' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sext i16 undef to i64 ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i16> undef to <2 x i64> -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = sext <4 x i16> undef to <4 x i64> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = sext <4 x i16> undef to <4 x i64> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = sext <8 x i16> undef to <8 x i64> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sext i16 undef to i32 ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i16> undef to <2 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = sext <4 x i16> undef to <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = sext <8 x i16> undef to <8 x i32> @@ -547,19 +646,23 @@ define 
i32 @sext_vXi16() { ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'sext_vXi16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sext i16 undef to i64 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i16> undef to <2 x i64> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = sext <4 x i16> undef to <4 x i64> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8i64 = sext <8 x i16> undef to <8 x i64> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sext <4 x i16> undef to <4 x i64> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i64 = sext <8 x i16> undef to <8 x i64> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sext i16 undef to i32 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i16> undef to <2 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = sext <4 x i16> undef to <4 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sext <8 x i16> undef to <8 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sext <16 x i16> undef to <16 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; + %I64 = sext i16 undef to i64 %V2i64 = sext <2 x i16> undef to <2 x i64> %V4i64 = sext <4 x i16> undef to <4 x i64> %V8i64 = sext <8 x i16> undef to <8 x i64> + %I32 = sext i16 undef to i32 %V2i32 = sext <2 x i16> undef to <2 x i32> %V4i32 = sext <4 x i16> undef to <4 x i32> %V8i32 = sext <8 x i16> undef to <8 x i32> @@ -570,13 +673,16 @@ define i32 @sext_vXi16() { define i32 @sext_vXi8() { ; SSE2-LABEL: 'sext_vXi8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sext i8 undef 
to i64 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i8> undef to <2 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i64 = sext <4 x i8> undef to <4 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8i64 = sext <8 x i8> undef to <8 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sext i8 undef to i32 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i8> undef to <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i32 = sext <4 x i8> undef to <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = sext <8 x i8> undef to <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i32 = sext <16 x i8> undef to <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sext i8 undef to i16 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = sext <2 x i8> undef to <2 x i16> ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i16 = sext <4 x i8> undef to <4 x i16> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = sext <8 x i8> undef to <8 x i16> @@ -585,13 +691,16 @@ define i32 @sext_vXi8() { ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'sext_vXi8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sext i8 undef to i64 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i8> undef to <2 x i64> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i64 = sext <4 x i8> undef to <4 x i64> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8i64 = sext <8 x i8> undef to <8 x i64> +; SSSE3-NEXT: Cost Model: Found an estimated cost 
of 1 for instruction: %I32 = sext i8 undef to i32 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i8> undef to <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i32 = sext <4 x i8> undef to <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = sext <8 x i8> undef to <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i32 = sext <16 x i8> undef to <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sext i8 undef to i16 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = sext <2 x i8> undef to <2 x i16> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i16 = sext <4 x i8> undef to <4 x i16> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = sext <8 x i8> undef to <8 x i16> @@ -600,13 +709,16 @@ define i32 @sext_vXi8() { ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'sext_vXi8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sext i8 undef to i64 ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i8> undef to <2 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = sext <4 x i8> undef to <4 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i64 = sext <8 x i8> undef to <8 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sext i8 undef to i32 ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i8> undef to <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = sext <4 x i8> undef to <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = sext <8 x i8> undef to <8 x 
i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sext <16 x i8> undef to <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sext i8 undef to i16 ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = sext <2 x i8> undef to <2 x i16> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sext <4 x i8> undef to <4 x i16> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = sext <8 x i8> undef to <8 x i16> @@ -615,13 +727,16 @@ define i32 @sext_vXi8() { ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'sext_vXi8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sext i8 undef to i64 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i8> undef to <2 x i64> -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = sext <4 x i8> undef to <4 x i64> -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8i64 = sext <8 x i8> undef to <8 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sext <4 x i8> undef to <4 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i64 = sext <8 x i8> undef to <8 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sext i8 undef to i32 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i8> undef to <2 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = sext <4 x i8> undef to <4 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sext <8 x i8> undef to <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sext <8 x i8> undef to <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost 
of 4 for instruction: %V16i32 = sext <16 x i8> undef to <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sext i8 undef to i16 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = sext <2 x i8> undef to <2 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sext <4 x i8> undef to <4 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = sext <8 x i8> undef to <8 x i16> @@ -630,13 +745,16 @@ define i32 @sext_vXi8() { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'sext_vXi8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sext i8 undef to i64 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i8> undef to <2 x i64> -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = sext <4 x i8> undef to <4 x i64> -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i64 = sext <8 x i8> undef to <8 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = sext <4 x i8> undef to <4 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i64 = sext <8 x i8> undef to <8 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sext i8 undef to i32 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i8> undef to <2 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = sext <4 x i8> undef to <4 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = sext <8 x i8> undef to <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = sext <8 x i8> undef to <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sext <16 x i8> undef to <16 
x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sext i8 undef to i16 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = sext <2 x i8> undef to <2 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sext <4 x i8> undef to <4 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = sext <8 x i8> undef to <8 x i16> @@ -645,13 +763,16 @@ define i32 @sext_vXi8() { ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'sext_vXi8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sext i8 undef to i64 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i8> undef to <2 x i64> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = sext <4 x i8> undef to <4 x i64> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = sext <8 x i8> undef to <8 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = sext <4 x i8> undef to <4 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = sext <8 x i8> undef to <8 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sext i8 undef to i32 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i8> undef to <2 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = sext <4 x i8> undef to <4 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = sext <8 x i8> undef to <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = sext <8 x i8> undef to <8 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = sext <16 x i8> undef to <16 x i32> +; 
AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sext i8 undef to i16 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = sext <2 x i8> undef to <2 x i16> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sext <4 x i8> undef to <4 x i16> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = sext <8 x i8> undef to <8 x i16> @@ -660,13 +781,16 @@ define i32 @sext_vXi8() { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'sext_vXi8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sext i8 undef to i64 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i8> undef to <2 x i64> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = sext <4 x i8> undef to <4 x i64> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = sext <8 x i8> undef to <8 x i64> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = sext <4 x i8> undef to <4 x i64> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = sext <8 x i8> undef to <8 x i64> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sext i8 undef to i32 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i8> undef to <2 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = sext <4 x i8> undef to <4 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = sext <8 x i8> undef to <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = sext <8 x i8> undef to <8 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = sext <16 x i8> undef to <16 x 
i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sext i8 undef to i16 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = sext <2 x i8> undef to <2 x i16> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sext <4 x i8> undef to <4 x i16> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = sext <8 x i8> undef to <8 x i16> @@ -675,13 +799,16 @@ define i32 @sext_vXi8() { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'sext_vXi8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sext i8 undef to i64 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i8> undef to <2 x i64> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = sext <4 x i8> undef to <4 x i64> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8i64 = sext <8 x i8> undef to <8 x i64> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sext <4 x i8> undef to <4 x i64> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i64 = sext <8 x i8> undef to <8 x i64> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sext i8 undef to i32 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i8> undef to <2 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = sext <4 x i8> undef to <4 x i32> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sext <8 x i8> undef to <8 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sext <8 x i8> undef to <8 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sext <16 x i8> undef to <16 x i32> +; 
BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sext i8 undef to i16 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = sext <2 x i8> undef to <2 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sext <4 x i8> undef to <4 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = sext <8 x i8> undef to <8 x i16> @@ -689,15 +816,18 @@ define i32 @sext_vXi8() { ; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = sext <32 x i8> undef to <32 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; + %I64 = sext i8 undef to i64 %V2i64 = sext <2 x i8> undef to <2 x i64> %V4i64 = sext <4 x i8> undef to <4 x i64> %V8i64 = sext <8 x i8> undef to <8 x i64> + %I32 = sext i8 undef to i32 %V2i32 = sext <2 x i8> undef to <2 x i32> %V4i32 = sext <4 x i8> undef to <4 x i32> %V8i32 = sext <8 x i8> undef to <8 x i32> %V16i32 = sext <16 x i8> undef to <16 x i32> + %I16 = sext i8 undef to i16 %V2i16 = sext <2 x i8> undef to <2 x i16> %V4i16 = sext <4 x i8> undef to <4 x i16> %V8i16 = sext <8 x i8> undef to <8 x i16> @@ -709,18 +839,22 @@ define i32 @sext_vXi8() { define i32 @sext_vXi1() { ; SSE-LABEL: 'sext_vXi1' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sext i1 undef to i64 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i1> undef to <2 x i64> ; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i64 = sext <4 x i1> undef to <4 x i64> ; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = sext <8 x i1> undef to <8 x i64> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sext i1 undef to i32 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i1> undef to <2 x i32> ; SSE-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %V4i32 = sext <4 x i1> undef to <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = sext <8 x i1> undef to <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i32 = sext <16 x i1> undef to <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sext i1 undef to i16 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = sext <2 x i1> undef to <2 x i16> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sext <4 x i1> undef to <4 x i16> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = sext <8 x i1> undef to <8 x i16> ; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i16 = sext <16 x i1> undef to <16 x i16> ; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32i16 = sext <32 x i1> undef to <32 x i16> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sext i1 undef to i8 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i8 = sext <2 x i1> undef to <2 x i8> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = sext <4 x i1> undef to <4 x i8> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i8 = sext <8 x i1> undef to <8 x i8> @@ -730,18 +864,22 @@ define i32 @sext_vXi1() { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'sext_vXi1' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sext i1 undef to i64 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i1> undef to <2 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = sext <4 x i1> undef to <4 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8i64 = sext <8 x i1> undef to <8 x i64> 
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sext i1 undef to i32 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i1> undef to <2 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = sext <4 x i1> undef to <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sext <8 x i1> undef to <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i32 = sext <16 x i1> undef to <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sext i1 undef to i16 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = sext <2 x i1> undef to <2 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sext <4 x i1> undef to <4 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = sext <8 x i1> undef to <8 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i16 = sext <16 x i1> undef to <16 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V32i16 = sext <32 x i1> undef to <32 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sext i1 undef to i8 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i8 = sext <2 x i1> undef to <2 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = sext <4 x i1> undef to <4 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i8 = sext <8 x i1> undef to <8 x i8> @@ -751,18 +889,22 @@ define i32 @sext_vXi1() { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'sext_vXi1' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sext i1 undef to i64 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 
= sext <2 x i1> undef to <2 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = sext <4 x i1> undef to <4 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i64 = sext <8 x i1> undef to <8 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sext i1 undef to i32 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i1> undef to <2 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = sext <4 x i1> undef to <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = sext <8 x i1> undef to <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = sext <16 x i1> undef to <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sext i1 undef to i16 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = sext <2 x i1> undef to <2 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sext <4 x i1> undef to <4 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = sext <8 x i1> undef to <8 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i16 = sext <16 x i1> undef to <16 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V32i16 = sext <32 x i1> undef to <32 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sext i1 undef to i8 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i8 = sext <2 x i1> undef to <2 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = sext <4 x i1> undef to <4 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i8 = sext <8 x i1> undef to <8 x i8> @@ -772,18 +914,22 @@ define i32 @sext_vXi1() { ; AVX2-NEXT: Cost Model: Found 
an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'sext_vXi1' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sext i1 undef to i64 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = sext <2 x i1> undef to <2 x i64> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = sext <4 x i1> undef to <4 x i64> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = sext <8 x i1> undef to <8 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sext i1 undef to i32 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i32 = sext <2 x i1> undef to <2 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = sext <4 x i1> undef to <4 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = sext <8 x i1> undef to <8 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = sext <16 x i1> undef to <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sext i1 undef to i16 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i16 = sext <2 x i1> undef to <2 x i16> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i16 = sext <4 x i1> undef to <4 x i16> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i16 = sext <8 x i1> undef to <8 x i16> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i16 = sext <16 x i1> undef to <16 x i16> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V32i16 = sext <32 x i1> undef to <32 x i16> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sext i1 undef to i8 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i8 = sext <2 x i1> 
undef to <2 x i8> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i8 = sext <4 x i1> undef to <4 x i8> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i8 = sext <8 x i1> undef to <8 x i8> @@ -793,18 +939,22 @@ define i32 @sext_vXi1() { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'sext_vXi1' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sext i1 undef to i64 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = sext <2 x i1> undef to <2 x i64> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = sext <4 x i1> undef to <4 x i64> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = sext <8 x i1> undef to <8 x i64> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sext i1 undef to i32 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i32 = sext <2 x i1> undef to <2 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = sext <4 x i1> undef to <4 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = sext <8 x i1> undef to <8 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = sext <16 x i1> undef to <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sext i1 undef to i16 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i16 = sext <2 x i1> undef to <2 x i16> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i16 = sext <4 x i1> undef to <4 x i16> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = sext <8 x i1> undef to <8 x i16> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = sext <16 x 
i1> undef to <16 x i16> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = sext <32 x i1> undef to <32 x i16> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sext i1 undef to i8 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i8 = sext <2 x i1> undef to <2 x i8> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i8 = sext <4 x i1> undef to <4 x i8> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i8 = sext <8 x i1> undef to <8 x i8> @@ -814,18 +964,22 @@ define i32 @sext_vXi1() { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'sext_vXi1' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sext i1 undef to i64 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i1> undef to <2 x i64> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = sext <4 x i1> undef to <4 x i64> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8i64 = sext <8 x i1> undef to <8 x i64> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sext i1 undef to i32 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i1> undef to <2 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = sext <4 x i1> undef to <4 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sext <8 x i1> undef to <8 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i32 = sext <16 x i1> undef to <16 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sext i1 undef to i16 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = sext <2 x i1> undef to <2 x i16> ; 
BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sext <4 x i1> undef to <4 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = sext <8 x i1> undef to <8 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i16 = sext <16 x i1> undef to <16 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V32i16 = sext <32 x i1> undef to <32 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sext i1 undef to i8 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i8 = sext <2 x i1> undef to <2 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = sext <4 x i1> undef to <4 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i8 = sext <8 x i1> undef to <8 x i8> @@ -834,21 +988,25 @@ define i32 @sext_vXi1() { ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = sext <64 x i1> undef to <64 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; + %I64 = sext i1 undef to i64 %V2i64 = sext <2 x i1> undef to <2 x i64> %V4i64 = sext <4 x i1> undef to <4 x i64> %V8i64 = sext <8 x i1> undef to <8 x i64> + %I32 = sext i1 undef to i32 %V2i32 = sext <2 x i1> undef to <2 x i32> %V4i32 = sext <4 x i1> undef to <4 x i32> %V8i32 = sext <8 x i1> undef to <8 x i32> %V16i32 = sext <16 x i1> undef to <16 x i32> + %I16 = sext i1 undef to i16 %V2i16 = sext <2 x i1> undef to <2 x i16> %V4i16 = sext <4 x i1> undef to <4 x i16> %V8i16 = sext <8 x i1> undef to <8 x i16> %V16i16 = sext <16 x i1> undef to <16 x i16> %V32i16 = sext <32 x i1> undef to <32 x i16> + %I8 = sext i1 undef to i8 %V2i8 = sext <2 x i1> undef to <2 x i8> %V4i8 = sext <4 x i1> undef to <4 x i8> %V8i8 = sext <8 x i1> undef to <8 x i8> diff --git a/llvm/test/Analysis/CostModel/X86/fptosi.ll 
b/llvm/test/Analysis/CostModel/X86/fptosi.ll index ff4050872..35a86fb13 100644 --- a/llvm/test/Analysis/CostModel/X86/fptosi.ll +++ b/llvm/test/Analysis/CostModel/X86/fptosi.ll @@ -6,7 +6,7 @@ ; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F ; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512DQ ; -; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=SLM ; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 ; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=btver2 | FileCheck %s --check-prefixes=BTVER2 @@ -39,6 +39,13 @@ define i32 @fptosi_double_i64(i32 %arg) { ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'fptosi_double_i64' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi double undef to i64 +; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; BTVER2-LABEL: 'fptosi_double_i64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi double undef to i64 ; BTVER2-NEXT: Cost Model: Found 
an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64> @@ -75,6 +82,13 @@ define i32 @fptosi_double_i32(i32 %arg) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'fptosi_double_i32' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi double undef to i32 +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = fptosi <4 x double> undef to <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; BTVER2-LABEL: 'fptosi_double_i32' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi double undef to i32 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32> @@ -92,35 +106,35 @@ define i32 @fptosi_double_i32(i32 %arg) { define i32 @fptosi_double_i16(i32 %arg) { ; SSE-LABEL: 'fptosi_double_i16' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16 -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16> -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16> -; SSE-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16> +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: 
%V4I16 = fptosi <4 x double> undef to <4 x i16> +; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'fptosi_double_i16' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16 -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16> ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; -; AVX512F-LABEL: 'fptosi_double_i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; AVX512-LABEL: 'fptosi_double_i16' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16> +; AVX512-NEXT: Cost Model: Found an estimated cost of 
1 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; -; AVX512DQ-LABEL: 'fptosi_double_i16' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16 -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16> -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16> -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16> -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; SLM-LABEL: 'fptosi_double_i16' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16> +; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16> +; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'fptosi_double_i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16> ; BTVER2-NEXT: 
Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef @@ -143,29 +157,29 @@ define i32 @fptosi_double_i8(i32 %arg) { ; AVX-LABEL: 'fptosi_double_i8' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8 ; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8> -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8> +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8> +; AVX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; -; AVX512F-LABEL: 'fptosi_double_i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; AVX512-LABEL: 'fptosi_double_i8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 
= fptosi <8 x double> undef to <8 x i8> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; -; AVX512DQ-LABEL: 'fptosi_double_i8' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8 -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8> -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8> -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8> -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; SLM-LABEL: 'fptosi_double_i8' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8 +; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'fptosi_double_i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 25 for 
instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I8 = fptosi double undef to i8 @@ -208,6 +222,14 @@ define i32 @fptosi_float_i64(i32 %arg) { ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'fptosi_float_i64' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi float undef to i64 +; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; BTVER2-LABEL: 'fptosi_float_i64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi float undef to i64 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64> @@ -232,6 +254,13 @@ define i32 @fptosi_float_i32(i32 %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = fptosi <16 x float> undef to <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'fptosi_float_i32' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi float undef to i32 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = fptosi <4 x float> undef to <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost 
of 1 for instruction: %V8I32 = fptosi <8 x float> undef to <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = fptosi <16 x float> undef to <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; BTVER2-LABEL: 'fptosi_float_i32' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi float undef to i32 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = fptosi <4 x float> undef to <4 x i32> @@ -249,28 +278,35 @@ define i32 @fptosi_float_i32(i32 %arg) { define i32 @fptosi_float_i16(i32 %arg) { ; SSE-LABEL: 'fptosi_float_i16' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16> -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16> -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16> +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16> +; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'fptosi_float_i16' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16> ; AVX-NEXT: Cost Model: Found an estimated cost 
of 1 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16> ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'fptosi_float_i16' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'fptosi_float_i16' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16> +; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16> +; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; BTVER2-LABEL: 'fptosi_float_i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16> ; BTVER2-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef @@ -285,9 +321,9 @@ define i32 @fptosi_float_i16(i32 %arg) { define i32 @fptosi_float_i8(i32 %arg) { ; SSE-LABEL: 'fptosi_float_i8' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8> -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8> -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8> +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8> +; SSE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8> +; SSE-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'fptosi_float_i8' @@ -304,6 +340,13 @@ define i32 @fptosi_float_i8(i32 %arg) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'fptosi_float_i8' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8 +; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8> +; 
SLM-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; BTVER2-LABEL: 'fptosi_float_i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8> diff --git a/llvm/test/Analysis/CostModel/X86/fptoui.ll b/llvm/test/Analysis/CostModel/X86/fptoui.ll index f13781b4e..c3b2cb513 100644 --- a/llvm/test/Analysis/CostModel/X86/fptoui.ll +++ b/llvm/test/Analysis/CostModel/X86/fptoui.ll @@ -6,7 +6,7 @@ ; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F ; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512DQ ; -; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=SLM ; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 ; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=btver2 | FileCheck %s --check-prefixes=BTVER2 @@ -39,6 +39,13 @@ define i32 @fptoui_double_i64(i32 %arg) { ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'fptoui_double_i64' +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui double undef to i64 +; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = fptoui <2 x double> undef 
to <2 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; BTVER2-LABEL: 'fptoui_double_i64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui double undef to i64 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64> @@ -68,19 +75,19 @@ define i32 @fptoui_double_i32(i32 %arg) { ; AVX-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; -; AVX512F-LABEL: 'fptoui_double_i32' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui double undef to i32 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = fptoui <2 x double> undef to <2 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = fptoui <4 x double> undef to <4 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; AVX512-LABEL: 'fptoui_double_i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui double undef to i32 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = fptoui <2 x double> undef to <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = fptoui <4 x double> undef to <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32> +; 
AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; -; AVX512DQ-LABEL: 'fptoui_double_i32' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui double undef to i32 -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = fptoui <2 x double> undef to <2 x i32> -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = fptoui <4 x double> undef to <4 x i32> -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32> -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; SLM-LABEL: 'fptoui_double_i32' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui double undef to i32 +; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = fptoui <2 x double> undef to <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I32 = fptoui <4 x double> undef to <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'fptoui_double_i32' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui double undef to i32 @@ -106,30 +113,30 @@ define i32 @fptoui_double_i16(i32 %arg) { ; ; AVX-LABEL: 'fptoui_double_i16' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16 -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16> -; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16> -; AVX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16> +; 
AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; -; AVX512F-LABEL: 'fptoui_double_i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; AVX512-LABEL: 'fptoui_double_i16' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; -; AVX512DQ-LABEL: 'fptoui_double_i16' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16 -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16> -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%V4I16 = fptoui <4 x double> undef to <4 x i16> -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16> -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; SLM-LABEL: 'fptoui_double_i16' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16 +; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16> +; SLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16> +; SLM-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'fptoui_double_i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I16 = fptoui double undef to i16 @@ -154,19 +161,19 @@ define i32 @fptoui_double_i8(i32 %arg) { ; AVX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I8 = fptoui <8 
x double> undef to <8 x i8> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; -; AVX512F-LABEL: 'fptoui_double_i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; AVX512-LABEL: 'fptoui_double_i8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; -; AVX512DQ-LABEL: 'fptoui_double_i8' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8 -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8> -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8> -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8> -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; SLM-LABEL: 'fptoui_double_i8' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %I8 = fptoui double undef to i8 +; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'fptoui_double_i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8 @@ -215,6 +222,14 @@ define i32 @fptoui_float_i64(i32 %arg) { ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'fptoui_float_i64' +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui float undef to i64 +; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 151 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; BTVER2-LABEL: 'fptoui_float_i64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui float undef to i64 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64> @@ -253,6 +268,13 @@ define i32 @fptoui_float_i32(i32 %arg) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'fptoui_float_i32' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui float undef to i32 +; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I32 = fptoui <4 x float> undef to <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I32 = fptoui <8 x float> undef to <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; BTVER2-LABEL: 'fptoui_float_i32' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui float undef to i32 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = fptoui <4 x float> undef to <4 x i32> @@ -277,7 +299,7 @@ define i32 @fptoui_float_i16(i32 %arg) { ; ; AVX-LABEL: 'fptoui_float_i16' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui float undef to i16 -; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16> ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef @@ -289,9 +311,16 @@ define i32 @fptoui_float_i16(i32 %arg) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: ret i32 undef ; +; SLM-LABEL: 'fptoui_float_i16' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui float undef to i16 +; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16> +; SLM-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16> +; SLM-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; BTVER2-LABEL: 'fptoui_float_i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui float undef to i16 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef @@ -314,8 +343,8 @@ define i32 @fptoui_float_i8(i32 %arg) { ; AVX-LABEL: 'fptoui_float_i8' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8 ; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8> -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8> +; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8> +; 
AVX-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'fptoui_float_i8' @@ -325,11 +354,18 @@ define i32 @fptoui_float_i8(i32 %arg) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'fptoui_float_i8' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8 +; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; BTVER2-LABEL: 'fptoui_float_i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I8 = fptoui float undef to i8 diff --git a/llvm/test/Analysis/CostModel/X86/fshl.ll 
b/llvm/test/Analysis/CostModel/X86/fshl.ll index 38621c35a..976fa750f 100644 --- a/llvm/test/Analysis/CostModel/X86/fshl.ll +++ b/llvm/test/Analysis/CostModel/X86/fshl.ll @@ -57,9 +57,9 @@ define void @var_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64 ; ; SLM-LABEL: 'var_funnel_i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 %c64) -; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %c128) -; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %c256) -; SLM-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %c512) +; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %c128) +; SLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %c256) +; SLM-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %c512) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'var_funnel_i64' @@ -385,9 +385,9 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) -; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) +; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) +; SLM-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) +; SLM-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatvar_funnel_i64' @@ -777,9 +777,9 @@ define void @constant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 ; ; SLM-LABEL: 'constant_funnel_i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for 
instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'constant_funnel_i64' @@ -1093,9 +1093,9 @@ define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256 ; ; SLM-LABEL: 'splatconstant_funnel_i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i64' diff --git a/llvm/test/Analysis/CostModel/X86/fshr.ll b/llvm/test/Analysis/CostModel/X86/fshr.ll index 
31e409cc9..ef74f9d71 100644 --- a/llvm/test/Analysis/CostModel/X86/fshr.ll +++ b/llvm/test/Analysis/CostModel/X86/fshr.ll @@ -57,9 +57,9 @@ define void @var_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64 ; ; SLM-LABEL: 'var_funnel_i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 %c64) -; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %c128) -; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %c256) -; SLM-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %c512) +; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %c128) +; SLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %c256) +; SLM-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %c512) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'var_funnel_i64' @@ -385,9 +385,9 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x 
i64> undef, <8 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) -; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) +; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) +; SLM-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) +; SLM-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatvar_funnel_i64' @@ -777,9 +777,9 @@ define void @constant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 ; ; SLM-LABEL: 'constant_funnel_i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x 
i64> %a128, <2 x i64> %b128, <2 x i64> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'constant_funnel_i64' @@ -1093,9 +1093,9 @@ define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256 ; ; SLM-LABEL: 'splatconstant_funnel_i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i64' diff --git a/llvm/test/Analysis/CostModel/X86/icmp.ll b/llvm/test/Analysis/CostModel/X86/icmp.ll index caf20d1b9..af84cf9fc 100644 --- a/llvm/test/Analysis/CostModel/X86/icmp.ll 
+++ b/llvm/test/Analysis/CostModel/X86/icmp.ll @@ -11,7 +11,7 @@ ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+xop,+avx | FileCheck %s -check-prefixes=CHECK,AVX,XOPAVX1 ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+xop,+avx2 | FileCheck %s -check-prefixes=CHECK,AVX,XOPAVX2 ; -; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE,SLM ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=btver2 | FileCheck %s --check-prefixes=BTVER2 @@ -269,6 +269,29 @@ define i32 @cmp_int_eq(i32 %arg) { ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I64 = icmp eq <16 x i64> undef, undef ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'cmp_int_eq' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp eq i8 undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp eq <16 x i8> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp eq <32 x i8> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp eq <64 x i8> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp eq <128 x i8> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp eq i16 undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp eq <8 x i16> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp 
eq <16 x i16> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp eq <32 x i16> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp eq <64 x i16> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp eq i32 undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp eq <4 x i32> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp eq <8 x i32> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp eq <16 x i32> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp eq <32 x i32> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp eq i64 undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = icmp eq <2 x i64> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp eq <4 x i64> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = icmp eq <8 x i64> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp eq <16 x i64> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; BTVER2-LABEL: 'cmp_int_eq' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp eq i8 undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp eq <16 x i8> undef, undef @@ -573,6 +596,29 @@ define i32 @cmp_int_ne(i32 %arg) { ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'cmp_int_ne' +; SLM-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %I8 = icmp ne i8 undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp ne <32 x i8> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ne i16 undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ne <8 x i16> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp ne <16 x i16> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ne i32 undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp ne <4 x i32> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp ne <8 x i32> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ne i64 undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef +; SLM-NEXT: Cost Model: Found an 
estimated cost of 12 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; BTVER2-LABEL: 'cmp_int_ne' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ne i8 undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef @@ -877,6 +923,29 @@ define i32 @cmp_int_sge(i32 %arg) { ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'cmp_int_sge' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sge i8 undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp sge <32 x i8> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sge i16 undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sge <8 x i16> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp sge <16 x i16> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%I32 = icmp sge i32 undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp sge <4 x i32> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp sge <8 x i32> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sge i64 undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; BTVER2-LABEL: 'cmp_int_sge' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sge i8 undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef @@ -1181,6 +1250,29 @@ define i32 @cmp_int_uge(i32 %arg) { ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'cmp_int_uge' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp uge i8 undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp uge <16 x i8> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp uge <32 x i8> undef, undef 
+; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp uge <64 x i8> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp uge <128 x i8> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp uge i16 undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp uge <8 x i16> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp uge <16 x i16> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp uge <32 x i16> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp uge <64 x i16> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp uge i32 undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp uge <4 x i32> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp uge <8 x i32> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp uge <16 x i32> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp uge <32 x i32> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp uge i64 undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; 
BTVER2-LABEL: 'cmp_int_uge' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp uge i8 undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp uge <16 x i8> undef, undef @@ -1485,6 +1577,29 @@ define i32 @cmp_int_sgt(i32 %arg) { ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I64 = icmp sgt <16 x i64> undef, undef ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'cmp_int_sgt' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sgt i8 undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sgt <16 x i8> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp sgt <32 x i8> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp sgt <64 x i8> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp sgt <128 x i8> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sgt i16 undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sgt <8 x i16> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp sgt <16 x i16> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp sgt <32 x i16> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp sgt <64 x i16> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sgt i32 undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sgt <4 x i32> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp sgt <8 x i32> undef, undef +; SLM-NEXT: Cost 
Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp sgt <16 x i32> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp sgt <32 x i32> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sgt i64 undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = icmp sgt <2 x i64> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp sgt <4 x i64> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = icmp sgt <8 x i64> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp sgt <16 x i64> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; BTVER2-LABEL: 'cmp_int_sgt' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sgt i8 undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sgt <16 x i8> undef, undef @@ -1789,6 +1904,29 @@ define i32 @cmp_int_ugt(i32 %arg) { ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'cmp_int_ugt' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ugt i8 undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp ugt <32 x i8> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost 
of 1 for instruction: %I16 = icmp ugt i16 undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ugt <8 x i16> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp ugt <16 x i16> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ugt i32 undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = icmp ugt <4 x i32> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp ugt <8 x i32> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ugt i64 undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; BTVER2-LABEL: 'cmp_int_ugt' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ugt i8 undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef @@ -2093,6 +2231,29 @@ define i32 
@cmp_int_sle(i32 %arg) { ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'cmp_int_sle' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sle i8 undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp sle <32 x i8> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sle i16 undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sle <8 x i16> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp sle <16 x i16> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sle i32 undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp sle <4 x i32> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp sle <8 x i32> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%I64 = icmp sle i64 undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; BTVER2-LABEL: 'cmp_int_sle' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sle i8 undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef @@ -2397,6 +2558,29 @@ define i32 @cmp_int_ule(i32 %arg) { ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'cmp_int_ule' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ule i8 undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ule <16 x i8> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp ule <32 x i8> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp ule <64 x i8> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp ule <128 x i8> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ule i16 undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ule <8 x i16> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp ule <16 x i16> undef, undef 
+; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp ule <32 x i16> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp ule <64 x i16> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ule i32 undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp ule <4 x i32> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp ule <8 x i32> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp ule <16 x i32> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp ule <32 x i32> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ule i64 undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; BTVER2-LABEL: 'cmp_int_ule' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ule i8 undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ule <16 x i8> undef, undef @@ -2701,6 +2885,29 @@ define i32 @cmp_int_slt(i32 %arg) { ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I64 = icmp slt <16 x i64> undef, undef ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'cmp_int_slt' +; SLM-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %I8 = icmp slt i8 undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp slt <16 x i8> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp slt <32 x i8> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp slt <64 x i8> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp slt <128 x i8> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp slt i16 undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp slt <8 x i16> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp slt <16 x i16> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp slt <32 x i16> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp slt <64 x i16> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp slt i32 undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp slt <4 x i32> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp slt <8 x i32> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp slt <16 x i32> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp slt <32 x i32> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp slt i64 undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = icmp slt <2 x i64> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp slt <4 x i64> undef, undef +; SLM-NEXT: Cost Model: Found 
an estimated cost of 8 for instruction: %V8I64 = icmp slt <8 x i64> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp slt <16 x i64> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; BTVER2-LABEL: 'cmp_int_slt' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp slt i8 undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp slt <16 x i8> undef, undef @@ -3005,6 +3212,29 @@ define i32 @cmp_int_ult(i32 %arg) { ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'cmp_int_ult' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ult i8 undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp ult <32 x i8> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ult i16 undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ult <8 x i16> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp ult <16 x i16> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %I32 = icmp ult i32 undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = icmp ult <4 x i32> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp ult <8 x i32> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ult i64 undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; BTVER2-LABEL: 'cmp_int_ult' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ult i8 undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll index 050a83364..076bfa3c7 100644 --- a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll @@ -52,7 +52,7 @@ define i32 @masked_load() { ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* undef, i32 1, <16 x i1> undef, <16 x i32> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 
= call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* undef, i32 1, <8 x i1> undef, <8 x i32> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 1, <4 x i1> undef, <4 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef) @@ -79,7 +79,7 @@ define i32 @masked_load() { ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* undef, i32 1, <16 x i1> undef, <16 x i32> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* undef, i32 1, <8 x i1> undef, <8 x i32> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 1, <4 x i1> undef, <4 x i32> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> 
undef, <2 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef) @@ -106,15 +106,15 @@ define i32 @masked_load() { ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* undef, i32 1, <16 x i1> undef, <16 x i32> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* undef, i32 1, <8 x i1> undef, <8 x i32> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 1, <4 x i1> undef, <4 x i32> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 
1 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 1, <4 x i1> undef, <4 x i16> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* undef, i32 1, <8 x i1> undef, <8 x double> undef) @@ -194,7 +194,7 @@ define i32 @masked_store() { ; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> undef, <16 
x i32>* undef, i32 1, <16 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> undef, <8 x i32>* undef, i32 1, <8 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> undef, <4 x i32>* undef, i32 1, <4 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> undef, <2 x i32>* undef, i32 1, <2 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> undef, <2 x i32>* undef, i32 1, <2 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> undef, <32 x i16>* undef, i32 1, <32 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> undef, <16 x i16>* undef, i32 1, <16 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> undef, <8 x i16>* undef, i32 1, <8 x i1> undef) @@ -221,7 +221,7 @@ define i32 @masked_store() { ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> undef, <16 x i32>* undef, i32 1, <16 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> undef, <8 x i32>* undef, i32 1, <8 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> undef, <4 x i32>* undef, i32 1, <4 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> undef, <2 x i32>* undef, i32 1, <2 x i1> undef) +; 
KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> undef, <2 x i32>* undef, i32 1, <2 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> undef, <32 x i16>* undef, i32 1, <32 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> undef, <16 x i16>* undef, i32 1, <16 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> undef, <8 x i16>* undef, i32 1, <8 x i1> undef) @@ -248,15 +248,15 @@ define i32 @masked_store() { ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> undef, <16 x i32>* undef, i32 1, <16 x i1> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> undef, <8 x i32>* undef, i32 1, <8 x i1> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> undef, <4 x i32>* undef, i32 1, <4 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> undef, <2 x i32>* undef, i32 1, <2 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> undef, <2 x i32>* undef, i32 1, <2 x i1> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> undef, <32 x i16>* undef, i32 1, <32 x i1> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> undef, <16 x i16>* undef, i32 1, <16 x i1> undef) ; SKX-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> undef, <8 x i16>* undef, i32 1, <8 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> undef, <4 x i16>* undef, i32 1, <4 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> undef, <4 x i16>* undef, i32 1, <4 x i1> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> undef, <64 x i8>* undef, i32 1, <64 x i1> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> undef, <32 x i8>* undef, i32 1, <32 x i1> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> undef, <16 x i8>* undef, i32 1, <16 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> undef, <8 x i8>* undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> undef, <8 x i8>* undef, i32 1, <8 x i1> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> undef, <8 x double>* undef, i32 1, <8 x i1> undef) @@ -960,15 +960,10 @@ define <8 x float> @test4(<8 x i32> %trigger, <8 x float>* %addr, <8 x float> %d } define void @test5(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) { -; SSE2-LABEL: 'test5' -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %val, <2 x 
float>* %addr, i32 4, <2 x i1> %mask) -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; SSE42-LABEL: 'test5' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %val, <2 x float>* %addr, i32 4, <2 x i1> %mask) -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; SSE-LABEL: 'test5' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %val, <2 x float>* %addr, i32 4, <2 x i1> %mask) +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'test5' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer @@ -986,24 +981,19 @@ define void @test5(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) { } define void @test6(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %val) { -; SSE2-LABEL: 'test6' -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask) -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; SSE42-LABEL: 'test6' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask) -; SSE42-NEXT: Cost 
Model: Found an estimated cost of 0 for instruction: ret void +; SSE-LABEL: 'test6' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask) +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'test6' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask) +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test6' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %mask = icmp eq <2 x i32> %trigger, zeroinitializer @@ -1012,15 +1002,10 @@ define void @test6(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %val) { } define <2 x float> @test7(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %dst) { -; SSE2-LABEL: 'test7' -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer -; 
SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1> %mask, <2 x float> %dst) -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x float> %res -; -; SSE42-LABEL: 'test7' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1> %mask, <2 x float> %dst) -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x float> %res +; SSE-LABEL: 'test7' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1> %mask, <2 x float> %dst) +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x float> %res ; ; AVX-LABEL: 'test7' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer @@ -1038,24 +1023,19 @@ define <2 x float> @test7(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %d } define <2 x i32> @test8(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) { -; SSE2-LABEL: 'test8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst) -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %res -; -; SSE42-LABEL: 'test8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask 
= icmp eq <2 x i32> %trigger, zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst) -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %res +; SSE-LABEL: 'test8' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst) +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %res ; ; AVX-LABEL: 'test8' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst) +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %res ; ; AVX512-LABEL: 'test8' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %res ; %mask = icmp eq <2 x i32> %trigger, 
zeroinitializer diff --git a/llvm/test/Analysis/CostModel/X86/min-legal-vector-width.ll b/llvm/test/Analysis/CostModel/X86/min-legal-vector-width.ll index 028ed87a9..1f354e2cf 100644 --- a/llvm/test/Analysis/CostModel/X86/min-legal-vector-width.ll +++ b/llvm/test/Analysis/CostModel/X86/min-legal-vector-width.ll @@ -5,7 +5,7 @@ define void @zext256() "min-legal-vector-width"="256" { ; VEC256-LABEL: 'zext256' -; VEC256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %A = zext <8 x i16> undef to <8 x i64> +; VEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %A = zext <8 x i16> undef to <8 x i64> ; VEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %B = zext <8 x i32> undef to <8 x i64> ; VEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C = zext <16 x i8> undef to <16 x i32> ; VEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D = zext <16 x i16> undef to <16 x i32> @@ -30,7 +30,7 @@ define void @zext256() "min-legal-vector-width"="256" { define void @zext512() "min-legal-vector-width"="512" { ; AVX-LABEL: 'zext512' -; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %A = zext <8 x i16> undef to <8 x i64> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %A = zext <8 x i16> undef to <8 x i64> ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %B = zext <8 x i32> undef to <8 x i64> ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C = zext <16 x i8> undef to <16 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D = zext <16 x i16> undef to <16 x i32> @@ -63,8 +63,8 @@ define void @zext512() "min-legal-vector-width"="512" { define void @sext256() "min-legal-vector-width"="256" { ; VEC256-LABEL: 'sext256' -; VEC256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %A = sext <8 x i8> undef to <8 x i64> -; VEC256-NEXT: Cost Model: Found an estimated cost 
of 7 for instruction: %B = sext <8 x i16> undef to <8 x i64> +; VEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %A = sext <8 x i8> undef to <8 x i64> +; VEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %B = sext <8 x i16> undef to <8 x i64> ; VEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C = sext <8 x i32> undef to <8 x i64> ; VEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D = sext <16 x i8> undef to <16 x i32> ; VEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %E = sext <16 x i16> undef to <16 x i32> @@ -72,7 +72,7 @@ define void @sext256() "min-legal-vector-width"="256" { ; VEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; VEC512-LABEL: 'sext256' -; VEC512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %A = sext <8 x i8> undef to <8 x i64> +; VEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A = sext <8 x i8> undef to <8 x i64> ; VEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %B = sext <8 x i16> undef to <8 x i64> ; VEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C = sext <8 x i32> undef to <8 x i64> ; VEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D = sext <16 x i8> undef to <16 x i32> @@ -91,8 +91,8 @@ define void @sext256() "min-legal-vector-width"="256" { define void @sext512() "min-legal-vector-width"="512" { ; AVX-LABEL: 'sext512' -; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %A = sext <8 x i8> undef to <8 x i64> -; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %B = sext <8 x i16> undef to <8 x i64> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %A = sext <8 x i8> undef to <8 x i64> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %B = sext <8 x i16> undef to <8 x i64> ; AVX-NEXT: Cost Model: Found an 
estimated cost of 3 for instruction: %C = sext <8 x i32> undef to <8 x i64> ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D = sext <16 x i8> undef to <16 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %E = sext <16 x i16> undef to <16 x i32> @@ -100,7 +100,7 @@ define void @sext512() "min-legal-vector-width"="512" { ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SKX256-LABEL: 'sext512' -; SKX256-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %A = sext <8 x i8> undef to <8 x i64> +; SKX256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A = sext <8 x i8> undef to <8 x i64> ; SKX256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %B = sext <8 x i16> undef to <8 x i64> ; SKX256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C = sext <8 x i32> undef to <8 x i64> ; SKX256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D = sext <16 x i8> undef to <16 x i32> @@ -109,7 +109,7 @@ define void @sext512() "min-legal-vector-width"="512" { ; SKX256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; VEC512-LABEL: 'sext512' -; VEC512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %A = sext <8 x i8> undef to <8 x i64> +; VEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A = sext <8 x i8> undef to <8 x i64> ; VEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %B = sext <8 x i16> undef to <8 x i64> ; VEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C = sext <8 x i32> undef to <8 x i64> ; VEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D = sext <16 x i8> undef to <16 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/reduce-add.ll b/llvm/test/Analysis/CostModel/X86/reduce-add.ll index 066ddb77a..b5729eac4 100644 --- a/llvm/test/Analysis/CostModel/X86/reduce-add.ll +++ 
b/llvm/test/Analysis/CostModel/X86/reduce-add.ll @@ -8,30 +8,16 @@ ; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW ; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512DQ +; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mcpu=slm | FileCheck %s --check-prefixes=SLM + define i32 @reduce_i64(i32 %arg) { -; SSE2-LABEL: 'reduce_i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef -; -; SSSE3-LABEL: 'reduce_i64' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef) -; 
SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef -; -; SSE42-LABEL: 'reduce_i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; SSE-LABEL: 'reduce_i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'reduce_i64' ; AVX-NEXT: Cost Model: 
Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef) @@ -48,6 +34,14 @@ define i32 @reduce_i64(i32 %arg) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SLM-LABEL: 'reduce_i64' +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef) %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef) @@ -58,32 +52,16 @@ define i32 @reduce_i64(i32 %arg) { } define i32 @reduce_i32(i32 %arg) { -; SSE2-LABEL: 'reduce_i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> undef) -; SSE2-NEXT: Cost 
Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef -; -; SSSE3-LABEL: 'reduce_i32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef -; -; SSE42-LABEL: 'reduce_i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x 
i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; SSE-LABEL: 'reduce_i32' +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'reduce_i32' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> undef) @@ -91,12 +69,20 @@ define i32 @reduce_i32(i32 %arg) { ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef 
; ; AVX512-LABEL: 'reduce_i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SLM-LABEL: 'reduce_i32' +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V2 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> undef) %V4 = call i32 
@llvm.experimental.vector.reduce.add.v4i32(<4 x i32> undef) @@ -107,77 +93,59 @@ define i32 @reduce_i32(i32 %arg) { } define i32 @reduce_i16(i32 %arg) { -; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef -; -; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 
for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef -; -; SSE42-LABEL: 'reduce_i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef -; -; AVX1-LABEL: 'reduce_i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef) -; AVX1-NEXT: Cost Model: 
Found an estimated cost of 61 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef -; -; AVX2-LABEL: 'reduce_i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; SSE-LABEL: 'reduce_i16' +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef) +; SSE-NEXT: 
Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX-LABEL: 'reduce_i16' +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x 
i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef) ; AVX512BW-NEXT: Cost Model: 
Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i16' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SLM-LABEL: 'reduce_i16' +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 
@llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef) %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef) @@ -189,85 +157,65 @@ define i32 @reduce_i16(i32 %arg) { } define i32 @reduce_i8(i32 %arg) { -; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i8 
@llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef -; -; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef -; -; SSE42-LABEL: 'reduce_i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call 
i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef -; -; AVX1-LABEL: 'reduce_i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef -; -; AVX2-LABEL: 'reduce_i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call 
i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; SSE-LABEL: 'reduce_i8' +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
ret i32 undef +; +; AVX-LABEL: 'reduce_i8' +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 
= call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) -; 
AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i8' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = 
call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SLM-LABEL: 'reduce_i8' +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) +; SLM-NEXT: Cost Model: 
Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) diff --git a/llvm/test/Analysis/CostModel/X86/reduce-and.ll b/llvm/test/Analysis/CostModel/X86/reduce-and.ll index 77e8ee57e..455f7326f 100644 --- a/llvm/test/Analysis/CostModel/X86/reduce-and.ll +++ b/llvm/test/Analysis/CostModel/X86/reduce-and.ll @@ -92,8 +92,8 @@ define i32 @reduce_i32(i32 %arg) { define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 
@llvm.experimental.vector.reduce.and.v16i16(<16 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16> undef) @@ -174,9 +174,9 @@ define i32 @reduce_i16(i32 %arg) { define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8> undef) @@ -299,35 +299,35 @@ define i32 @reduce_i1(i32 %arg) { ; ; AVX512F-LABEL: 'reduce_i1' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 
@llvm.experimental.vector.reduce.and.v2i1(<2 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.v128i1(<128 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.v128i1(<128 
x i1> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i1' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 326 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 775 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 776 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.v128i1(<128 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for 
instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.v128i1(<128 x i1> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i1' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.v128i1(<128 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 
@llvm.experimental.vector.reduce.and.v4i1(<4 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.v128i1(<128 x i1> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V1 = call i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> undef) diff --git a/llvm/test/Analysis/CostModel/X86/reduce-mul.ll b/llvm/test/Analysis/CostModel/X86/reduce-mul.ll index ce94216f4..42c9b5ced 100644 --- a/llvm/test/Analysis/CostModel/X86/reduce-mul.ll +++ b/llvm/test/Analysis/CostModel/X86/reduce-mul.ll @@ -67,7 +67,7 @@ define i32 @reduce_i64(i32 %arg) { define i32 @reduce_i32(i32 %arg) { ; SSE2-LABEL: 'reduce_i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for 
instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> undef) @@ -75,7 +75,7 @@ define i32 @reduce_i32(i32 %arg) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> undef) @@ -83,7 +83,7 @@ define i32 @reduce_i32(i32 %arg) { ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> undef) @@ -91,7 +91,7 @@ define i32 @reduce_i32(i32 %arg) { ; SSE42-NEXT: Cost Model: Found 
an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> undef) @@ -99,36 +99,20 @@ define i32 @reduce_i32(i32 %arg) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; -; AVX512F-LABEL: 'reduce_i32' -; 
AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef -; -; AVX512BW-LABEL: 'reduce_i32' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef -; -; AVX512DQ-LABEL: 'reduce_i32' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = 
call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; AVX512-LABEL: 'reduce_i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef) %V4 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> undef) @@ -140,8 +124,8 @@ define i32 @reduce_i32(i32 %arg) { define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i16 
@llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> undef) @@ -149,8 +133,8 @@ define i32 @reduce_i16(i32 %arg) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 
x i16> undef) @@ -158,8 +142,8 @@ define i32 @reduce_i16(i32 %arg) { ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> undef) @@ -167,8 +151,8 @@ define i32 @reduce_i16(i32 %arg) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef) ; AVX1-NEXT: 
Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> undef) @@ -176,8 +160,8 @@ define i32 @reduce_i16(i32 %arg) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> undef) @@ -185,7 +169,7 @@ define i32 @reduce_i16(i32 %arg) { ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an 
estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> undef) @@ -194,7 +178,7 @@ define i32 @reduce_i16(i32 %arg) { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> undef) @@ -222,9 +206,9 @@ define i32 @reduce_i16(i32 %arg) { define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i8 
@llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> undef) @@ -232,9 +216,9 @@ define i32 @reduce_i8(i32 %arg) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef) ; SSSE3-NEXT: Cost 
Model: Found an estimated cost of 53 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> undef) @@ -242,9 +226,9 @@ define i32 @reduce_i8(i32 %arg) { ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> undef) @@ -252,9 +236,9 @@ define i32 @reduce_i8(i32 %arg) { ; SSE42-NEXT: Cost Model: Found 
an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 171 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 197 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> undef) @@ -262,9 +246,9 @@ define i32 @reduce_i8(i32 %arg) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef) +; 
AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> undef) @@ -272,9 +256,9 @@ define i32 @reduce_i8(i32 %arg) { ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for 
instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> undef) @@ -282,9 +266,9 @@ define i32 @reduce_i8(i32 %arg) { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> undef) @@ -292,9 +276,9 @@ define i32 @reduce_i8(i32 %arg) { ; AVX512BW-NEXT: Cost Model: Found an 
estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i8' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> undef) diff --git a/llvm/test/Analysis/CostModel/X86/reduce-or.ll b/llvm/test/Analysis/CostModel/X86/reduce-or.ll index 16978e518..ee05562dc 100644 --- a/llvm/test/Analysis/CostModel/X86/reduce-or.ll +++ b/llvm/test/Analysis/CostModel/X86/reduce-or.ll @@ -92,8 +92,8 @@ define i32 @reduce_i32(i32 %arg) { define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for 
instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16> undef) @@ -174,9 +174,9 @@ define i32 @reduce_i16(i32 %arg) { define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 
46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8> undef) @@ -299,35 +299,35 @@ define i32 @reduce_i1(i32 %arg) { ; ; AVX512F-LABEL: 'reduce_i1' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.v128i1(<128 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an 
estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.v128i1(<128 x i1> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i1' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 326 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 775 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 776 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.v128i1(<128 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 
@llvm.experimental.vector.reduce.or.v2i1(<2 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.v128i1(<128 x i1> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i1' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> undef) -; AVX512DQ-NEXT: Cost 
Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.v128i1(<128 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.v128i1(<128 x i1> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V1 = call i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> undef) diff --git a/llvm/test/Analysis/CostModel/X86/reduce-smax.ll b/llvm/test/Analysis/CostModel/X86/reduce-smax.ll index 54613791e..a578ad67f 100644 --- a/llvm/test/Analysis/CostModel/X86/reduce-smax.ll +++ b/llvm/test/Analysis/CostModel/X86/reduce-smax.ll @@ -83,7 +83,7 @@ define i32 @reduce_i32(i32 %arg) { ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call 
i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef) @@ -91,7 +91,7 @@ define i32 @reduce_i32(i32 %arg) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef) @@ -99,7 +99,7 @@ define i32 @reduce_i32(i32 %arg) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 
@llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef) @@ -107,7 +107,7 @@ define i32 @reduce_i32(i32 %arg) { ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef) @@ -124,8 +124,8 @@ define i32 @reduce_i32(i32 %arg) { define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef) +; 
SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef) @@ -133,8 +133,8 @@ define i32 @reduce_i16(i32 %arg) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef) @@ -142,7 +142,7 @@ define i32 @reduce_i16(i32 %arg) { ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i16' -; SSE42-NEXT: 
Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef) @@ -151,7 +151,7 @@ define i32 @reduce_i16(i32 %arg) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef) @@ -160,7 +160,7 @@ define i32 @reduce_i16(i32 %arg) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef) +; AVX2-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef) @@ -169,7 +169,7 @@ define i32 @reduce_i16(i32 %arg) { ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef) @@ -178,7 +178,7 @@ define i32 @reduce_i16(i32 %arg) { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef) @@ -187,7 +187,7 @@ define i32 @reduce_i16(i32 %arg) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i16' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef) @@ -206,8 +206,8 @@ define i32 @reduce_i16(i32 %arg) { define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 
@llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef) @@ -216,8 +216,8 @@ define i32 @reduce_i8(i32 %arg) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef) @@ -226,9 +226,9 @@ define i32 @reduce_i8(i32 %arg) { ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> 
undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef) @@ -236,9 +236,9 @@ define i32 @reduce_i8(i32 %arg) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef) @@ -246,9 +246,9 @@ define i32 @reduce_i8(i32 %arg) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> 
undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef) @@ -256,9 +256,9 @@ define i32 @reduce_i8(i32 %arg) { ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef) @@ -266,9 +266,9 @@ define i32 @reduce_i8(i32 %arg) { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef) -; 
AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef) @@ -276,9 +276,9 @@ define i32 @reduce_i8(i32 %arg) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i8' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef) +; AVX512DQ-NEXT: Cost Model: 
Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef) diff --git a/llvm/test/Analysis/CostModel/X86/reduce-smin.ll b/llvm/test/Analysis/CostModel/X86/reduce-smin.ll index 777ac088e..01263daea 100644 --- a/llvm/test/Analysis/CostModel/X86/reduce-smin.ll +++ b/llvm/test/Analysis/CostModel/X86/reduce-smin.ll @@ -83,7 +83,7 @@ define i32 @reduce_i32(i32 %arg) { ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef) @@ -91,7 +91,7 @@ define i32 @reduce_i32(i32 %arg) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef) @@ -99,7 +99,7 @@ define i32 @reduce_i32(i32 %arg) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef) @@ -107,7 +107,7 @@ define i32 @reduce_i32(i32 %arg) { ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 
@llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef) @@ -124,8 +124,8 @@ define i32 @reduce_i32(i32 %arg) { define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef) @@ -133,8 +133,8 @@ define i32 @reduce_i16(i32 %arg) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; 
SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef) @@ -142,7 +142,7 @@ define i32 @reduce_i16(i32 %arg) { ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef) @@ -151,7 +151,7 @@ define i32 
@reduce_i16(i32 %arg) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef) @@ -160,7 +160,7 @@ define i32 @reduce_i16(i32 %arg) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef) @@ -169,7 +169,7 @@ define i32 @reduce_i16(i32 %arg) { ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i16' -; AVX512F-NEXT: Cost Model: Found an 
estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef) @@ -178,7 +178,7 @@ define i32 @reduce_i16(i32 %arg) { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef) @@ -187,7 +187,7 @@ define i32 @reduce_i16(i32 %arg) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i16' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef) +; AVX512DQ-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef) @@ -206,8 +206,8 @@ define i32 @reduce_i16(i32 %arg) { define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef) @@ -216,8 +216,8 @@ define i32 @reduce_i8(i32 %arg) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 
@llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef) @@ -226,9 +226,9 @@ define i32 @reduce_i8(i32 %arg) { ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef) ; SSE42-NEXT: 
Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef) @@ -236,9 +236,9 @@ define i32 @reduce_i8(i32 %arg) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef) @@ -246,9 +246,9 @@ define i32 @reduce_i8(i32 %arg) { ; AVX1-NEXT: Cost Model: Found an 
estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef) @@ -256,9 +256,9 @@ define i32 @reduce_i8(i32 %arg) { ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> 
undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef) @@ -266,9 +266,9 @@ define i32 @reduce_i8(i32 %arg) { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef) ; AVX512BW-NEXT: Cost Model: 
Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef) @@ -276,9 +276,9 @@ define i32 @reduce_i8(i32 %arg) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i8' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef) diff --git 
a/llvm/test/Analysis/CostModel/X86/reduce-umax.ll b/llvm/test/Analysis/CostModel/X86/reduce-umax.ll index 1259bdf8c..f77915e1b 100644 --- a/llvm/test/Analysis/CostModel/X86/reduce-umax.ll +++ b/llvm/test/Analysis/CostModel/X86/reduce-umax.ll @@ -83,7 +83,7 @@ define i32 @reduce_i32(i32 %arg) { ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef) @@ -91,7 +91,7 @@ define i32 @reduce_i32(i32 %arg) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call 
i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef) @@ -99,7 +99,7 @@ define i32 @reduce_i32(i32 %arg) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef) @@ -107,7 +107,7 @@ define i32 @reduce_i32(i32 %arg) { ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef) @@ -124,8 +124,8 @@ define i32 @reduce_i32(i32 %arg) { define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 
'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef) @@ -133,8 +133,8 @@ define i32 @reduce_i16(i32 %arg) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef) ; SSSE3-NEXT: Cost Model: Found 
an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef) @@ -142,7 +142,7 @@ define i32 @reduce_i16(i32 %arg) { ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef) @@ -151,7 +151,7 @@ define i32 @reduce_i16(i32 %arg) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 
2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef) @@ -160,7 +160,7 @@ define i32 @reduce_i16(i32 %arg) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef) @@ -169,7 +169,7 @@ define i32 @reduce_i16(i32 %arg) { ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef) @@ -178,7 +178,7 @@ define i32 @reduce_i16(i32 %arg) { ; 
AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef) @@ -187,7 +187,7 @@ define i32 @reduce_i16(i32 %arg) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i16' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef) @@ -206,9 +206,9 @@ define i32 @reduce_i16(i32 %arg) { define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for 
instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef) @@ -216,9 +216,9 @@ define i32 @reduce_i8(i32 %arg) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> 
undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef) @@ -226,9 +226,9 @@ define i32 @reduce_i8(i32 %arg) { ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for 
instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef) @@ -236,9 +236,9 @@ define i32 @reduce_i8(i32 %arg) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef) @@ -246,9 +246,9 @@ define i32 @reduce_i8(i32 %arg) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 
@llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef) @@ -256,9 +256,9 @@ define i32 @reduce_i8(i32 %arg) { ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef) +; 
AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef) @@ -266,9 +266,9 @@ define i32 @reduce_i8(i32 %arg) { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef) @@ -276,9 +276,9 @@ define i32 @reduce_i8(i32 %arg) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i8' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef) diff --git a/llvm/test/Analysis/CostModel/X86/reduce-umin.ll b/llvm/test/Analysis/CostModel/X86/reduce-umin.ll index b946310e8..ce35e8b24 100644 --- a/llvm/test/Analysis/CostModel/X86/reduce-umin.ll +++ 
b/llvm/test/Analysis/CostModel/X86/reduce-umin.ll @@ -83,7 +83,7 @@ define i32 @reduce_i32(i32 %arg) { ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef) @@ -91,7 +91,7 @@ define i32 @reduce_i32(i32 %arg) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef) @@ -99,7 +99,7 @@ define i32 @reduce_i32(i32 %arg) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret 
i32 undef ; ; AVX2-LABEL: 'reduce_i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef) @@ -107,7 +107,7 @@ define i32 @reduce_i32(i32 %arg) { ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef) @@ -124,8 +124,8 @@ define i32 @reduce_i32(i32 %arg) { define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef) -; SSE2-NEXT: Cost Model: Found an 
estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef) @@ -133,8 +133,8 @@ define i32 @reduce_i16(i32 %arg) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 
= call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef) @@ -142,7 +142,7 @@ define i32 @reduce_i16(i32 %arg) { ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef) @@ -151,7 +151,7 @@ define i32 @reduce_i16(i32 %arg) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef) @@ -160,7 +160,7 @@ define i32 @reduce_i16(i32 %arg) { ; AVX1-NEXT: Cost Model: Found an estimated 
cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef) @@ -169,7 +169,7 @@ define i32 @reduce_i16(i32 %arg) { ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef) @@ -178,7 +178,7 @@ define i32 @reduce_i16(i32 %arg) { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call 
i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef) @@ -187,7 +187,7 @@ define i32 @reduce_i16(i32 %arg) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i16' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef) @@ -206,9 +206,9 @@ define i32 @reduce_i16(i32 %arg) { define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 
@llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef) @@ -216,9 +216,9 @@ define i32 @reduce_i8(i32 %arg) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef) +; SSSE3-NEXT: Cost 
Model: Found an estimated cost of 8 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef) @@ -226,9 +226,9 @@ define i32 @reduce_i8(i32 %arg) { ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call 
i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef) @@ -236,9 +236,9 @@ define i32 @reduce_i8(i32 %arg) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef) @@ -246,9 +246,9 @@ define i32 @reduce_i8(i32 %arg) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef) 
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef) @@ -256,9 +256,9 @@ define i32 @reduce_i8(i32 %arg) { ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef) @@ -266,9 +266,9 @@ define i32 @reduce_i8(i32 %arg) { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i8 
@llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef) @@ -276,9 +276,9 @@ define i32 @reduce_i8(i32 %arg) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i8' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef) diff --git a/llvm/test/Analysis/CostModel/X86/reduce-xor.ll b/llvm/test/Analysis/CostModel/X86/reduce-xor.ll index a97fc21f3..df927b436 100644 --- a/llvm/test/Analysis/CostModel/X86/reduce-xor.ll +++ b/llvm/test/Analysis/CostModel/X86/reduce-xor.ll @@ -92,8 +92,8 @@ define i32 @reduce_i32(i32 %arg) { define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 
3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16> undef) @@ -174,9 +174,9 @@ define i32 @reduce_i16(i32 %arg) { define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> undef) ; SSE2-NEXT: Cost Model: Found an 
estimated cost of 45 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8> undef) diff --git a/llvm/test/Analysis/CostModel/X86/reduction.ll b/llvm/test/Analysis/CostModel/X86/reduction.ll index 0acb16c0c..ac3730205 100644 --- a/llvm/test/Analysis/CostModel/X86/reduction.ll +++ b/llvm/test/Analysis/CostModel/X86/reduction.ll @@ -5,6 +5,8 @@ ; RUN: opt < %s -cost-model -costmodel-reduxcost=true -mtriple=x86_64-apple-darwin -analyze -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1 ; RUN: opt < %s -cost-model -costmodel-reduxcost=true -mtriple=x86_64-apple-darwin -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2 +; RUN: opt < %s -cost-model -costmodel-reduxcost=true -mtriple=x86_64-apple-darwin -analyze -mcpu=slm | FileCheck %s --check-prefixes=SLM + ; Check that we recognize the tree starting at the extractelement as a ; reduction. ; NOTE: We're only really interested in the extractelement cost, which represents the entire reduction. 
@@ -15,7 +17,7 @@ define fastcc float @reduction_cost_float(<4 x float> %rdx) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7 -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r ; ; SSSE3-LABEL: 'reduction_cost_float' @@ -23,7 +25,7 @@ define fastcc float @reduction_cost_float(<4 x float> %rdx) { ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r ; ; SSE42-LABEL: 'reduction_cost_float' @@ -41,6 +43,14 @@ define fastcc float @reduction_cost_float(<4 x float> %rdx) { ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0 ; AVX-NEXT: 
Cost Model: Found an estimated cost of 0 for instruction: ret float %r +; +; SLM-LABEL: 'reduction_cost_float' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7 +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r ; %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf @@ -81,6 +91,16 @@ define fastcc i32 @reduction_cost_int(<8 x i32> %rdx) { ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx.3 = add <8 x i32> %bin.rdx.2, %rdx.shuf.3 ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = extractelement <8 x i32> %bin.rdx.3, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r +; +; SLM-LABEL: 'reduction_cost_int' +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = add <8 x i32> %rdx, %rdx.shuf +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.2 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.2 = add <8 x i32> %bin.rdx, %rdx.shuf.2 +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.3 
= shufflevector <8 x i32> %bin.rdx.2, <8 x i32> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.3 = add <8 x i32> %bin.rdx.2, %rdx.shuf.3 +; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r = extractelement <8 x i32> %bin.rdx.3, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r ; %rdx.shuf = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> %rdx, float %f1) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.1 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r2 = fadd float %r, %f1 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r2 ; @@ -118,7 +138,7 @@ define fastcc float @pairwise_hadd(<4 x float> %rdx, float %f1) { ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.1 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0 +; 
SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r2 = fadd float %r, %f1 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r2 ; @@ -143,6 +163,17 @@ define fastcc float @pairwise_hadd(<4 x float> %rdx, float %f1) { ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r2 = fadd float %r, %f1 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r2 +; +; SLM-LABEL: 'pairwise_hadd' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx.1 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r2 = fadd float %r, %f1 +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r2 ; %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> @@ -168,7 +199,7 @@ define 
fastcc float @pairwise_hadd_assoc(<4 x float> %rdx, float %f1) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.1 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r2 = fadd float %r, %f1 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r2 ; @@ -179,7 +210,7 @@ define fastcc float @pairwise_hadd_assoc(<4 x float> %rdx, float %f1) { ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.1 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r2 = fadd float %r, %f1 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r2 ; @@ -204,6 +235,17 @@ define fastcc float @pairwise_hadd_assoc(<4 x float> %rdx, float %f1) { ; AVX-NEXT: 
Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r2 = fadd float %r, %f1 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r2 +; +; SLM-LABEL: 'pairwise_hadd_assoc' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.1, %rdx.shuf.0.0 +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx.1 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r2 = fadd float %r, %f1 +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r2 ; %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> @@ -228,7 +270,7 @@ define fastcc float @pairwise_hadd_skip_first(<4 x float> %rdx, float %f1) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated 
cost of 2 for instruction: %bin.rdx.1 = fadd <4 x float> %bin.rdx.0, %rdx.shuf.1.1 -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r2 = fadd float %r, %f1 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r2 ; @@ -238,7 +280,7 @@ define fastcc float @pairwise_hadd_skip_first(<4 x float> %rdx, float %f1) { ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.1 = fadd <4 x float> %bin.rdx.0, %rdx.shuf.1.1 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r2 = fadd float %r, %f1 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r2 ; @@ -261,6 +303,16 @@ define fastcc float @pairwise_hadd_skip_first(<4 x float> %rdx, float %f1) { ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r2 = fadd float %r, %f1 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r2 +; +; SLM-LABEL: 'pairwise_hadd_skip_first' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x 
float> %rdx, <4 x float> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx.1 = fadd <4 x float> %bin.rdx.0, %rdx.shuf.1.1 +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r2 = fadd float %r, %f1 +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r2 ; %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> @@ -280,13 +332,13 @@ define fastcc double @no_pairwise_reduction2double(<2 x double> %rdx, double %f1 ; SSE2-LABEL: 'no_pairwise_reduction2double' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <2 x double> %rdx, %rdx.shuf -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r = extractelement <2 x double> %bin.rdx, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = extractelement <2 x double> %bin.rdx, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r ; ; SSSE3-LABEL: 'no_pairwise_reduction2double' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = 
fadd <2 x double> %rdx, %rdx.shuf -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r = extractelement <2 x double> %bin.rdx, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = extractelement <2 x double> %bin.rdx, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r ; ; SSE42-LABEL: 'no_pairwise_reduction2double' @@ -300,6 +352,12 @@ define fastcc double @no_pairwise_reduction2double(<2 x double> %rdx, double %f1 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = fadd <2 x double> %rdx, %rdx.shuf ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = extractelement <2 x double> %bin.rdx, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r +; +; SLM-LABEL: 'no_pairwise_reduction2double' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <2 x double> %rdx, %rdx.shuf +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r = extractelement <2 x double> %bin.rdx, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r ; %rdx.shuf = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> %bin.rdx = fadd <2 x double> %rdx, %rdx.shuf @@ -314,7 +372,7 @@ define fastcc float @no_pairwise_reduction4float(<4 x float> %rdx, float %f1) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7 -; SSE2-NEXT: Cost Model: Found an estimated 
cost of 6 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r ; ; SSSE3-LABEL: 'no_pairwise_reduction4float' @@ -322,7 +380,7 @@ define fastcc float @no_pairwise_reduction4float(<4 x float> %rdx, float %f1) { ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r ; ; SSE42-LABEL: 'no_pairwise_reduction4float' @@ -340,6 +398,14 @@ define fastcc float @no_pairwise_reduction4float(<4 x float> %rdx, float %f1) { ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r +; +; SLM-LABEL: 'no_pairwise_reduction4float' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7 +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r ; %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf @@ -356,7 +422,7 @@ define fastcc double @no_pairwise_reduction4double(<4 x double> %rdx, double %f1 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx = fadd <4 x double> %rdx, %rdx.shuf ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf7 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = fadd <4 x double> %bin.rdx, %rdx.shuf7 -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r ; ; SSSE3-LABEL: 'no_pairwise_reduction4double' @@ -364,7 +430,7 @@ define fastcc double @no_pairwise_reduction4double(<4 x double> %rdx, double %f1 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx = fadd <4 x double> %rdx, %rdx.shuf ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf7 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = fadd <4 x double> %bin.rdx, %rdx.shuf7 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = extractelement <4 x double> %bin.rdx8, 
i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r ; ; SSE42-LABEL: 'no_pairwise_reduction4double' @@ -390,6 +456,14 @@ define fastcc double @no_pairwise_reduction4double(<4 x double> %rdx, double %f1 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = fadd <4 x double> %bin.rdx, %rdx.shuf7 ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r +; +; SLM-LABEL: 'no_pairwise_reduction4double' +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx = fadd <4 x double> %rdx, %rdx.shuf +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf7 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = fadd <4 x double> %bin.rdx, %rdx.shuf7 +; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r ; %rdx.shuf = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> %bin.rdx = fadd <4 x double> %rdx, %rdx.shuf @@ -450,6 +524,16 @@ define fastcc float @no_pairwise_reduction8float(<8 x float> %rdx, float %f1) { ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = fadd <8 x float> %bin.rdx, %rdx.shuf7 ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <8 x float> %bin.rdx8, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 
for instruction: ret float %r +; +; SLM-LABEL: 'no_pairwise_reduction8float' +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf3 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx4 = fadd <8 x float> %rdx, %rdx.shuf3 +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf = shufflevector <8 x float> %bin.rdx4, <8 x float> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <8 x float> %bin.rdx4, %rdx.shuf +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf7 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <8 x float> %bin.rdx, %rdx.shuf7 +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r = extractelement <8 x float> %bin.rdx8, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r ; %rdx.shuf3 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> %bin.rdx4 = fadd <8 x float> %rdx, %rdx.shuf3 @@ -463,29 +547,23 @@ define fastcc float @no_pairwise_reduction8float(<8 x float> %rdx, float %f1) { } define fastcc i64 @no_pairwise_reduction2i64(<2 x i64> %rdx, i64 %f1) { -; SSE2-LABEL: 'no_pairwise_reduction2i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <2 x i64> %rdx, %rdx.shuf -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r = extractelement <2 x i64> %bin.rdx, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r -; -; SSSE3-LABEL: 'no_pairwise_reduction2i64' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%rdx.shuf = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <2 x i64> %rdx, %rdx.shuf -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r = extractelement <2 x i64> %bin.rdx, i32 0 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r -; -; SSE42-LABEL: 'no_pairwise_reduction2i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <2 x i64> %rdx, %rdx.shuf -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = extractelement <2 x i64> %bin.rdx, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r +; SSE-LABEL: 'no_pairwise_reduction2i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <2 x i64> %rdx, %rdx.shuf +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = extractelement <2 x i64> %bin.rdx, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r ; ; AVX-LABEL: 'no_pairwise_reduction2i64' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <2 x i64> %rdx, %rdx.shuf ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = extractelement <2 x i64> %bin.rdx, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r +; +; SLM-LABEL: 'no_pairwise_reduction2i64' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%rdx.shuf = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx = add <2 x i64> %rdx, %rdx.shuf +; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = extractelement <2 x i64> %bin.rdx, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r ; %rdx.shuf = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> %bin.rdx = add <2 x i64> %rdx, %rdx.shuf @@ -495,37 +573,21 @@ define fastcc i64 @no_pairwise_reduction2i64(<2 x i64> %rdx, i64 %f1) { } define fastcc i32 @no_pairwise_reduction4i32(<4 x i32> %rdx, i32 %f1) { -; SSE2-LABEL: 'no_pairwise_reduction4i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x i32> %rdx, <4 x i32> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <4 x i32> %rdx, %rdx.shuf -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <4 x i32> %bin.rdx, %rdx.shuf7 -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = extractelement <4 x i32> %bin.rdx8, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r -; -; SSSE3-LABEL: 'no_pairwise_reduction4i32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x i32> %rdx, <4 x i32> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <4 x i32> %rdx, %rdx.shuf -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <4 x i32> %bin.rdx, %rdx.shuf7 -; 
SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = extractelement <4 x i32> %bin.rdx8, i32 0 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r -; -; SSE42-LABEL: 'no_pairwise_reduction4i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x i32> %rdx, <4 x i32> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <4 x i32> %rdx, %rdx.shuf -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <4 x i32> %bin.rdx, %rdx.shuf7 -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r = extractelement <4 x i32> %bin.rdx8, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r -; -; AVX-LABEL: 'no_pairwise_reduction4i32' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x i32> %rdx, <4 x i32> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <4 x i32> %rdx, %rdx.shuf -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <4 x i32> %bin.rdx, %rdx.shuf7 -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r = extractelement <4 x i32> %bin.rdx8, i32 0 -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r +; CHECK-LABEL: 'no_pairwise_reduction4i32' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x i32> %rdx, <4 x i32> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%bin.rdx = add <4 x i32> %rdx, %rdx.shuf +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <4 x i32> %bin.rdx, %rdx.shuf7 +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r = extractelement <4 x i32> %bin.rdx8, i32 0 +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r +; +; SLM-LABEL: 'no_pairwise_reduction4i32' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x i32> %rdx, <4 x i32> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <4 x i32> %rdx, %rdx.shuf +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <4 x i32> %bin.rdx, %rdx.shuf7 +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r = extractelement <4 x i32> %bin.rdx8, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r ; %rdx.shuf = shufflevector <4 x i32> %rdx, <4 x i32> undef, <4 x i32> %bin.rdx = add <4 x i32> %rdx, %rdx.shuf @@ -560,6 +622,14 @@ define fastcc i64 @no_pairwise_reduction4i64(<4 x i64> %rdx, i64 %f1) { ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <4 x i64> %bin.rdx, %rdx.shuf7 ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r = extractelement <4 x i64> %bin.rdx8, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r +; +; SLM-LABEL: 'no_pairwise_reduction4i64' +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> +; SLM-NEXT: Cost 
Model: Found an estimated cost of 8 for instruction: %bin.rdx = add <4 x i64> %rdx, %rdx.shuf +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf7 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %bin.rdx8 = add <4 x i64> %bin.rdx, %rdx.shuf7 +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r = extractelement <4 x i64> %bin.rdx8, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r ; %rdx.shuf = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> %bin.rdx = add <4 x i64> %rdx, %rdx.shuf @@ -578,7 +648,7 @@ define fastcc i16 @no_pairwise_reduction8i16(<8 x i16> %rdx, i16 %f1) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <8 x i16> %bin.rdx4, %rdx.shuf ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf7 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <8 x i16> %bin.rdx, %rdx.shuf7 -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %r = extractelement <8 x i16> %bin.rdx8, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <8 x i16> %bin.rdx8, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r ; ; SSSE3-LABEL: 'no_pairwise_reduction8i16' @@ -588,7 +658,7 @@ define fastcc i16 @no_pairwise_reduction8i16(<8 x i16> %rdx, i16 %f1) { ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <8 x i16> %bin.rdx4, %rdx.shuf ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <8 x i16> %bin.rdx, %rdx.shuf7 -; SSSE3-NEXT: 
Cost Model: Found an estimated cost of 7 for instruction: %r = extractelement <8 x i16> %bin.rdx8, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <8 x i16> %bin.rdx8, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r ; ; SSE42-LABEL: 'no_pairwise_reduction8i16' @@ -610,6 +680,16 @@ define fastcc i16 @no_pairwise_reduction8i16(<8 x i16> %rdx, i16 %f1) { ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <8 x i16> %bin.rdx, %rdx.shuf7 ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <8 x i16> %bin.rdx8, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r +; +; SLM-LABEL: 'no_pairwise_reduction8i16' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf3 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx4 = add <8 x i16> %rdx, %rdx.shuf3 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <8 x i16> %bin.rdx4, <8 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <8 x i16> %bin.rdx4, %rdx.shuf +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <8 x i16> %bin.rdx, %rdx.shuf7 +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <8 x i16> %bin.rdx8, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r ; %rdx.shuf3 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> %bin.rdx4 = add <8 x i16> %rdx, %rdx.shuf3 @@ -652,6 +732,16 @@ define fastcc i32 @no_pairwise_reduction8i32(<8 x i32> %rdx, i32 %f1) { ; 
AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <8 x i32> %bin.rdx, %rdx.shuf7 ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = extractelement <8 x i32> %bin.rdx8, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r +; +; SLM-LABEL: 'no_pairwise_reduction8i32' +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf3 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx4 = add <8 x i32> %rdx, %rdx.shuf3 +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf = shufflevector <8 x i32> %bin.rdx4, <8 x i32> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = add <8 x i32> %bin.rdx4, %rdx.shuf +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf7 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = add <8 x i32> %bin.rdx, %rdx.shuf7 +; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r = extractelement <8 x i32> %bin.rdx8, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r ; %rdx.shuf3 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> %bin.rdx4 = add <8 x i32> %rdx, %rdx.shuf3 @@ -669,14 +759,14 @@ define fastcc double @pairwise_reduction2double(<2 x double> %rdx, double %f1) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <2 x double> %rdx.shuf.1.0, %rdx.shuf.1.1 -; 
SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r = extractelement <2 x double> %bin.rdx8, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = extractelement <2 x double> %bin.rdx8, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r ; ; SSSE3-LABEL: 'pairwise_reduction2double' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <2 x double> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r = extractelement <2 x double> %bin.rdx8, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = extractelement <2 x double> %bin.rdx8, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r ; ; SSE42-LABEL: 'pairwise_reduction2double' @@ -692,6 +782,13 @@ define fastcc double @pairwise_reduction2double(<2 x double> %rdx, double %f1) { ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = fadd <2 x double> %rdx.shuf.1.0, %rdx.shuf.1.1 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = extractelement <2 x double> %bin.rdx8, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r +; +; SLM-LABEL: 'pairwise_reduction2double' +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %bin.rdx8 = fadd <2 x double> %rdx.shuf.1.0, %rdx.shuf.1.1 +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r = extractelement <2 x double> %bin.rdx8, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r ; %rdx.shuf.1.0 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> %rdx.shuf.1.1 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> @@ -709,7 +806,7 @@ define fastcc float @pairwise_reduction4float(<4 x float> %rdx, float %f1) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r ; ; SSSE3-LABEL: 'pairwise_reduction4float' @@ -719,7 +816,7 @@ define fastcc float @pairwise_reduction4float(<4 x float> %rdx, float %f1) { ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r = extractelement <4 x float> 
%bin.rdx8, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r ; ; SSE42-LABEL: 'pairwise_reduction4float' @@ -741,6 +838,16 @@ define fastcc float @pairwise_reduction4float(<4 x float> %rdx, float %f1) { ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r +; +; SLM-LABEL: 'pairwise_reduction4float' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r ; %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> @@ -761,7 +868,7 @@ define fastcc double 
@pairwise_reduction4double(<4 x double> %rdx, double %f1) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.1.1 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = fadd <4 x double> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r ; ; SSSE3-LABEL: 'pairwise_reduction4double' @@ -771,7 +878,7 @@ define fastcc double @pairwise_reduction4double(<4 x double> %rdx, double %f1) { ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.1.1 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = fadd <4 x double> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r ; ; SSE42-LABEL: 'pairwise_reduction4double' @@ -803,6 +910,16 @@ define fastcc double @pairwise_reduction4double(<4 x double> %rdx, double %f1) { ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = fadd <4 x double> 
%rdx.shuf.1.0, %rdx.shuf.1.1 ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r +; +; SLM-LABEL: 'pairwise_reduction4double' +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.0 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.1 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx = fadd <4 x double> %rdx.shuf.0.0, %rdx.shuf.0.1 +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.1.1 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = fadd <4 x double> %rdx.shuf.1.0, %rdx.shuf.1.1 +; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r ; %rdx.shuf.0.0 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> %rdx.shuf.0.1 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> @@ -826,7 +943,7 @@ define fastcc float @pairwise_reduction8float(<8 x float> %rdx, float %f1) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.2.1 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx9 = 
fadd <8 x float> %rdx.shuf.2.0, %rdx.shuf.2.1 -; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %r = extractelement <8 x float> %bin.rdx9, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r = extractelement <8 x float> %bin.rdx9, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r ; ; SSSE3-LABEL: 'pairwise_reduction8float' @@ -839,7 +956,7 @@ define fastcc float @pairwise_reduction8float(<8 x float> %rdx, float %f1) { ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.2.1 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx9 = fadd <8 x float> %rdx.shuf.2.0, %rdx.shuf.2.1 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %r = extractelement <8 x float> %bin.rdx9, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r = extractelement <8 x float> %bin.rdx9, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r ; ; SSE42-LABEL: 'pairwise_reduction8float' @@ -880,6 +997,19 @@ define fastcc float @pairwise_reduction8float(<8 x float> %rdx, float %f1) { ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx9 = fadd <8 x float> %rdx.shuf.2.0, %rdx.shuf.2.1 ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r = extractelement <8 x float> %bin.rdx9, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r +; +; SLM-LABEL: 'pairwise_reduction8float' +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.0.0 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 
4 for instruction: %rdx.shuf.0.1 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <8 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.1.0 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.1.1 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <8 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.2.1 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx9 = fadd <8 x float> %rdx.shuf.2.0, %rdx.shuf.2.1 +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r = extractelement <8 x float> %bin.rdx9, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r ; %rdx.shuf.0.0 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> %rdx.shuf.0.1 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> @@ -896,26 +1026,12 @@ define fastcc float @pairwise_reduction8float(<8 x float> %rdx, float %f1) { } define fastcc i64 @pairwise_reduction2i64(<2 x i64> %rdx, i64 %f1) { -; SSE2-LABEL: 'pairwise_reduction2i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> -; SSE2-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %bin.rdx8 = add <2 x i64> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r = extractelement <2 x i64> %bin.rdx8, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r -; -; SSSE3-LABEL: 'pairwise_reduction2i64' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <2 x i64> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r = extractelement <2 x i64> %bin.rdx8, i32 0 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r -; -; SSE42-LABEL: 'pairwise_reduction2i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <2 x i64> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = extractelement <2 x i64> %bin.rdx8, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r +; SSE-LABEL: 'pairwise_reduction2i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 
1 for instruction: %bin.rdx8 = add <2 x i64> %rdx.shuf.1.0, %rdx.shuf.1.1 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = extractelement <2 x i64> %bin.rdx8, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r ; ; AVX-LABEL: 'pairwise_reduction2i64' ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> @@ -923,6 +1039,13 @@ define fastcc i64 @pairwise_reduction2i64(<2 x i64> %rdx, i64 %f1) { ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <2 x i64> %rdx.shuf.1.0, %rdx.shuf.1.1 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = extractelement <2 x i64> %bin.rdx8, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r +; +; SLM-LABEL: 'pairwise_reduction2i64' +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = add <2 x i64> %rdx.shuf.1.0, %rdx.shuf.1.1 +; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = extractelement <2 x i64> %bin.rdx8, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r ; %rdx.shuf.1.0 = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> %rdx.shuf.1.1 = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> @@ -933,45 +1056,25 @@ define fastcc i64 @pairwise_reduction2i64(<2 x i64> %rdx, i64 %f1) { } define fastcc i32 @pairwise_reduction4i32(<4 x i32> %rdx, i32 %f1) { -; SSE2-LABEL: 'pairwise_reduction4i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x i32> %rdx, <4 
x i32> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x i32> %rdx, <4 x i32> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <4 x i32> %rdx.shuf.0.0, %rdx.shuf.0.1 -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <4 x i32> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r = extractelement <4 x i32> %bin.rdx8, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r -; -; SSSE3-LABEL: 'pairwise_reduction4i32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x i32> %rdx, <4 x i32> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x i32> %rdx, <4 x i32> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <4 x i32> %rdx.shuf.0.0, %rdx.shuf.0.1 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <4 x i32> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r = extractelement <4 x i32> %bin.rdx8, i32 0 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: ret i32 %r -; -; SSE42-LABEL: 'pairwise_reduction4i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x i32> %rdx, <4 x i32> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x i32> %rdx, <4 x i32> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <4 x i32> %rdx.shuf.0.0, %rdx.shuf.0.1 -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <4 x i32> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r = extractelement <4 x i32> %bin.rdx8, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r -; -; AVX-LABEL: 'pairwise_reduction4i32' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x i32> %rdx, <4 x i32> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x i32> %rdx, <4 x i32> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <4 x i32> %rdx.shuf.0.0, %rdx.shuf.0.1 -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <4 x i32> %rdx.shuf.1.0, %rdx.shuf.1.1 -; 
AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r = extractelement <4 x i32> %bin.rdx8, i32 0 -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r +; CHECK-LABEL: 'pairwise_reduction4i32' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x i32> %rdx, <4 x i32> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x i32> %rdx, <4 x i32> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <4 x i32> %rdx.shuf.0.0, %rdx.shuf.0.1 +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <4 x i32> %rdx.shuf.1.0, %rdx.shuf.1.1 +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r = extractelement <4 x i32> %bin.rdx8, i32 0 +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r +; +; SLM-LABEL: 'pairwise_reduction4i32' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x i32> %rdx, <4 x i32> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x i32> %rdx, <4 x i32> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <4 x i32> %rdx.shuf.0.0, %rdx.shuf.0.1 +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x 
i32> %bin.rdx, <4 x i32> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <4 x i32> %rdx.shuf.1.0, %rdx.shuf.1.1 +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r = extractelement <4 x i32> %bin.rdx8, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r ; %rdx.shuf.0.0 = shufflevector <4 x i32> %rdx, <4 x i32> undef, <4 x i32> %rdx.shuf.0.1 = shufflevector <4 x i32> %rdx, <4 x i32> undef, <4 x i32> @@ -1014,6 +1117,16 @@ define fastcc i64 @pairwise_reduction4i64(<4 x i64> %rdx, i64 %f1) { ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <4 x i64> %rdx.shuf.1.0, %rdx.shuf.1.1 ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = extractelement <4 x i64> %bin.rdx8, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r +; +; SLM-LABEL: 'pairwise_reduction4i64' +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.0 = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.1 = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %bin.rdx = add <4 x i64> %rdx.shuf.0.0, %rdx.shuf.0.1 +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.1.1 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %bin.rdx8 = add <4 x i64> %rdx.shuf.1.0, %rdx.shuf.1.1 +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r = extractelement <4 x i64> %bin.rdx8, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: ret i64 %r ; %rdx.shuf.0.0 = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> %rdx.shuf.0.1 = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> @@ -1037,7 +1150,7 @@ define fastcc i16 @pairwise_reduction8i16(<8 x i16> %rdx, i16 %f1) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx9 = add <8 x i16> %rdx.shuf.2.0, %rdx.shuf.2.1 -; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %r = extractelement <8 x i16> %bin.rdx9, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = extractelement <8 x i16> %bin.rdx9, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r ; ; SSSE3-LABEL: 'pairwise_reduction8i16' @@ -1050,7 +1163,7 @@ define fastcc i16 @pairwise_reduction8i16(<8 x i16> %rdx, i16 %f1) { ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx9 = add <8 x i16> %rdx.shuf.2.0, %rdx.shuf.2.1 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %r = extractelement <8 x i16> %bin.rdx9, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = extractelement <8 x i16> %bin.rdx9, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r ; ; SSE42-LABEL: 'pairwise_reduction8i16' @@ -1078,6 +1191,19 @@ define fastcc i16 
@pairwise_reduction8i16(<8 x i16> %rdx, i16 %f1) { ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx9 = add <8 x i16> %rdx.shuf.2.0, %rdx.shuf.2.1 ; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = extractelement <8 x i16> %bin.rdx9, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r +; +; SLM-LABEL: 'pairwise_reduction8i16' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <8 x i16> %rdx.shuf.0.0, %rdx.shuf.0.1 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.0 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <8 x i16> %rdx.shuf.1.0, %rdx.shuf.1.1 +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx9 = add <8 x i16> %rdx.shuf.2.0, %rdx.shuf.2.1 +; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = extractelement <8 x i16> %bin.rdx9, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r ; %rdx.shuf.0.0 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> %rdx.shuf.0.1 = shufflevector <8 x i16> %rdx, <8 
x i16> undef, <8 x i32> @@ -1094,44 +1220,18 @@ define fastcc i16 @pairwise_reduction8i16(<8 x i16> %rdx, i16 %f1) { } define fastcc i32 @pairwise_reduction8i32(<8 x i32> %rdx, i32 %f1) { -; SSE2-LABEL: 'pairwise_reduction8i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.0.0 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.0.1 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = add <8 x i32> %rdx.shuf.0.0, %rdx.shuf.0.1 -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.1.0 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.1.1 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = add <8 x i32> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx9 = add <8 x i32> %rdx.shuf.2.0, %rdx.shuf.2.1 -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r = extractelement <8 x i32> %bin.rdx9, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r -; -; SSSE3-LABEL: 'pairwise_reduction8i32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.0.0 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.0.1 = shufflevector <8 x 
i32> %rdx, <8 x i32> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = add <8 x i32> %rdx.shuf.0.0, %rdx.shuf.0.1 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.1.0 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.1.1 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = add <8 x i32> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx9 = add <8 x i32> %rdx.shuf.2.0, %rdx.shuf.2.1 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r = extractelement <8 x i32> %bin.rdx9, i32 0 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r -; -; SSE42-LABEL: 'pairwise_reduction8i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.0.0 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.0.1 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = add <8 x i32> %rdx.shuf.0.0, %rdx.shuf.0.1 -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.1.0 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.1.1 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> -; 
SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = add <8 x i32> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx9 = add <8 x i32> %rdx.shuf.2.0, %rdx.shuf.2.1 -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r = extractelement <8 x i32> %bin.rdx9, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r +; SSE-LABEL: 'pairwise_reduction8i32' +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.0.0 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.0.1 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = add <8 x i32> %rdx.shuf.0.0, %rdx.shuf.0.1 +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.1.0 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.1.1 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = add <8 x i32> %rdx.shuf.1.0, %rdx.shuf.1.1 +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %bin.rdx9 = add <8 x i32> %rdx.shuf.2.0, %rdx.shuf.2.1 +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r = extractelement <8 x i32> %bin.rdx9, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r ; ; AVX1-LABEL: 'pairwise_reduction8i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.0.0 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> @@ -1158,6 +1258,19 @@ define fastcc i32 @pairwise_reduction8i32(<8 x i32> %rdx, i32 %f1) { ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx9 = add <8 x i32> %rdx.shuf.2.0, %rdx.shuf.2.1 ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = extractelement <8 x i32> %bin.rdx9, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r +; +; SLM-LABEL: 'pairwise_reduction8i32' +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.0.0 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.0.1 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = add <8 x i32> %rdx.shuf.0.0, %rdx.shuf.0.1 +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.1.0 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.1.1 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = add <8 x i32> %rdx.shuf.1.0, %rdx.shuf.1.1 +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.2.1 = shufflevector 
<8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx9 = add <8 x i32> %rdx.shuf.2.0, %rdx.shuf.2.1 +; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r = extractelement <8 x i32> %bin.rdx9, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r ; %rdx.shuf.0.0 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> %rdx.shuf.0.1 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll b/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll index f74b30cf1..a794a9a3d 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll @@ -5,11 +5,11 @@ ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx | FileCheck %s -check-prefixes=CHECK,AVX,AVX1 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx2 | FileCheck %s -check-prefixes=CHECK,AVX,AVX2 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=CHECK,AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW ; -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 -; 
RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE,SSE42,SLM +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE,SSE42,GLM ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mcpu=btver2 | FileCheck %s --check-prefixes=BTVER2 ; @@ -27,6 +27,7 @@ define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; SSE-NEXT: Cost Model: Unknown cost for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'test_vXf64' @@ -39,6 +40,7 @@ define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) { ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX-NEXT: Cost Model: Unknown cost for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX-NEXT: Cost Model: 
Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf64' @@ -51,6 +53,7 @@ define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Unknown cost for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; BTVER2-LABEL: 'test_vXf64' @@ -63,6 +66,7 @@ define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) { ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Unknown cost for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> @@ -74,11 +78,12 @@ define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) { %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> %V512_4567 = shufflevector <8 
x double> %src512, <8 x double> undef, <4 x i32> + %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ret void } -define void @test_vXfi64(<4 x i64> %src256, <8 x i64> %src512) { -; SSE-LABEL: 'test_vXfi64' +define void @test_vXi64(<4 x i64> %src256, <8 x i64> %src512) { +; SSE-LABEL: 'test_vXi64' ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> @@ -90,7 +95,7 @@ define void @test_vXfi64(<4 x i64> %src256, <8 x i64> %src512) { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; AVX-LABEL: 'test_vXfi64' +; AVX-LABEL: 'test_vXi64' ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> @@ -102,7 +107,7 @@ define void @test_vXfi64(<4 x i64> %src256, <8 x i64> %src512) { ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; AVX512-LABEL: 'test_vXfi64' +; AVX512-LABEL: 'test_vXi64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = 
shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> @@ -114,7 +119,7 @@ define void @test_vXfi64(<4 x i64> %src256, <8 x i64> %src512) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; BTVER2-LABEL: 'test_vXfi64' +; BTVER2-LABEL: 'test_vXi64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> @@ -137,3 +142,1733 @@ define void @test_vXfi64(<4 x i64> %src256, <8 x i64> %src512) { %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> ret void } + +define void @test_vXi32(<4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512) { +; SSE-LABEL: 'test_vXi32' +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> undef, 
<2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, 
<16 x i32> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX-LABEL: 'test_vXi32' +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> 
undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; 
AVX512-LABEL: 'test_vXi32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: 
%V512_AB = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; BTVER2-LABEL: 'test_vXi32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 
x i32> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> undef, <8 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> undef, <8 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> undef, <2 x i32> + %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> undef, <2 x i32> + %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> + %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> + %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> + %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> + %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> undef, <4 x i32> + %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> undef, <4 x i32> + %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> + %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> + %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> + %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> + %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> + %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> + %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> + %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> + %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> + %V512_4567 = 
shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> + %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> + %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> + %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> undef, <8 x i32> + %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> undef, <8 x i32> + ret void +} + +define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, <32 x i16> %src512) { +; SSE2-LABEL: 'test_vXi16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x 
i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 
1 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi16' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an 
estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> 
undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX-LABEL: 'test_vXi16' +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x 
i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> 
undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: 
%V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x 
i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
%V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; 
AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: 
%V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> 
undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost 
of 2 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SLM-LABEL: 'test_vXi16' +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = 
shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 
= shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; SLM-NEXT: 
Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; GLM-LABEL: 'test_vXi16' +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> 
+; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector 
<32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; GLM-NEXT: Cost Model: 
Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; BTVER2-LABEL: 'test_vXi16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an 
estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> 
%src256, <16 x i16> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; 
BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; 
BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> + %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> + %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> + %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> + %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> + %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> + %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> + %V128_4567 = shufflevector <8 x 
i16> %src128, <8 x i16> undef, <4 x i32> + %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> + %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> + %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> + %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> + %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> + %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> + %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> + %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> + %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> + %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> + %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> + %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> + %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> + %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> + %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> + %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> + %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_12_13 = shufflevector 
<32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> + %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> + %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> + %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> + %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> + %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> + %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> + %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> + %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> + %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> + %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> + %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> + %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> + %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> + %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> + %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> + ret void +} + 
+define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <64 x i8> %src512) { +; SSE2-LABEL: 'test_vXi8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found 
an estimated cost of 2 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, 
<2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; 
SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: 
Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated 
cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi8' +; SSSE3-NEXT: Cost Model: Found an estimated 
cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated 
cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> 
undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 
8 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = 
shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_34_35 = shufflevector <64 x i8> 
%src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; 
SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector 
<64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX-LABEL: 'test_vXi8' +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, 
<8 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> 
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> 
%src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, 
<64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = 
shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 
for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> 
%src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x 
i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> 
undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; 
AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: 
%V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> 
undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, 
<4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; 
AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: 
%V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: 
%V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, 
<64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x 
i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> 
%src512, <64 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SLM-LABEL: 'test_vXi8' +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SLM-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> 
undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; SLM-NEXT: Cost 
Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = 
shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, 
<64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27 = 
shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; GLM-LABEL: 'test_vXi8' +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x 
i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; 
GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> 
%src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, 
<64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = 
shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> 
%src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
%V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; BTVER2-LABEL: 'test_vXi8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> 
undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector 
<16 x i8> %src128, <16 x i8> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = 
shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; BTVER2-NEXT: 
Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39 = 
shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25_26_27 = shufflevector 
<64 x i8> %src512, <64 x i8> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; BTVER2-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> + %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> + %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> + %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> + %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> + %V64_4567 = 
shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> + %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> + %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> + %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> + %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> + %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> + %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> + %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> + %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> + %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> + %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> + %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> + %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> + %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> + %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> + %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> + %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> + %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, 
<2 x i32> + %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> + %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> + %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> + %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> + %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> + %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> + %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> + %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> + %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> + %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> + %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> + %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> + %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> + %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> + %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> + %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> + %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_02_03 = 
shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_36_37 = shufflevector <64 x i8> 
%src512, <64 x i8> undef, <2 x i32> + %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> + %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> + %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> + %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> 
%src512, <64 x i8> undef, <8 x i32> + %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> + %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> + %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> + %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> + %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> + %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> + %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> + %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> + %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> + %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-transpose.ll b/llvm/test/Analysis/CostModel/X86/shuffle-transpose.ll index 25a887604..80ea1c980 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-transpose.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-transpose.ll @@ -123,21 +123,21 @@ define void @test_vXf32(<2 x float> %a64, <2 x float> %b64, <4 x float> %a128, < define void @test_vXi32(<2 x i32> %a64, <2 x i32> %b64, <4 x i32> %a128, <4 x i32> %b128, <8 x i32> %a256, <8 x i32> %b256, <16 x i32> %a512, <16 x i32> %b512) { ; SSE-LABEL: 'test_vXi32' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'test_vXi32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'test_vXi32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for 
instruction: %V512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <16 x i32> @@ -151,7 +151,7 @@ define void @test_vXi32(<2 x i32> %a64, <2 x i32> %b64, <4 x i32> %a128, <4 x i3 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; BTVER2-LABEL: 'test_vXi32' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <16 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/sitofp.ll b/llvm/test/Analysis/CostModel/X86/sitofp.ll index c39988a4f..8ac8050da 100644 --- a/llvm/test/Analysis/CostModel/X86/sitofp.ll +++ b/llvm/test/Analysis/CostModel/X86/sitofp.ll @@ -13,9 +13,9 @@ define i32 @sitofp_i8_double() { ; SSE-LABEL: 'sitofp_i8_double' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i8_f64 = sitofp i8 undef to double -; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v2i8_v2f64 = sitofp <2 x i8> undef to <2 x double> -; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cvt_v4i8_v4f64 = sitofp <4 x i8> undef to <4 x double> -; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %cvt_v8i8_v8f64 = sitofp <8 x i8> undef to <8 x double> +; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %cvt_v2i8_v2f64 = sitofp <2 x i8> undef to <2 x double> +; SSE-NEXT: Cost Model: Found an estimated cost of 160 for 
instruction: %cvt_v4i8_v4f64 = sitofp <4 x i8> undef to <4 x double> +; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %cvt_v8i8_v8f64 = sitofp <8 x i8> undef to <8 x double> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'sitofp_i8_double' @@ -49,8 +49,8 @@ define i32 @sitofp_i8_double() { define i32 @sitofp_i16_double() { ; SSE-LABEL: 'sitofp_i16_double' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i16_f64 = sitofp i16 undef to double -; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v2i16_v2f64 = sitofp <2 x i16> undef to <2 x double> -; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cvt_v4i16_v4f64 = sitofp <4 x i16> undef to <4 x double> +; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %cvt_v2i16_v2f64 = sitofp <2 x i16> undef to <2 x double> +; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %cvt_v4i16_v4f64 = sitofp <4 x i16> undef to <4 x double> ; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %cvt_v8i16_v8f64 = sitofp <8 x i16> undef to <8 x double> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -86,27 +86,27 @@ define i32 @sitofp_i32_double() { ; SSE-LABEL: 'sitofp_i32_double' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double ; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double> -; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double> -; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double> +; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double> 
+; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'sitofp_i32_double' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double> +; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double> ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'sitofp_i32_double' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double> +; AVX512-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'sitofp_i32_double' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to 
<2 x double> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef @@ -164,8 +164,8 @@ define i32 @sitofp_i64_double() { define i32 @sitofp_i8_float() { ; SSE-LABEL: 'sitofp_i8_float' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i8_f32 = sitofp i8 undef to float -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v4i8_v4f32 = sitofp <4 x i8> undef to <4 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cvt_v8i8_v8f32 = sitofp <8 x i8> undef to <8 x float> +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v4i8_v4f32 = sitofp <4 x i8> undef to <4 x float> +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v8i8_v8f32 = sitofp <8 x i8> undef to <8 x float> ; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v16i8_v16f32 = sitofp <16 x i8> undef to <16 x float> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -200,7 +200,7 @@ define i32 @sitofp_i8_float() { define i32 @sitofp_i16_float() { ; SSE-LABEL: 'sitofp_i16_float' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i16_f32 = sitofp i16 undef to float -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v4i16_v4f32 = sitofp <4 x i16> undef to <4 x float> +; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cvt_v4i16_v4f32 = sitofp <4 x i16> undef to <4 x float> ; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: 
%cvt_v8i16_v8f32 = sitofp <8 x i16> undef to <8 x float> ; SSE-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cvt_v16i16_v16f32 = sitofp <16 x i16> undef to <16 x float> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef diff --git a/llvm/test/Analysis/CostModel/X86/slm-arith-costs.ll b/llvm/test/Analysis/CostModel/X86/slm-arith-costs.ll index ab2eedc67..723cf35aa 100644 --- a/llvm/test/Analysis/CostModel/X86/slm-arith-costs.ll +++ b/llvm/test/Analysis/CostModel/X86/slm-arith-costs.ll @@ -47,11 +47,11 @@ entry: define <2 x i8> @slm-costs_8_v2_mul(<2 x i8> %a, <2 x i8> %b) { ; SLM-LABEL: 'slm-costs_8_v2_mul' -; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %res = mul nsw <2 x i8> %a, %b +; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %res = mul nsw <2 x i8> %a, %b ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i8> %res ; ; GLM-LABEL: 'slm-costs_8_v2_mul' -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = mul nsw <2 x i8> %a, %b +; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = mul nsw <2 x i8> %a, %b ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i8> %res ; entry: @@ -61,11 +61,11 @@ entry: define <4 x i8> @slm-costs_8_v4_mul(<4 x i8> %a, <4 x i8> %b) { ; SLM-LABEL: 'slm-costs_8_v4_mul' -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res = mul nsw <4 x i8> %a, %b +; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %res = mul nsw <4 x i8> %a, %b ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i8> %res ; ; GLM-LABEL: 'slm-costs_8_v4_mul' -; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = mul nsw <4 x i8> %a, %b +; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = mul nsw <4 x i8> %a, %b ; GLM-NEXT: Cost Model: Found an 
estimated cost of 0 for instruction: ret <4 x i8> %res ; entry: @@ -177,11 +177,11 @@ entry: define <8 x i8> @slm-costs_8_v8_mul(<8 x i8> %a, <8 x i8> %b) { ; SLM-LABEL: 'slm-costs_8_v8_mul' -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = mul nsw <8 x i8> %a, %b +; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %res = mul nsw <8 x i8> %a, %b ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i8> %res ; ; GLM-LABEL: 'slm-costs_8_v8_mul' -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = mul nsw <8 x i8> %a, %b +; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = mul nsw <8 x i8> %a, %b ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i8> %res ; entry: @@ -216,11 +216,11 @@ entry: define <2 x i16> @slm-costs_16_v2_mul(<2 x i16> %a, <2 x i16> %b) { ; SLM-LABEL: 'slm-costs_16_v2_mul' -; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %res = mul nsw <2 x i16> %a, %b +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = mul nsw <2 x i16> %a, %b ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i16> %res ; ; GLM-LABEL: 'slm-costs_16_v2_mul' -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = mul nsw <2 x i16> %a, %b +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = mul nsw <2 x i16> %a, %b ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i16> %res ; entry: @@ -230,11 +230,11 @@ entry: define <4 x i16> @slm-costs_16_v4_mul(<4 x i16> %a, <4 x i16> %b) { ; SLM-LABEL: 'slm-costs_16_v4_mul' -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %res = mul nsw <4 x i16> %a, %b +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = mul nsw <4 x i16> %a, %b ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
ret <4 x i16> %res ; ; GLM-LABEL: 'slm-costs_16_v4_mul' -; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = mul nsw <4 x i16> %a, %b +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = mul nsw <4 x i16> %a, %b ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i16> %res ; entry: @@ -385,11 +385,11 @@ entry: define <2 x i32> @slm-costs_32_v2_mul(<2 x i32> %a, <2 x i32> %b) { ; SLM-LABEL: 'slm-costs_32_v2_mul' -; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %res = mul nsw <2 x i32> %a, %b +; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res = mul nsw <2 x i32> %a, %b ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %res ; ; GLM-LABEL: 'slm-costs_32_v2_mul' -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = mul nsw <2 x i32> %a, %b +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = mul nsw <2 x i32> %a, %b ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %res ; entry: diff --git a/llvm/test/Analysis/CostModel/X86/testshiftashr.ll b/llvm/test/Analysis/CostModel/X86/testshiftashr.ll index 7f588a748..270f13a3e 100644 --- a/llvm/test/Analysis/CostModel/X86/testshiftashr.ll +++ b/llvm/test/Analysis/CostModel/X86/testshiftashr.ll @@ -5,9 +5,9 @@ define %shifttype @shift2i16(%shifttype %a, %shifttype %b) { entry: ; SSE2-LABEL: shift2i16 - ; SSE2: cost of 12 {{.*}} ashr + ; SSE2: cost of 32 {{.*}} ashr ; SSE2-CODEGEN-LABEL: shift2i16 - ; SSE2-CODEGEN: psrlq + ; SSE2-CODEGEN: psraw %0 = ashr %shifttype %a , %b ret %shifttype %0 @@ -17,9 +17,9 @@ entry: define %shifttype4i16 @shift4i16(%shifttype4i16 %a, %shifttype4i16 %b) { entry: ; SSE2-LABEL: shift4i16 - ; SSE2: cost of 16 {{.*}} ashr + ; SSE2: cost of 32 {{.*}} ashr ; SSE2-CODEGEN-LABEL: shift4i16 - ; SSE2-CODEGEN: psrad + ; SSE2-CODEGEN: psraw %0 = ashr %shifttype4i16 %a , %b 
ret %shifttype4i16 %0 @@ -65,9 +65,9 @@ entry: define %shifttype2i32 @shift2i32(%shifttype2i32 %a, %shifttype2i32 %b) { entry: ; SSE2-LABEL: shift2i32 - ; SSE2: cost of 12 {{.*}} ashr + ; SSE2: cost of 16 {{.*}} ashr ; SSE2-CODEGEN-LABEL: shift2i32 - ; SSE2-CODEGEN: psrlq + ; SSE2-CODEGEN: psrad %0 = ashr %shifttype2i32 %a , %b ret %shifttype2i32 %0 @@ -185,9 +185,9 @@ entry: define %shifttype2i8 @shift2i8(%shifttype2i8 %a, %shifttype2i8 %b) { entry: ; SSE2-LABEL: shift2i8 - ; SSE2: cost of 12 {{.*}} ashr + ; SSE2: cost of 54 {{.*}} ashr ; SSE2-CODEGEN-LABEL: shift2i8 - ; SSE2-CODEGEN: psrlq + ; SSE2-CODEGEN: psrlw %0 = ashr %shifttype2i8 %a , %b ret %shifttype2i8 %0 @@ -197,9 +197,9 @@ entry: define %shifttype4i8 @shift4i8(%shifttype4i8 %a, %shifttype4i8 %b) { entry: ; SSE2-LABEL: shift4i8 - ; SSE2: cost of 16 {{.*}} ashr + ; SSE2: cost of 54 {{.*}} ashr ; SSE2-CODEGEN-LABEL: shift4i8 - ; SSE2-CODEGEN: psrad + ; SSE2-CODEGEN: psraw %0 = ashr %shifttype4i8 %a , %b ret %shifttype4i8 %0 @@ -209,7 +209,7 @@ entry: define %shifttype8i8 @shift8i8(%shifttype8i8 %a, %shifttype8i8 %b) { entry: ; SSE2-LABEL: shift8i8 - ; SSE2: cost of 32 {{.*}} ashr + ; SSE2: cost of 54 {{.*}} ashr ; SSE2-CODEGEN-LABEL: shift8i8 ; SSE2-CODEGEN: psraw @@ -247,9 +247,9 @@ entry: define %shifttypec @shift2i16const(%shifttypec %a, %shifttypec %b) { entry: ; SSE2-LABEL: shift2i16const - ; SSE2: cost of 4 {{.*}} ashr + ; SSE2: cost of 1 {{.*}} ashr ; SSE2-CODEGEN-LABEL: shift2i16const - ; SSE2-CODEGEN: psrad $3 + ; SSE2-CODEGEN: psraw $3 %0 = ashr %shifttypec %a , ret %shifttypec %0 @@ -261,7 +261,7 @@ entry: ; SSE2-LABEL: shift4i16const ; SSE2: cost of 1 {{.*}} ashr ; SSE2-CODEGEN-LABEL: shift4i16const - ; SSE2-CODEGEN: psrad $19 + ; SSE2-CODEGEN: psraw $3 %0 = ashr %shifttypec4i16 %a , ret %shifttypec4i16 %0 @@ -320,7 +320,7 @@ entry: define %shifttypec2i32 @shift2i32c(%shifttypec2i32 %a, %shifttypec2i32 %b) { entry: ; SSE2-LABEL: shift2i32c - ; SSE2: cost of 4 {{.*}} ashr + ; SSE2: cost of 1 
{{.*}} ashr ; SSE2-CODEGEN-LABEL: shift2i32c ; SSE2-CODEGEN: psrad $3 @@ -464,7 +464,7 @@ entry: ; SSE2-LABEL: shift2i8c ; SSE2: cost of 4 {{.*}} ashr ; SSE2-CODEGEN-LABEL: shift2i8c - ; SSE2-CODEGEN: psrad $3 + ; SSE2-CODEGEN: psrlw $3 %0 = ashr %shifttypec2i8 %a , ret %shifttypec2i8 %0 @@ -474,9 +474,9 @@ entry: define %shifttypec4i8 @shift4i8c(%shifttypec4i8 %a, %shifttypec4i8 %b) { entry: ; SSE2-LABEL: shift4i8c - ; SSE2: cost of 1 {{.*}} ashr + ; SSE2: cost of 4 {{.*}} ashr ; SSE2-CODEGEN-LABEL: shift4i8c - ; SSE2-CODEGEN: psrad $27 + ; SSE2-CODEGEN: psrlw $3 %0 = ashr %shifttypec4i8 %a , ret %shifttypec4i8 %0 @@ -486,9 +486,9 @@ entry: define %shifttypec8i8 @shift8i8c(%shifttypec8i8 %a, %shifttypec8i8 %b) { entry: ; SSE2-LABEL: shift8i8c - ; SSE2: cost of 1 {{.*}} ashr + ; SSE2: cost of 4 {{.*}} ashr ; SSE2-CODEGEN-LABEL: shift8i8c - ; SSE2-CODEGEN: psraw $11 + ; SSE2-CODEGEN: psrlw $3 %0 = ashr %shifttypec8i8 %a , diff --git a/llvm/test/Analysis/CostModel/X86/testshiftlshr.ll b/llvm/test/Analysis/CostModel/X86/testshiftlshr.ll index 3e30614e1..d60f6895c 100644 --- a/llvm/test/Analysis/CostModel/X86/testshiftlshr.ll +++ b/llvm/test/Analysis/CostModel/X86/testshiftlshr.ll @@ -5,9 +5,9 @@ define %shifttype @shift2i16(%shifttype %a, %shifttype %b) { entry: ; SSE2-LABEL: shift2i16 - ; SSE2: cost of 4 {{.*}} lshr + ; SSE2: cost of 32 {{.*}} lshr ; SSE2-CODEGEN-LABEL: shift2i16 - ; SSE2-CODEGEN: psrlq + ; SSE2-CODEGEN: psrlw %0 = lshr %shifttype %a , %b ret %shifttype %0 @@ -17,9 +17,9 @@ entry: define %shifttype4i16 @shift4i16(%shifttype4i16 %a, %shifttype4i16 %b) { entry: ; SSE2-LABEL: shift4i16 - ; SSE2: cost of 16 {{.*}} lshr + ; SSE2: cost of 32 {{.*}} lshr ; SSE2-CODEGEN-LABEL: shift4i16 - ; SSE2-CODEGEN: psrld + ; SSE2-CODEGEN: psrlw %0 = lshr %shifttype4i16 %a , %b ret %shifttype4i16 %0 @@ -65,9 +65,9 @@ entry: define %shifttype2i32 @shift2i32(%shifttype2i32 %a, %shifttype2i32 %b) { entry: ; SSE2-LABEL: shift2i32 - ; SSE2: cost of 4 {{.*}} lshr + ; SSE2: 
cost of 16 {{.*}} lshr ; SSE2-CODEGEN-LABEL: shift2i32 - ; SSE2-CODEGEN: psrlq + ; SSE2-CODEGEN: psrld %0 = lshr %shifttype2i32 %a , %b ret %shifttype2i32 %0 @@ -185,9 +185,9 @@ entry: define %shifttype2i8 @shift2i8(%shifttype2i8 %a, %shifttype2i8 %b) { entry: ; SSE2-LABEL: shift2i8 - ; SSE2: cost of 4 {{.*}} lshr + ; SSE2: cost of 26 {{.*}} lshr ; SSE2-CODEGEN-LABEL: shift2i8 - ; SSE2-CODEGEN: psrlq + ; SSE2-CODEGEN: psrlw %0 = lshr %shifttype2i8 %a , %b ret %shifttype2i8 %0 @@ -197,9 +197,9 @@ entry: define %shifttype4i8 @shift4i8(%shifttype4i8 %a, %shifttype4i8 %b) { entry: ; SSE2-LABEL: shift4i8 - ; SSE2: cost of 16 {{.*}} lshr + ; SSE2: cost of 26 {{.*}} lshr ; SSE2-CODEGEN-LABEL: shift4i8 - ; SSE2-CODEGEN: psrld + ; SSE2-CODEGEN: psrlw %0 = lshr %shifttype4i8 %a , %b ret %shifttype4i8 %0 @@ -209,7 +209,7 @@ entry: define %shifttype8i8 @shift8i8(%shifttype8i8 %a, %shifttype8i8 %b) { entry: ; SSE2-LABEL: shift8i8 - ; SSE2: cost of 32 {{.*}} lshr + ; SSE2: cost of 26 {{.*}} lshr ; SSE2-CODEGEN-LABEL: shift8i8 ; SSE2-CODEGEN: psrlw @@ -249,7 +249,7 @@ entry: ; SSE2-LABEL: shift2i16const ; SSE2: cost of 1 {{.*}} lshr ; SSE2-CODEGEN-LABEL: shift2i16const - ; SSE2-CODEGEN: psrlq $3 + ; SSE2-CODEGEN: psrlw $3 %0 = lshr %shifttypec %a , ret %shifttypec %0 @@ -261,7 +261,7 @@ entry: ; SSE2-LABEL: shift4i16const ; SSE2: cost of 1 {{.*}} lshr ; SSE2-CODEGEN-LABEL: shift4i16const - ; SSE2-CODEGEN: psrld $3 + ; SSE2-CODEGEN: psrlw $3 %0 = lshr %shifttypec4i16 %a , ret %shifttypec4i16 %0 @@ -322,7 +322,7 @@ entry: ; SSE2-LABEL: shift2i32c ; SSE2: cost of 1 {{.*}} lshr ; SSE2-CODEGEN-LABEL: shift2i32c - ; SSE2-CODEGEN: psrlq $3 + ; SSE2-CODEGEN: psrld $3 %0 = lshr %shifttypec2i32 %a , ret %shifttypec2i32 %0 @@ -461,9 +461,9 @@ entry: define %shifttypec2i8 @shift2i8c(%shifttypec2i8 %a, %shifttypec2i8 %b) { entry: ; SSE2-LABEL: shift2i8c - ; SSE2: cost of 1 {{.*}} lshr + ; SSE2: cost of 2 {{.*}} lshr ; SSE2-CODEGEN-LABEL: shift2i8c - ; SSE2-CODEGEN: psrlq $3 + ; SSE2-CODEGEN: 
psrlw $3 %0 = lshr %shifttypec2i8 %a , ret %shifttypec2i8 %0 @@ -473,9 +473,9 @@ entry: define %shifttypec4i8 @shift4i8c(%shifttypec4i8 %a, %shifttypec4i8 %b) { entry: ; SSE2-LABEL: shift4i8c - ; SSE2: cost of 1 {{.*}} lshr + ; SSE2: cost of 2 {{.*}} lshr ; SSE2-CODEGEN-LABEL: shift4i8c - ; SSE2-CODEGEN: psrld $3 + ; SSE2-CODEGEN: psrlw $3 %0 = lshr %shifttypec4i8 %a , ret %shifttypec4i8 %0 @@ -485,7 +485,7 @@ entry: define %shifttypec8i8 @shift8i8c(%shifttypec8i8 %a, %shifttypec8i8 %b) { entry: ; SSE2-LABEL: shift8i8c - ; SSE2: cost of 1 {{.*}} lshr + ; SSE2: cost of 2 {{.*}} lshr ; SSE2-CODEGEN-LABEL: shift8i8c ; SSE2-CODEGEN: psrlw $3 diff --git a/llvm/test/Analysis/CostModel/X86/testshiftshl.ll b/llvm/test/Analysis/CostModel/X86/testshiftshl.ll index 7db82b9fa..359ef1d5b 100644 --- a/llvm/test/Analysis/CostModel/X86/testshiftshl.ll +++ b/llvm/test/Analysis/CostModel/X86/testshiftshl.ll @@ -5,9 +5,9 @@ define %shifttype @shift2i16(%shifttype %a, %shifttype %b) { entry: ; SSE2-LABEL: shift2i16 - ; SSE2: cost of 4 {{.*}} shl + ; SSE2: cost of 32 {{.*}} shl ; SSE2-CODEGEN-LABEL: shift2i16 - ; SSE2-CODEGEN: psllq + ; SSE2-CODEGEN: pmullw %0 = shl %shifttype %a , %b ret %shifttype %0 @@ -17,9 +17,9 @@ entry: define %shifttype4i16 @shift4i16(%shifttype4i16 %a, %shifttype4i16 %b) { entry: ; SSE2-LABEL: shift4i16 - ; SSE2: cost of 10 {{.*}} shl + ; SSE2: cost of 32 {{.*}} shl ; SSE2-CODEGEN-LABEL: shift4i16 - ; SSE2-CODEGEN: pmuludq + ; SSE2-CODEGEN: pmullw %0 = shl %shifttype4i16 %a , %b ret %shifttype4i16 %0 @@ -65,9 +65,9 @@ entry: define %shifttype2i32 @shift2i32(%shifttype2i32 %a, %shifttype2i32 %b) { entry: ; SSE2-LABEL: shift2i32 - ; SSE2: cost of 4 {{.*}} shl + ; SSE2: cost of 10 {{.*}} shl ; SSE2-CODEGEN-LABEL: shift2i32 - ; SSE2-CODEGEN: psllq + ; SSE2-CODEGEN: pmuludq %0 = shl %shifttype2i32 %a , %b ret %shifttype2i32 %0 @@ -185,9 +185,9 @@ entry: define %shifttype2i8 @shift2i8(%shifttype2i8 %a, %shifttype2i8 %b) { entry: ; SSE2-LABEL: shift2i8 - ; SSE2: cost 
of 4 {{.*}} shl + ; SSE2: cost of 26 {{.*}} shl ; SSE2-CODEGEN-LABEL: shift2i8 - ; SSE2-CODEGEN: psllq + ; SSE2-CODEGEN: psllw %0 = shl %shifttype2i8 %a , %b ret %shifttype2i8 %0 @@ -197,9 +197,9 @@ entry: define %shifttype4i8 @shift4i8(%shifttype4i8 %a, %shifttype4i8 %b) { entry: ; SSE2-LABEL: shift4i8 - ; SSE2: cost of 10 {{.*}} shl + ; SSE2: cost of 26 {{.*}} shl ; SSE2-CODEGEN-LABEL: shift4i8 - ; SSE2-CODEGEN: pmuludq + ; SSE2-CODEGEN: psllw %0 = shl %shifttype4i8 %a , %b ret %shifttype4i8 %0 @@ -209,9 +209,9 @@ entry: define %shifttype8i8 @shift8i8(%shifttype8i8 %a, %shifttype8i8 %b) { entry: ; SSE2-LABEL: shift8i8 - ; SSE2: cost of 32 {{.*}} shl + ; SSE2: cost of 26 {{.*}} shl ; SSE2-CODEGEN-LABEL: shift8i8 - ; SSE2-CODEGEN: pmullw + ; SSE2-CODEGEN: psllw %0 = shl %shifttype8i8 %a , %b ret %shifttype8i8 %0 @@ -249,7 +249,7 @@ entry: ; SSE2-LABEL: shift2i16const ; SSE2: cost of 1 {{.*}} shl ; SSE2-CODEGEN-LABEL: shift2i16const - ; SSE2-CODEGEN: psllq $3 + ; SSE2-CODEGEN: psllw $3 %0 = shl %shifttypec %a , ret %shifttypec %0 @@ -261,7 +261,7 @@ entry: ; SSE2-LABEL: shift4i16const ; SSE2: cost of 1 {{.*}} shl ; SSE2-CODEGEN-LABEL: shift4i16const - ; SSE2-CODEGEN: pslld $3 + ; SSE2-CODEGEN: psllw $3 %0 = shl %shifttypec4i16 %a , ret %shifttypec4i16 %0 @@ -322,7 +322,7 @@ entry: ; SSE2-LABEL: shift2i32c ; SSE2: cost of 1 {{.*}} shl ; SSE2-CODEGEN-LABEL: shift2i32c - ; SSE2-CODEGEN: psllq $3 + ; SSE2-CODEGEN: pslld $3 %0 = shl %shifttypec2i32 %a , ret %shifttypec2i32 %0 @@ -461,9 +461,9 @@ entry: define %shifttypec2i8 @shift2i8c(%shifttypec2i8 %a, %shifttypec2i8 %b) { entry: ; SSE2-LABEL: shift2i8c - ; SSE2: cost of 1 {{.*}} shl + ; SSE2: cost of 2 {{.*}} shl ; SSE2-CODEGEN-LABEL: shift2i8c - ; SSE2-CODEGEN: psllq $3 + ; SSE2-CODEGEN: psllw $3 %0 = shl %shifttypec2i8 %a , ret %shifttypec2i8 %0 @@ -473,9 +473,9 @@ entry: define %shifttypec4i8 @shift4i8c(%shifttypec4i8 %a, %shifttypec4i8 %b) { entry: ; SSE2-LABEL: shift4i8c - ; SSE2: cost of 1 {{.*}} shl + ; SSE2: 
cost of 2 {{.*}} shl ; SSE2-CODEGEN-LABEL: shift4i8c - ; SSE2-CODEGEN: pslld $3 + ; SSE2-CODEGEN: psllw $3 %0 = shl %shifttypec4i8 %a , ret %shifttypec4i8 %0 @@ -485,7 +485,7 @@ entry: define %shifttypec8i8 @shift8i8c(%shifttypec8i8 %a, %shifttypec8i8 %b) { entry: ; SSE2-LABEL: shift8i8c - ; SSE2: cost of 1 {{.*}} shl + ; SSE2: cost of 2 {{.*}} shl ; SSE2-CODEGEN-LABEL: shift8i8c ; SSE2-CODEGEN: psllw $3 diff --git a/llvm/test/Analysis/CostModel/X86/trunc.ll b/llvm/test/Analysis/CostModel/X86/trunc.ll index 9668da564..37d18c3e3 100644 --- a/llvm/test/Analysis/CostModel/X86/trunc.ll +++ b/llvm/test/Analysis/CostModel/X86/trunc.ll @@ -13,35 +13,35 @@ define i32 @trunc_vXi32() { ; SSE-LABEL: 'trunc_vXi32' -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'trunc_vXi32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i32> ; AVX1-NEXT: 
Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'trunc_vXi32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'trunc_vXi32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'trunc_vXi32' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i32> +; BTVER2-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i32> @@ -56,12 +56,12 @@ define i32 @trunc_vXi32() { define i32 @trunc_vXi16() { ; SSE2-LABEL: 'trunc_vXi16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i16> -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i16> -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16> -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16> -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i16> +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i16> +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16> +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16> +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16> ; SSE2-NEXT: Cost Model: Found an 
estimated cost of 3 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i16> ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i16> ; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i16> @@ -69,12 +69,12 @@ define i32 @trunc_vXi16() { ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'trunc_vXi16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i16> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i16> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i16> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i16> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i32 = trunc <4 x i32> 
undef to <4 x i16> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i16> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i16> @@ -82,12 +82,12 @@ define i32 @trunc_vXi16() { ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'trunc_vXi16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i16> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i16> -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16> -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16> -; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i16> +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i16> +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16> +; SSE42-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16> +; SSE42-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i16> ; SSE42-NEXT: Cost Model: Found an 
estimated cost of 3 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i16> ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i16> @@ -95,12 +95,12 @@ define i32 @trunc_vXi16() { ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'trunc_vXi16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x 
i16> @@ -108,12 +108,12 @@ define i32 @trunc_vXi16() { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'trunc_vXi16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i16> @@ -121,12 +121,12 @@ define i32 @trunc_vXi16() { ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'trunc_vXi16' -; AVX512F-NEXT: Cost 
Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i16> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i16> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i16> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i16> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i16> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i16> @@ -134,12 +134,12 @@ define i32 @trunc_vXi16() { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'trunc_vXi16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i16> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i16> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i16> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 
for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i16> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i16> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i16> @@ -147,12 +147,12 @@ define i32 @trunc_vXi16() { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'trunc_vXi16' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16i64 = trunc 
<16 x i64> undef to <16 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i16> @@ -175,19 +175,19 @@ define i32 @trunc_vXi16() { define i32 @trunc_vXi8() { ; SSE2-LABEL: 'trunc_vXi8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i8> -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i8> -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i8> -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8> -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8> -; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i8> +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i8> +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i8> +; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V16i64 = trunc <16 x i64> undef to 
<16 x i8> +; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8> +; SSE2-NEXT: Cost Model: Found an estimated cost of 159 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i8> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i8> ; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8> ; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8> ; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i8> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i8> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i8> @@ -196,19 +196,19 @@ define i32 @trunc_vXi8() { ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'trunc_vXi8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i8> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i8> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i64 = 
trunc <8 x i64> undef to <8 x i8> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i8> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i8> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i8> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 159 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i8> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i8> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
%V2i16 = trunc <2 x i16> undef to <2 x i8> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i8> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i8> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i8> @@ -217,19 +217,19 @@ define i32 @trunc_vXi8() { ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'trunc_vXi8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i8> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i8> -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i8> -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8> -; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8> -; SSE42-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i8> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i8> +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i8> +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8> +; SSE42-NEXT: Cost Model: Found an 
estimated cost of 31 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8> +; SSE42-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i8> ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i8> ; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8> ; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8> ; SSE42-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i8> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i8> ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i8> @@ -238,19 +238,19 @@ define i32 @trunc_vXi8() { ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'trunc_vXi8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i8> -; 
AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> ; AVX1-NEXT: Cost 
Model: Found an estimated cost of 2 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i8> @@ -259,19 +259,19 @@ define i32 @trunc_vXi8() { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'trunc_vXi8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i32 = trunc <2 x i32> undef to <2 
x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i8> @@ -280,19 +280,19 @@ define i32 @trunc_vXi8() { ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'trunc_vXi8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i8> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i8> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i8> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i8> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: 
%V8i64 = trunc <8 x i64> undef to <8 x i8> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i8> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i8> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i8> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i8> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i8> @@ -301,19 +301,19 @@ define i32 @trunc_vXi8() { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'trunc_vXi8' -; 
AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i8> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i8> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i8> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i8> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i8> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i8> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i8> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i16 
= trunc <2 x i16> undef to <2 x i8> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i8> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i8> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i8> @@ -322,19 +322,19 @@ define i32 @trunc_vXi8() { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'trunc_vXi8' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8> +; 
BTVER2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i8> diff --git a/llvm/test/Analysis/CostModel/X86/uitofp.ll b/llvm/test/Analysis/CostModel/X86/uitofp.ll index c76ac14a6..3379bcde7 100644 --- a/llvm/test/Analysis/CostModel/X86/uitofp.ll +++ b/llvm/test/Analysis/CostModel/X86/uitofp.ll @@ -13,9 +13,9 @@ define i32 @uitofp_i8_double() { ; SSE-LABEL: 'uitofp_i8_double' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i8_f64 = uitofp i8 undef to double -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cvt_v2i8_v2f64 = uitofp <2 x i8> undef to <2 x double> -; SSE-NEXT: Cost Model: Found an 
estimated cost of 40 for instruction: %cvt_v4i8_v4f64 = uitofp <4 x i8> undef to <4 x double> -; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %cvt_v8i8_v8f64 = uitofp <8 x i8> undef to <8 x double> +; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %cvt_v2i8_v2f64 = uitofp <2 x i8> undef to <2 x double> +; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %cvt_v4i8_v4f64 = uitofp <4 x i8> undef to <4 x double> +; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %cvt_v8i8_v8f64 = uitofp <8 x i8> undef to <8 x double> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'uitofp_i8_double' @@ -49,8 +49,8 @@ define i32 @uitofp_i8_double() { define i32 @uitofp_i16_double() { ; SSE-LABEL: 'uitofp_i16_double' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i16_f64 = uitofp i16 undef to double -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cvt_v2i16_v2f64 = uitofp <2 x i16> undef to <2 x double> -; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cvt_v4i16_v4f64 = uitofp <4 x i16> undef to <4 x double> +; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %cvt_v2i16_v2f64 = uitofp <2 x i16> undef to <2 x double> +; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %cvt_v4i16_v4f64 = uitofp <4 x i16> undef to <4 x double> ; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %cvt_v8i16_v8f64 = uitofp <8 x i16> undef to <8 x double> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -85,7 +85,7 @@ define i32 @uitofp_i16_double() { define i32 @uitofp_i32_double() { ; SSE-LABEL: 'uitofp_i32_double' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = uitofp i32 undef to double -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: 
%cvt_v2i32_v2f64 = uitofp <2 x i32> undef to <2 x double> +; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cvt_v2i32_v2f64 = uitofp <2 x i32> undef to <2 x double> ; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cvt_v4i32_v4f64 = uitofp <4 x i32> undef to <4 x double> ; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %cvt_v8i32_v8f64 = uitofp <8 x i32> undef to <8 x double> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef @@ -165,7 +165,7 @@ define i32 @uitofp_i8_float() { ; SSE-LABEL: 'uitofp_i8_float' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i8_f32 = uitofp i8 undef to float ; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v4i8_v4f32 = uitofp <4 x i8> undef to <4 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cvt_v8i8_v8f32 = uitofp <8 x i8> undef to <8 x float> +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v8i8_v8f32 = uitofp <8 x i8> undef to <8 x float> ; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v16i8_v16f32 = uitofp <16 x i8> undef to <16 x float> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -200,7 +200,7 @@ define i32 @uitofp_i8_float() { define i32 @uitofp_i16_float() { ; SSE-LABEL: 'uitofp_i16_float' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i16_f32 = uitofp i16 undef to float -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v4i16_v4f32 = uitofp <4 x i16> undef to <4 x float> +; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cvt_v4i16_v4f32 = uitofp <4 x i16> undef to <4 x float> ; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cvt_v8i16_v8f32 = uitofp <8 x i16> undef to <8 x float> ; SSE-NEXT: Cost Model: Found an estimated cost of 30 for instruction: 
%cvt_v16i16_v16f32 = uitofp <16 x i16> undef to <16 x float> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef @@ -278,7 +278,7 @@ define i32 @uitofp_i32_float() { define i32 @uitofp_i64_float() { ; SSE-LABEL: 'uitofp_i64_float' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f32 = uitofp i64 undef to float +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cvt_i64_f32 = uitofp i64 undef to float ; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cvt_v2i64_v2f32 = uitofp <2 x i64> undef to <2 x float> ; SSE-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cvt_v4i64_v4f32 = uitofp <4 x i64> undef to <4 x float> ; SSE-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %cvt_v8i64_v8f32 = uitofp <8 x i64> undef to <8 x float> @@ -286,11 +286,11 @@ define i32 @uitofp_i64_float() { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'uitofp_i64_float' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f32 = uitofp i64 undef to float -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i64_v2f32 = uitofp <2 x i64> undef to <2 x float> -; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cvt_v4i64_v4f32 = uitofp <4 x i64> undef to <4 x float> -; AVX-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %cvt_v8i64_v8f32 = uitofp <8 x i64> undef to <8 x float> -; AVX-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %cvt_v16i64_v16f32 = uitofp <16 x i64> undef to <16 x float> +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_i64_f32 = uitofp i64 undef to float +; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cvt_v2i64_v2f32 = uitofp <2 x i64> undef to <2 x float> +; AVX-NEXT: Cost Model: Found an estimated cost of 22 for instruction: 
%cvt_v4i64_v4f32 = uitofp <4 x i64> undef to <4 x float> +; AVX-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %cvt_v8i64_v8f32 = uitofp <8 x i64> undef to <8 x float> +; AVX-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %cvt_v16i64_v16f32 = uitofp <16 x i64> undef to <16 x float> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'uitofp_i64_float' @@ -310,11 +310,11 @@ define i32 @uitofp_i64_float() { ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'uitofp_i64_float' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f32 = uitofp i64 undef to float -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i64_v2f32 = uitofp <2 x i64> undef to <2 x float> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cvt_v4i64_v4f32 = uitofp <4 x i64> undef to <4 x float> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %cvt_v8i64_v8f32 = uitofp <8 x i64> undef to <8 x float> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %cvt_v16i64_v16f32 = uitofp <16 x i64> undef to <16 x float> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_i64_f32 = uitofp i64 undef to float +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cvt_v2i64_v2f32 = uitofp <2 x i64> undef to <2 x float> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cvt_v4i64_v4f32 = uitofp <4 x i64> undef to <4 x float> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %cvt_v8i64_v8f32 = uitofp <8 x i64> undef to <8 x float> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %cvt_v16i64_v16f32 = uitofp <16 x i64> undef to <16 x float> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 
undef ; %cvt_i64_f32 = uitofp i64 undef to float diff --git a/llvm/test/Analysis/CostModel/X86/vector-extract.ll b/llvm/test/Analysis/CostModel/X86/vector-extract.ll index c8a90aa72..974bcc634 100644 --- a/llvm/test/Analysis/CostModel/X86/vector-extract.ll +++ b/llvm/test/Analysis/CostModel/X86/vector-extract.ll @@ -9,16 +9,19 @@ ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW ; -; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 -; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE,SSE42,SLM +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE,SSE42,GLM ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=btver2 | FileCheck %s --check-prefixes=BTVER2 define i32 @extract_double(i32 %arg) { ; SSE-LABEL: 'extract_double' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_a = extractelement <2 x double> undef, i32 %arg ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64_0 = extractelement <2 x double> undef, i32 0 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_1 = extractelement <2 x double> undef, i32 1 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_a = extractelement <4 x double> undef, i32 %arg ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f64_0 = extractelement <4 x double> undef, i32 0 ; SSE-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %v4f64_3 = extractelement <4 x double> undef, i32 3 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_a = extractelement <8 x double> undef, i32 %arg ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_0 = extractelement <8 x double> undef, i32 0 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_3 = extractelement <8 x double> undef, i32 3 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_4 = extractelement <8 x double> undef, i32 4 @@ -26,10 +29,13 @@ define i32 @extract_double(i32 %arg) { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'extract_double' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_a = extractelement <2 x double> undef, i32 %arg ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64_0 = extractelement <2 x double> undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_1 = extractelement <2 x double> undef, i32 1 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_a = extractelement <4 x double> undef, i32 %arg ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f64_0 = extractelement <4 x double> undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_3 = extractelement <4 x double> undef, i32 3 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_a = extractelement <8 x double> undef, i32 %arg ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_0 = extractelement <8 x double> undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_3 = extractelement <8 x double> undef, i32 3 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_4 = extractelement <8 x double> undef, 
i32 4 @@ -37,10 +43,13 @@ define i32 @extract_double(i32 %arg) { ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'extract_double' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_a = extractelement <2 x double> undef, i32 %arg ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64_0 = extractelement <2 x double> undef, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_1 = extractelement <2 x double> undef, i32 1 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_a = extractelement <4 x double> undef, i32 %arg ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f64_0 = extractelement <4 x double> undef, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_3 = extractelement <4 x double> undef, i32 3 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_a = extractelement <8 x double> undef, i32 %arg ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_0 = extractelement <8 x double> undef, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_3 = extractelement <8 x double> undef, i32 3 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_4 = extractelement <8 x double> undef, i32 4 @@ -48,22 +57,28 @@ define i32 @extract_double(i32 %arg) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'extract_double' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_a = extractelement <2 x double> undef, i32 %arg ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64_0 = extractelement <2 x double> undef, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_1 = extractelement <2 x double> undef, 
i32 1 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_a = extractelement <4 x double> undef, i32 %arg ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f64_0 = extractelement <4 x double> undef, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_3 = extractelement <4 x double> undef, i32 3 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_a = extractelement <8 x double> undef, i32 %arg ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_0 = extractelement <8 x double> undef, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_3 = extractelement <8 x double> undef, i32 3 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_4 = extractelement <8 x double> undef, i32 4 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_7 = extractelement <8 x double> undef, i32 7 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; + %v2f64_a = extractelement <2 x double> undef, i32 %arg %v2f64_0 = extractelement <2 x double> undef, i32 0 %v2f64_1 = extractelement <2 x double> undef, i32 1 + %v4f64_a = extractelement <4 x double> undef, i32 %arg %v4f64_0 = extractelement <4 x double> undef, i32 0 %v4f64_3 = extractelement <4 x double> undef, i32 3 + %v8f64_a = extractelement <8 x double> undef, i32 %arg %v8f64_0 = extractelement <8 x double> undef, i32 0 %v8f64_3 = extractelement <8 x double> undef, i32 3 %v8f64_4 = extractelement <8 x double> undef, i32 4 @@ -74,14 +89,18 @@ define i32 @extract_double(i32 %arg) { define i32 @extract_float(i32 %arg) { ; SSE-LABEL: 'extract_float' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_a = extractelement <2 x float> undef, i32 %arg ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = extractelement <2 x float> 
undef, i32 0 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_1 = extractelement <2 x float> undef, i32 1 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_a = extractelement <4 x float> undef, i32 %arg ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = extractelement <4 x float> undef, i32 0 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_3 = extractelement <4 x float> undef, i32 3 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_a = extractelement <8 x float> undef, i32 %arg ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = extractelement <8 x float> undef, i32 0 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_3 = extractelement <8 x float> undef, i32 3 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_4 = extractelement <8 x float> undef, i32 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_7 = extractelement <8 x float> undef, i32 7 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_a = extractelement <16 x float> undef, i32 %arg ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = extractelement <16 x float> undef, i32 0 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_3 = extractelement <16 x float> undef, i32 3 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_8 = extractelement <16 x float> undef, i32 8 @@ -89,14 +108,18 @@ define i32 @extract_float(i32 %arg) { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'extract_float' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_a = extractelement <2 x float> undef, i32 %arg ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = 
extractelement <2 x float> undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_1 = extractelement <2 x float> undef, i32 1 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_a = extractelement <4 x float> undef, i32 %arg ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = extractelement <4 x float> undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_3 = extractelement <4 x float> undef, i32 3 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_a = extractelement <8 x float> undef, i32 %arg ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = extractelement <8 x float> undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_3 = extractelement <8 x float> undef, i32 3 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_4 = extractelement <8 x float> undef, i32 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_7 = extractelement <8 x float> undef, i32 7 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_a = extractelement <16 x float> undef, i32 %arg ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = extractelement <16 x float> undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_3 = extractelement <16 x float> undef, i32 3 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_8 = extractelement <16 x float> undef, i32 8 @@ -104,14 +127,18 @@ define i32 @extract_float(i32 %arg) { ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'extract_float' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_a = extractelement <2 x float> undef, i32 %arg ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: %v2f32_0 = extractelement <2 x float> undef, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_1 = extractelement <2 x float> undef, i32 1 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_a = extractelement <4 x float> undef, i32 %arg ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = extractelement <4 x float> undef, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_3 = extractelement <4 x float> undef, i32 3 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_a = extractelement <8 x float> undef, i32 %arg ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = extractelement <8 x float> undef, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_3 = extractelement <8 x float> undef, i32 3 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_4 = extractelement <8 x float> undef, i32 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_7 = extractelement <8 x float> undef, i32 7 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_a = extractelement <16 x float> undef, i32 %arg ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = extractelement <16 x float> undef, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_3 = extractelement <16 x float> undef, i32 3 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_8 = extractelement <16 x float> undef, i32 8 @@ -119,31 +146,39 @@ define i32 @extract_float(i32 %arg) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'extract_float' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_a = extractelement <2 x float> undef, i32 
%arg ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = extractelement <2 x float> undef, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_1 = extractelement <2 x float> undef, i32 1 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_a = extractelement <4 x float> undef, i32 %arg ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = extractelement <4 x float> undef, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_3 = extractelement <4 x float> undef, i32 3 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_a = extractelement <8 x float> undef, i32 %arg ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = extractelement <8 x float> undef, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_3 = extractelement <8 x float> undef, i32 3 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_4 = extractelement <8 x float> undef, i32 4 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_7 = extractelement <8 x float> undef, i32 7 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_a = extractelement <16 x float> undef, i32 %arg ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = extractelement <16 x float> undef, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_3 = extractelement <16 x float> undef, i32 3 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_8 = extractelement <16 x float> undef, i32 8 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_15 = extractelement <16 x float> undef, i32 15 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; + %v2f32_a = 
extractelement <2 x float> undef, i32 %arg %v2f32_0 = extractelement <2 x float> undef, i32 0 %v2f32_1 = extractelement <2 x float> undef, i32 1 + %v4f32_a = extractelement <4 x float> undef, i32 %arg %v4f32_0 = extractelement <4 x float> undef, i32 0 %v4f32_3 = extractelement <4 x float> undef, i32 3 + %v8f32_a = extractelement <8 x float> undef, i32 %arg %v8f32_0 = extractelement <8 x float> undef, i32 0 %v8f32_3 = extractelement <8 x float> undef, i32 3 %v8f32_4 = extractelement <8 x float> undef, i32 4 %v8f32_7 = extractelement <8 x float> undef, i32 7 + %v16f32_a = extractelement <16 x float> undef, i32 %arg %v16f32_0 = extractelement <16 x float> undef, i32 0 %v16f32_3 = extractelement <16 x float> undef, i32 3 %v16f32_8 = extractelement <16 x float> undef, i32 8 @@ -153,34 +188,141 @@ define i32 @extract_float(i32 %arg) { } define i32 @extract_i64(i32 %arg) { -; CHECK-LABEL: 'extract_i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_7 = extractelement <8 x i64> undef, i32 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; SSE2-LABEL: 
'extract_i64' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_7 = extractelement <8 x i64> undef, i32 7 +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSE3-LABEL: 'extract_i64' +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_7 = extractelement <8 x i64> undef, i32 7 +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSSE3-LABEL: 'extract_i64' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%v8i64_3 = extractelement <8 x i64> undef, i32 3 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_7 = extractelement <8 x i64> undef, i32 7 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSE41-LABEL: 'extract_i64' +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_7 = extractelement <8 x i64> undef, i32 7 +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX-LABEL: 'extract_i64' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 
%arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_7 = extractelement <8 x i64> undef, i32 7 +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX512-LABEL: 'extract_i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_7 = extractelement <8 x i64> undef, i32 7 +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SLM-LABEL: 'extract_i64' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = 
extractelement <8 x i64> undef, i32 4 +; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v8i64_7 = extractelement <8 x i64> undef, i32 7 +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; GLM-LABEL: 'extract_i64' +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_7 = extractelement <8 x i64> undef, i32 7 +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'extract_i64' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0 ; BTVER2-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_7 = extractelement <8 x i64> undef, i32 7 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; + %v2i64_a = extractelement <2 x i64> undef, i32 %arg %v2i64_0 = extractelement <2 x i64> undef, i32 0 %v2i64_1 = extractelement <2 x i64> undef, i32 1 + %v4i64_a = extractelement <4 x i64> undef, i32 %arg %v4i64_0 = extractelement <4 x i64> undef, i32 0 %v4i64_3 = extractelement <4 x i64> undef, i32 3 + %v8i64_a = extractelement <8 x i64> undef, i32 %arg %v8i64_0 = extractelement <8 x i64> undef, i32 0 %v8i64_3 = extractelement <8 x i64> undef, i32 3 %v8i64_4 = extractelement <8 x i64> undef, i32 4 @@ -190,47 +332,192 @@ define i32 @extract_i64(i32 %arg) { } define i32 @extract_i32(i32 %arg) { -; CHECK-LABEL: 'extract_i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%v2i32_1 = extractelement <2 x i32> undef, i32 1 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = extractelement <16 x i32> undef, i32 0 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = extractelement <16 x i32> undef, i32 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_15 = extractelement <16 x i32> undef, i32 15 -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; SSE2-LABEL: 'extract_i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, 
i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = extractelement <16 x i32> undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = extractelement <16 x i32> undef, i32 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_15 = extractelement <16 x i32> undef, i32 15 +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSE3-LABEL: 'extract_i32' +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = extractelement <16 x i32> undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = extractelement <16 x i32> undef, i32 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_15 = extractelement <16 x i32> undef, i32 15 +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSSE3-LABEL: 'extract_i32' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%v4i32_a = extractelement <4 x i32> undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = extractelement <16 x i32> undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = extractelement <16 x i32> undef, i32 8 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_15 = extractelement <16 x i32> undef, i32 15 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSE41-LABEL: 'extract_i32' +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = extractelement 
<2 x i32> undef, i32 1 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = extractelement <16 x i32> undef, i32 0 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = extractelement <16 x i32> undef, i32 8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_15 = extractelement <16 x i32> undef, i32 15 +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX-LABEL: 'extract_i32' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0 +; 
AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = extractelement <16 x i32> undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = extractelement <16 x i32> undef, i32 8 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_15 = extractelement <16 x i32> undef, i32 15 +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX512-LABEL: 'extract_i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = extractelement <16 x i32> undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = extractelement <16 x i32> undef, i32 8 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_15 = extractelement <16 x i32> undef, i32 15 +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SLM-LABEL: 'extract_i32' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%v2i32_a = extractelement <2 x i32> undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4 +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = extractelement <16 x i32> undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = extractelement <16 x i32> undef, i32 8 +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i32_15 = extractelement <16 x i32> undef, i32 15 +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; GLM-LABEL: 'extract_i32' +; 
GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = extractelement <16 x i32> undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = extractelement <16 x i32> undef, i32 8 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_15 = extractelement <16 x i32> undef, i32 15 +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: ret i32 undef ; ; BTVER2-LABEL: 'extract_i32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = extractelement <16 x i32> undef, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = extractelement <16 x i32> undef, i32 8 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_15 = 
extractelement <16 x i32> undef, i32 15 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; + %v2i32_a = extractelement <2 x i32> undef, i32 %arg %v2i32_0 = extractelement <2 x i32> undef, i32 0 %v2i32_1 = extractelement <2 x i32> undef, i32 1 + %v4i32_a = extractelement <4 x i32> undef, i32 %arg %v4i32_0 = extractelement <4 x i32> undef, i32 0 %v4i32_3 = extractelement <4 x i32> undef, i32 3 + %v8i32_a = extractelement <8 x i32> undef, i32 %arg %v8i32_0 = extractelement <8 x i32> undef, i32 0 %v8i32_3 = extractelement <8 x i32> undef, i32 3 %v8i32_4 = extractelement <8 x i32> undef, i32 4 %v8i32_7 = extractelement <8 x i32> undef, i32 7 + %v16i32_a = extractelement <16 x i32> undef, i32 %arg %v16i32_0 = extractelement <16 x i32> undef, i32 0 %v16i32_3 = extractelement <16 x i32> undef, i32 3 %v16i32_8 = extractelement <16 x i32> undef, i32 8 @@ -240,29 +527,168 @@ define i32 @extract_i32(i32 %arg) { } define i32 @extract_i16(i32 %arg) { -; CHECK-LABEL: 'extract_i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = extractelement <8 x i16> undef, i32 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = extractelement <16 x i16> undef, i32 0 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = extractelement <16 x i16> undef, i32 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = extractelement <16 x i16> undef, i32 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_15 = extractelement <16 x i16> undef, i32 15 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = extractelement <32 x i16> undef, i32 0 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = extractelement <32 x i16> 
undef, i32 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = extractelement <32 x i16> undef, i32 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_15 = extractelement <32 x i16> undef, i32 15 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = extractelement <32 x i16> undef, i32 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_24 = extractelement <32 x i16> undef, i32 24 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_31 = extractelement <32 x i16> undef, i32 31 -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; SSE2-LABEL: 'extract_i16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = extractelement <8 x i16> undef, i32 7 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = extractelement <16 x i16> undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = extractelement <16 x i16> undef, i32 7 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = extractelement <16 x i16> undef, i32 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_15 = extractelement <16 x i16> undef, i32 15 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = extractelement <32 x i16> undef, i32 0 +; 
SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = extractelement <32 x i16> undef, i32 7 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = extractelement <32 x i16> undef, i32 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_15 = extractelement <32 x i16> undef, i32 15 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = extractelement <32 x i16> undef, i32 16 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_24 = extractelement <32 x i16> undef, i32 24 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_31 = extractelement <32 x i16> undef, i32 31 +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSE3-LABEL: 'extract_i16' +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = extractelement <8 x i16> undef, i32 7 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = extractelement <16 x i16> undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = extractelement <16 x i16> undef, i32 7 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = extractelement <16 x i16> undef, i32 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_15 = extractelement <16 x i16> undef, i32 15 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg +; SSE3-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %v32i16_0 = extractelement <32 x i16> undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = extractelement <32 x i16> undef, i32 7 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = extractelement <32 x i16> undef, i32 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_15 = extractelement <32 x i16> undef, i32 15 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = extractelement <32 x i16> undef, i32 16 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_24 = extractelement <32 x i16> undef, i32 24 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_31 = extractelement <32 x i16> undef, i32 31 +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSSE3-LABEL: 'extract_i16' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = extractelement <8 x i16> undef, i32 7 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = extractelement <16 x i16> undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = extractelement <16 x i16> undef, i32 7 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = extractelement <16 x i16> undef, i32 8 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_15 = extractelement <16 x i16> undef, i32 15 +; SSSE3-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = extractelement <32 x i16> undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = extractelement <32 x i16> undef, i32 7 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = extractelement <32 x i16> undef, i32 8 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_15 = extractelement <32 x i16> undef, i32 15 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = extractelement <32 x i16> undef, i32 16 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_24 = extractelement <32 x i16> undef, i32 24 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_31 = extractelement <32 x i16> undef, i32 31 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSE41-LABEL: 'extract_i16' +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = extractelement <8 x i16> undef, i32 7 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = extractelement <16 x i16> undef, i32 0 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = extractelement <16 x i16> undef, i32 7 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = extractelement <16 x i16> undef, i32 8 +; SSE41-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %v16i16_15 = extractelement <16 x i16> undef, i32 15 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = extractelement <32 x i16> undef, i32 0 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = extractelement <32 x i16> undef, i32 7 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = extractelement <32 x i16> undef, i32 8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_15 = extractelement <32 x i16> undef, i32 15 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = extractelement <32 x i16> undef, i32 16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_24 = extractelement <32 x i16> undef, i32 24 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_31 = extractelement <32 x i16> undef, i32 31 +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX-LABEL: 'extract_i16' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = extractelement <8 x i16> undef, i32 7 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = extractelement <16 x i16> undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = extractelement <16 x i16> undef, i32 7 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %v16i16_8 = extractelement <16 x i16> undef, i32 8 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_15 = extractelement <16 x i16> undef, i32 15 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = extractelement <32 x i16> undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = extractelement <32 x i16> undef, i32 7 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = extractelement <32 x i16> undef, i32 8 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_15 = extractelement <32 x i16> undef, i32 15 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = extractelement <32 x i16> undef, i32 16 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_24 = extractelement <32 x i16> undef, i32 24 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_31 = extractelement <32 x i16> undef, i32 31 +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX512-LABEL: 'extract_i16' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = extractelement <8 x i16> undef, i32 7 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = extractelement <16 x i16> undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%v16i16_7 = extractelement <16 x i16> undef, i32 7 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = extractelement <16 x i16> undef, i32 8 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_15 = extractelement <16 x i16> undef, i32 15 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = extractelement <32 x i16> undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = extractelement <32 x i16> undef, i32 7 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = extractelement <32 x i16> undef, i32 8 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_15 = extractelement <32 x i16> undef, i32 15 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = extractelement <32 x i16> undef, i32 16 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_24 = extractelement <32 x i16> undef, i32 24 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_31 = extractelement <32 x i16> undef, i32 31 +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SLM-LABEL: 'extract_i16' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i16_7 = extractelement <8 x i16> undef, i32 7 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%v16i16_0 = extractelement <16 x i16> undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i16_7 = extractelement <16 x i16> undef, i32 7 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = extractelement <16 x i16> undef, i32 8 +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i16_15 = extractelement <16 x i16> undef, i32 15 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = extractelement <32 x i16> undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32i16_7 = extractelement <32 x i16> undef, i32 7 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = extractelement <32 x i16> undef, i32 8 +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32i16_15 = extractelement <32 x i16> undef, i32 15 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = extractelement <32 x i16> undef, i32 16 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_24 = extractelement <32 x i16> undef, i32 24 +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32i16_31 = extractelement <32 x i16> undef, i32 31 +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; GLM-LABEL: 'extract_i16' +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = extractelement <8 x i16> undef, i32 7 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = extractelement <16 x i16> 
undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = extractelement <16 x i16> undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = extractelement <16 x i16> undef, i32 7 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = extractelement <16 x i16> undef, i32 8 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_15 = extractelement <16 x i16> undef, i32 15 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = extractelement <32 x i16> undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = extractelement <32 x i16> undef, i32 7 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = extractelement <32 x i16> undef, i32 8 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_15 = extractelement <32 x i16> undef, i32 15 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = extractelement <32 x i16> undef, i32 16 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_24 = extractelement <32 x i16> undef, i32 24 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_31 = extractelement <32 x i16> undef, i32 31 +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'extract_i16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = extractelement <8 x i16> undef, i32 7 +; BTVER2-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = extractelement <16 x i16> undef, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = extractelement <16 x i16> undef, i32 7 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = extractelement <16 x i16> undef, i32 8 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_15 = extractelement <16 x i16> undef, i32 15 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = extractelement <32 x i16> undef, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = extractelement <32 x i16> undef, i32 7 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = extractelement <32 x i16> undef, i32 8 @@ -272,14 +698,17 @@ define i32 @extract_i16(i32 %arg) { ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_31 = extractelement <32 x i16> undef, i32 31 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; + %v8i16_a = extractelement <8 x i16> undef, i32 %arg %v8i16_0 = extractelement <8 x i16> undef, i32 0 %v8i16_7 = extractelement <8 x i16> undef, i32 7 + %v16i16_a = extractelement <16 x i16> undef, i32 %arg %v16i16_0 = extractelement <16 x i16> undef, i32 0 %v16i16_7 = extractelement <16 x i16> undef, i32 7 %v16i16_8 = extractelement <16 x i16> undef, i32 8 %v16i16_15 = extractelement <16 x i16> undef, i32 15 + %v32i16_a = extractelement <32 x i16> undef, i32 %arg %v32i16_0 = extractelement <32 x i16> undef, i32 0 %v32i16_7 = extractelement <32 x i16> undef, i32 7 %v32i16_8 = extractelement <32 x i16> undef, 
i32 8 @@ -292,37 +721,211 @@ define i32 @extract_i16(i32 %arg) { } define i32 @extract_i8(i32 %arg) { -; CHECK-LABEL: 'extract_i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = extractelement <32 x i8> undef, i32 0 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = extractelement <64 x i8> undef, i32 15 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_24 = extractelement <64 x i8> undef, i32 24 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_31 = extractelement <64 x i8> undef, i32 31 -; CHECK-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %v64i8_32 = extractelement <64 x i8> undef, i32 32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = extractelement <64 x i8> undef, i32 48 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_63 = extractelement <64 x i8> undef, i32 63 -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; SSE2-LABEL: 'extract_i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = extractelement <32 x i8> undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = extractelement <64 x i8> undef, i32 15 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_24 = extractelement <64 x i8> undef, i32 24 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_31 = extractelement <64 x i8> undef, i32 31 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = extractelement <64 x i8> undef, i32 32 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = extractelement <64 x i8> undef, i32 48 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_63 = extractelement <64 x i8> undef, i32 63 +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSE3-LABEL: 'extract_i8' +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = extractelement <32 x i8> undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = extractelement <32 x 
i8> undef, i32 7 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = extractelement <64 x i8> undef, i32 15 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_24 = extractelement <64 x i8> undef, i32 24 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_31 = extractelement <64 x i8> undef, i32 31 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = extractelement <64 x i8> undef, i32 32 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = extractelement <64 x i8> undef, i32 48 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_63 = extractelement <64 x i8> undef, i32 63 +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSSE3-LABEL: 'extract_i8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg +; SSSE3-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = extractelement <32 x i8> undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = extractelement <64 x i8> undef, i32 15 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_24 = extractelement <64 x i8> undef, i32 24 +; SSSE3-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %v64i8_31 = extractelement <64 x i8> undef, i32 31 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = extractelement <64 x i8> undef, i32 32 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = extractelement <64 x i8> undef, i32 48 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_63 = extractelement <64 x i8> undef, i32 63 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSE41-LABEL: 'extract_i8' +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = extractelement <32 x i8> undef, i32 0 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%v64i8_a = extractelement <64 x i8> undef, i32 %arg +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = extractelement <64 x i8> undef, i32 15 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_24 = extractelement <64 x i8> undef, i32 24 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_31 = extractelement <64 x i8> undef, i32 31 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = extractelement <64 x i8> undef, i32 32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = extractelement <64 x i8> undef, i32 48 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_63 = extractelement <64 x i8> undef, i32 63 +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX-LABEL: 'extract_i8' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = extractelement <32 x i8> 
undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = extractelement <64 x i8> undef, i32 15 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_24 = extractelement <64 x i8> undef, i32 24 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_31 = extractelement <64 x i8> undef, i32 31 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = extractelement <64 x i8> undef, i32 32 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = extractelement <64 x i8> undef, i32 48 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_63 = extractelement <64 x i8> undef, i32 63 +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX512-LABEL: 'extract_i8' +; AVX512-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = extractelement <32 x i8> undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = extractelement <64 x i8> undef, i32 15 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %v64i8_24 = extractelement <64 x i8> undef, i32 24 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_31 = extractelement <64 x i8> undef, i32 31 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = extractelement <64 x i8> undef, i32 32 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = extractelement <64 x i8> undef, i32 48 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_63 = extractelement <64 x i8> undef, i32 63 +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SLM-LABEL: 'extract_i8' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8 +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = extractelement <32 x i8> undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7 +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8 +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15 +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24 +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i8_31 = extractelement <32 
x i8> undef, i32 31 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7 +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8 +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v64i8_15 = extractelement <64 x i8> undef, i32 15 +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v64i8_24 = extractelement <64 x i8> undef, i32 24 +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v64i8_31 = extractelement <64 x i8> undef, i32 31 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = extractelement <64 x i8> undef, i32 32 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = extractelement <64 x i8> undef, i32 48 +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v64i8_63 = extractelement <64 x i8> undef, i32 63 +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; GLM-LABEL: 'extract_i8' +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg +; GLM-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %v32i8_0 = extractelement <32 x i8> undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = extractelement <64 x i8> undef, i32 15 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_24 = extractelement <64 x i8> undef, i32 24 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_31 = extractelement <64 x i8> undef, i32 31 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = extractelement <64 x i8> undef, i32 32 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = extractelement <64 x i8> undef, i32 48 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_63 = extractelement <64 x i8> undef, i32 63 +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; 
BTVER2-LABEL: 'extract_i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = extractelement <32 x i8> undef, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8 @@ -334,10 +937,12 @@ define i32 @extract_i8(i32 %arg) { ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %v64i8_63 = extractelement <64 x i8> undef, i32 63 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; + %v16i8_a = extractelement <16 x i8> undef, i32 %arg %v16i8_0 = extractelement <16 x i8> undef, i32 0 %v16i8_8 = extractelement <16 x i8> undef, i32 8 %v16i8_15 = extractelement <16 x i8> undef, i32 15 + %v32i8_a = extractelement <32 x i8> undef, i32 %arg %v32i8_0 = extractelement <32 x i8> undef, i32 0 %v32i8_7 = extractelement <32 x i8> undef, i32 7 %v32i8_8 = extractelement <32 x i8> undef, i32 8 @@ -345,6 +950,7 @@ define i32 @extract_i8(i32 %arg) { %v32i8_24 = extractelement <32 x i8> undef, i32 24 %v32i8_31 = extractelement <32 x i8> undef, i32 31 + %v64i8_a = extractelement <64 x i8> undef, i32 %arg %v64i8_0 = extractelement <64 x i8> undef, i32 0 %v64i8_7 = extractelement <64 x i8> undef, i32 7 %v64i8_8 = extractelement <64 x i8> undef, i32 8 diff --git a/llvm/test/Analysis/CostModel/X86/vector-insert.ll b/llvm/test/Analysis/CostModel/X86/vector-insert.ll index 2df0137d6..68b1af571 100644 --- a/llvm/test/Analysis/CostModel/X86/vector-insert.ll +++ b/llvm/test/Analysis/CostModel/X86/vector-insert.ll @@ -15,10 +15,13 @@ define i32 @insert_double(i32 %arg) { ; SSE-LABEL: 'insert_double' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_a = insertelement <2 x double> undef, double undef, i32 %arg ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64_0 = insertelement <2 x double> undef, double undef, i32 0 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_1 = insertelement <2 x double> undef, double undef, i32 1 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_a = insertelement <4 x double> undef, double undef, i32 %arg ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f64_0 = insertelement <4 x double> undef, double undef, i32 0 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %v4f64_3 = insertelement <4 x double> undef, double undef, i32 3 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_a = insertelement <8 x double> undef, double undef, i32 %arg ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_0 = insertelement <8 x double> undef, double undef, i32 0 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_3 = insertelement <8 x double> undef, double undef, i32 3 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_4 = insertelement <8 x double> undef, double undef, i32 4 @@ -26,10 +29,13 @@ define i32 @insert_double(i32 %arg) { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'insert_double' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_a = insertelement <2 x double> undef, double undef, i32 %arg ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64_0 = insertelement <2 x double> undef, double undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_1 = insertelement <2 x double> undef, double undef, i32 1 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_a = insertelement <4 x double> undef, double undef, i32 %arg ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f64_0 = insertelement <4 x double> undef, double undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_3 = insertelement <4 x double> undef, double undef, i32 3 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_a = insertelement <8 x double> undef, double undef, i32 %arg ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_0 = insertelement <8 x double> undef, double undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_3 = insertelement <8 x double> 
undef, double undef, i32 3 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_4 = insertelement <8 x double> undef, double undef, i32 4 @@ -37,10 +43,13 @@ define i32 @insert_double(i32 %arg) { ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'insert_double' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_a = insertelement <2 x double> undef, double undef, i32 %arg ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64_0 = insertelement <2 x double> undef, double undef, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_1 = insertelement <2 x double> undef, double undef, i32 1 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_a = insertelement <4 x double> undef, double undef, i32 %arg ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f64_0 = insertelement <4 x double> undef, double undef, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_3 = insertelement <4 x double> undef, double undef, i32 3 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_a = insertelement <8 x double> undef, double undef, i32 %arg ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_0 = insertelement <8 x double> undef, double undef, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_3 = insertelement <8 x double> undef, double undef, i32 3 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_4 = insertelement <8 x double> undef, double undef, i32 4 @@ -48,22 +57,28 @@ define i32 @insert_double(i32 %arg) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'insert_double' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_a = 
insertelement <2 x double> undef, double undef, i32 %arg ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64_0 = insertelement <2 x double> undef, double undef, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_1 = insertelement <2 x double> undef, double undef, i32 1 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_a = insertelement <4 x double> undef, double undef, i32 %arg ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f64_0 = insertelement <4 x double> undef, double undef, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_3 = insertelement <4 x double> undef, double undef, i32 3 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_a = insertelement <8 x double> undef, double undef, i32 %arg ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_0 = insertelement <8 x double> undef, double undef, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_3 = insertelement <8 x double> undef, double undef, i32 3 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_4 = insertelement <8 x double> undef, double undef, i32 4 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_7 = insertelement <8 x double> undef, double undef, i32 7 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; + %v2f64_a = insertelement <2 x double> undef, double undef, i32 %arg %v2f64_0 = insertelement <2 x double> undef, double undef, i32 0 %v2f64_1 = insertelement <2 x double> undef, double undef, i32 1 + %v4f64_a = insertelement <4 x double> undef, double undef, i32 %arg %v4f64_0 = insertelement <4 x double> undef, double undef, i32 0 %v4f64_3 = insertelement <4 x double> undef, double undef, i32 3 + %v8f64_a = insertelement <8 x double> undef, double undef, i32 
%arg %v8f64_0 = insertelement <8 x double> undef, double undef, i32 0 %v8f64_3 = insertelement <8 x double> undef, double undef, i32 3 %v8f64_4 = insertelement <8 x double> undef, double undef, i32 4 @@ -74,14 +89,18 @@ define i32 @insert_double(i32 %arg) { define i32 @insert_float(i32 %arg) { ; SSE-LABEL: 'insert_float' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_a = insertelement <2 x float> undef, float undef, i32 %arg ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = insertelement <2 x float> undef, float undef, i32 0 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_1 = insertelement <2 x float> undef, float undef, i32 1 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_a = insertelement <4 x float> undef, float undef, i32 %arg ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = insertelement <4 x float> undef, float undef, i32 0 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_3 = insertelement <4 x float> undef, float undef, i32 3 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_a = insertelement <8 x float> undef, float undef, i32 %arg ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = insertelement <8 x float> undef, float undef, i32 0 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_3 = insertelement <8 x float> undef, float undef, i32 3 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_4 = insertelement <8 x float> undef, float undef, i32 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_7 = insertelement <8 x float> undef, float undef, i32 7 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_a = insertelement <16 x float> undef, float undef, i32 %arg ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: %v16f32_0 = insertelement <16 x float> undef, float undef, i32 0 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_3 = insertelement <16 x float> undef, float undef, i32 3 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_8 = insertelement <16 x float> undef, float undef, i32 8 @@ -89,14 +108,18 @@ define i32 @insert_float(i32 %arg) { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'insert_float' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_a = insertelement <2 x float> undef, float undef, i32 %arg ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = insertelement <2 x float> undef, float undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_1 = insertelement <2 x float> undef, float undef, i32 1 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_a = insertelement <4 x float> undef, float undef, i32 %arg ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = insertelement <4 x float> undef, float undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_3 = insertelement <4 x float> undef, float undef, i32 3 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_a = insertelement <8 x float> undef, float undef, i32 %arg ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = insertelement <8 x float> undef, float undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_3 = insertelement <8 x float> undef, float undef, i32 3 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_4 = insertelement <8 x float> undef, float undef, i32 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_7 = insertelement <8 x float> undef, float undef, i32 7 +; 
AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_a = insertelement <16 x float> undef, float undef, i32 %arg ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = insertelement <16 x float> undef, float undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_3 = insertelement <16 x float> undef, float undef, i32 3 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_8 = insertelement <16 x float> undef, float undef, i32 8 @@ -104,14 +127,18 @@ define i32 @insert_float(i32 %arg) { ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'insert_float' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_a = insertelement <2 x float> undef, float undef, i32 %arg ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = insertelement <2 x float> undef, float undef, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_1 = insertelement <2 x float> undef, float undef, i32 1 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_a = insertelement <4 x float> undef, float undef, i32 %arg ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = insertelement <4 x float> undef, float undef, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_3 = insertelement <4 x float> undef, float undef, i32 3 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_a = insertelement <8 x float> undef, float undef, i32 %arg ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = insertelement <8 x float> undef, float undef, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_3 = insertelement <8 x float> undef, float undef, i32 3 ; AVX512-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %v8f32_4 = insertelement <8 x float> undef, float undef, i32 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_7 = insertelement <8 x float> undef, float undef, i32 7 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_a = insertelement <16 x float> undef, float undef, i32 %arg ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = insertelement <16 x float> undef, float undef, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_3 = insertelement <16 x float> undef, float undef, i32 3 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_8 = insertelement <16 x float> undef, float undef, i32 8 @@ -119,31 +146,39 @@ define i32 @insert_float(i32 %arg) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'insert_float' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_a = insertelement <2 x float> undef, float undef, i32 %arg ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = insertelement <2 x float> undef, float undef, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_1 = insertelement <2 x float> undef, float undef, i32 1 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_a = insertelement <4 x float> undef, float undef, i32 %arg ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = insertelement <4 x float> undef, float undef, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_3 = insertelement <4 x float> undef, float undef, i32 3 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_a = insertelement <8 x float> undef, float undef, i32 %arg ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
%v8f32_0 = insertelement <8 x float> undef, float undef, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_3 = insertelement <8 x float> undef, float undef, i32 3 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_4 = insertelement <8 x float> undef, float undef, i32 4 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_7 = insertelement <8 x float> undef, float undef, i32 7 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_a = insertelement <16 x float> undef, float undef, i32 %arg ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = insertelement <16 x float> undef, float undef, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_3 = insertelement <16 x float> undef, float undef, i32 3 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_8 = insertelement <16 x float> undef, float undef, i32 8 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_15 = insertelement <16 x float> undef, float undef, i32 15 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; + %v2f32_a = insertelement <2 x float> undef, float undef, i32 %arg %v2f32_0 = insertelement <2 x float> undef, float undef, i32 0 %v2f32_1 = insertelement <2 x float> undef, float undef, i32 1 + %v4f32_a = insertelement <4 x float> undef, float undef, i32 %arg %v4f32_0 = insertelement <4 x float> undef, float undef, i32 0 %v4f32_3 = insertelement <4 x float> undef, float undef, i32 3 + %v8f32_a = insertelement <8 x float> undef, float undef, i32 %arg %v8f32_0 = insertelement <8 x float> undef, float undef, i32 0 %v8f32_3 = insertelement <8 x float> undef, float undef, i32 3 %v8f32_4 = insertelement <8 x float> undef, float undef, i32 4 %v8f32_7 = insertelement <8 x float> undef, float undef, i32 7 + %v16f32_a = insertelement <16 x 
float> undef, float undef, i32 %arg %v16f32_0 = insertelement <16 x float> undef, float undef, i32 0 %v16f32_3 = insertelement <16 x float> undef, float undef, i32 3 %v16f32_8 = insertelement <16 x float> undef, float undef, i32 8 @@ -154,10 +189,13 @@ define i32 @insert_float(i32 %arg) { define i32 @insert_i64(i32 %arg) { ; CHECK-LABEL: 'insert_i64' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = insertelement <2 x i64> undef, i64 undef, i32 %arg ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = insertelement <2 x i64> undef, i64 undef, i32 0 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = insertelement <2 x i64> undef, i64 undef, i32 1 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = insertelement <4 x i64> undef, i64 undef, i32 %arg ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = insertelement <4 x i64> undef, i64 undef, i32 0 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = insertelement <4 x i64> undef, i64 undef, i32 3 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = insertelement <8 x i64> undef, i64 undef, i32 %arg ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = insertelement <8 x i64> undef, i64 undef, i32 0 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = insertelement <8 x i64> undef, i64 undef, i32 3 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = insertelement <8 x i64> undef, i64 undef, i32 4 @@ -165,22 +203,28 @@ define i32 @insert_i64(i32 %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'insert_i64' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = insertelement <2 x i64> undef, i64 undef, i32 %arg ; BTVER2-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = insertelement <2 x i64> undef, i64 undef, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = insertelement <2 x i64> undef, i64 undef, i32 1 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = insertelement <4 x i64> undef, i64 undef, i32 %arg ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = insertelement <4 x i64> undef, i64 undef, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = insertelement <4 x i64> undef, i64 undef, i32 3 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = insertelement <8 x i64> undef, i64 undef, i32 %arg ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = insertelement <8 x i64> undef, i64 undef, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = insertelement <8 x i64> undef, i64 undef, i32 3 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = insertelement <8 x i64> undef, i64 undef, i32 4 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_7 = insertelement <8 x i64> undef, i64 undef, i32 7 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; + %v2i64_a = insertelement <2 x i64> undef, i64 undef, i32 %arg %v2i64_0 = insertelement <2 x i64> undef, i64 undef, i32 0 %v2i64_1 = insertelement <2 x i64> undef, i64 undef, i32 1 + %v4i64_a = insertelement <4 x i64> undef, i64 undef, i32 %arg %v4i64_0 = insertelement <4 x i64> undef, i64 undef, i32 0 %v4i64_3 = insertelement <4 x i64> undef, i64 undef, i32 3 + %v8i64_a = insertelement <8 x i64> undef, i64 undef, i32 %arg %v8i64_0 = insertelement <8 x i64> undef, i64 undef, i32 0 %v8i64_3 = insertelement <8 x i64> undef, i64 undef, i32 3 %v8i64_4 = insertelement <8 x i64> undef, i64 
undef, i32 4 @@ -191,14 +235,18 @@ define i32 @insert_i64(i32 %arg) { define i32 @insert_i32(i32 %arg) { ; CHECK-LABEL: 'insert_i32' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = insertelement <2 x i32> undef, i32 undef, i32 %arg ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = insertelement <2 x i32> undef, i32 undef, i32 0 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = insertelement <2 x i32> undef, i32 undef, i32 1 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = insertelement <4 x i32> undef, i32 undef, i32 %arg ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = insertelement <4 x i32> undef, i32 undef, i32 0 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = insertelement <4 x i32> undef, i32 undef, i32 3 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = insertelement <8 x i32> undef, i32 undef, i32 %arg ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = insertelement <8 x i32> undef, i32 undef, i32 0 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = insertelement <8 x i32> undef, i32 undef, i32 3 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = insertelement <8 x i32> undef, i32 undef, i32 4 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = insertelement <8 x i32> undef, i32 undef, i32 7 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = insertelement <16 x i32> undef, i32 undef, i32 %arg ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = insertelement <16 x i32> undef, i32 undef, i32 0 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = insertelement <16 x i32> undef, i32 undef, i32 3 ; 
CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = insertelement <16 x i32> undef, i32 undef, i32 8 @@ -206,31 +254,39 @@ define i32 @insert_i32(i32 %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'insert_i32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = insertelement <2 x i32> undef, i32 undef, i32 %arg ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = insertelement <2 x i32> undef, i32 undef, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = insertelement <2 x i32> undef, i32 undef, i32 1 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = insertelement <4 x i32> undef, i32 undef, i32 %arg ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = insertelement <4 x i32> undef, i32 undef, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = insertelement <4 x i32> undef, i32 undef, i32 3 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = insertelement <8 x i32> undef, i32 undef, i32 %arg ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = insertelement <8 x i32> undef, i32 undef, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = insertelement <8 x i32> undef, i32 undef, i32 3 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = insertelement <8 x i32> undef, i32 undef, i32 4 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = insertelement <8 x i32> undef, i32 undef, i32 7 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = insertelement <16 x i32> undef, i32 undef, i32 %arg ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = 
insertelement <16 x i32> undef, i32 undef, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = insertelement <16 x i32> undef, i32 undef, i32 3 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = insertelement <16 x i32> undef, i32 undef, i32 8 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_15 = insertelement <16 x i32> undef, i32 undef, i32 15 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; + %v2i32_a = insertelement <2 x i32> undef, i32 undef, i32 %arg %v2i32_0 = insertelement <2 x i32> undef, i32 undef, i32 0 %v2i32_1 = insertelement <2 x i32> undef, i32 undef, i32 1 + %v4i32_a = insertelement <4 x i32> undef, i32 undef, i32 %arg %v4i32_0 = insertelement <4 x i32> undef, i32 undef, i32 0 %v4i32_3 = insertelement <4 x i32> undef, i32 undef, i32 3 + %v8i32_a = insertelement <8 x i32> undef, i32 undef, i32 %arg %v8i32_0 = insertelement <8 x i32> undef, i32 undef, i32 0 %v8i32_3 = insertelement <8 x i32> undef, i32 undef, i32 3 %v8i32_4 = insertelement <8 x i32> undef, i32 undef, i32 4 %v8i32_7 = insertelement <8 x i32> undef, i32 undef, i32 7 + %v16i32_a = insertelement <16 x i32> undef, i32 undef, i32 %arg %v16i32_0 = insertelement <16 x i32> undef, i32 undef, i32 0 %v16i32_3 = insertelement <16 x i32> undef, i32 undef, i32 3 %v16i32_8 = insertelement <16 x i32> undef, i32 undef, i32 8 @@ -241,12 +297,15 @@ define i32 @insert_i32(i32 %arg) { define i32 @insert_i16(i32 %arg) { ; CHECK-LABEL: 'insert_i16' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = insertelement <8 x i16> undef, i16 undef, i32 %arg ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = insertelement <8 x i16> undef, i16 undef, i32 0 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = insertelement <8 x i16> undef, i16 undef, i32 7 +; CHECK-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %v16i16_a = insertelement <16 x i16> undef, i16 undef, i32 %arg ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = insertelement <16 x i16> undef, i16 undef, i32 0 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = insertelement <16 x i16> undef, i16 undef, i32 7 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = insertelement <16 x i16> undef, i16 undef, i32 8 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_15 = insertelement <16 x i16> undef, i16 undef, i32 15 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = insertelement <32 x i16> undef, i16 undef, i32 %arg ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = insertelement <32 x i16> undef, i16 undef, i32 0 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = insertelement <32 x i16> undef, i16 undef, i32 7 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = insertelement <32 x i16> undef, i16 undef, i32 8 @@ -257,12 +316,15 @@ define i32 @insert_i16(i32 %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'insert_i16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = insertelement <8 x i16> undef, i16 undef, i32 %arg ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = insertelement <8 x i16> undef, i16 undef, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = insertelement <8 x i16> undef, i16 undef, i32 7 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = insertelement <16 x i16> undef, i16 undef, i32 %arg ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = insertelement <16 x 
i16> undef, i16 undef, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = insertelement <16 x i16> undef, i16 undef, i32 7 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = insertelement <16 x i16> undef, i16 undef, i32 8 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_15 = insertelement <16 x i16> undef, i16 undef, i32 15 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = insertelement <32 x i16> undef, i16 undef, i32 %arg ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = insertelement <32 x i16> undef, i16 undef, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = insertelement <32 x i16> undef, i16 undef, i32 7 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = insertelement <32 x i16> undef, i16 undef, i32 8 @@ -272,14 +334,17 @@ define i32 @insert_i16(i32 %arg) { ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_31 = insertelement <32 x i16> undef, i16 undef, i32 31 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; + %v8i16_a = insertelement <8 x i16> undef, i16 undef, i32 %arg %v8i16_0 = insertelement <8 x i16> undef, i16 undef, i32 0 %v8i16_7 = insertelement <8 x i16> undef, i16 undef, i32 7 + %v16i16_a = insertelement <16 x i16> undef, i16 undef, i32 %arg %v16i16_0 = insertelement <16 x i16> undef, i16 undef, i32 0 %v16i16_7 = insertelement <16 x i16> undef, i16 undef, i32 7 %v16i16_8 = insertelement <16 x i16> undef, i16 undef, i32 8 %v16i16_15 = insertelement <16 x i16> undef, i16 undef, i32 15 + %v32i16_a = insertelement <32 x i16> undef, i16 undef, i32 %arg %v32i16_0 = insertelement <32 x i16> undef, i16 undef, i32 0 %v32i16_7 = insertelement <32 x i16> undef, i16 undef, i32 7 %v32i16_8 = insertelement <32 x i16> undef, i16 undef, i32 8 
@@ -293,15 +358,18 @@ define i32 @insert_i16(i32 %arg) { define i32 @insert_i8(i32 %arg) { ; CHECK-LABEL: 'insert_i8' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = insertelement <16 x i8> undef, i8 undef, i32 %arg ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = insertelement <16 x i8> undef, i8 undef, i32 0 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = insertelement <16 x i8> undef, i8 undef, i32 8 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = insertelement <16 x i8> undef, i8 undef, i32 15 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = insertelement <32 x i8> undef, i8 undef, i32 %arg ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = insertelement <32 x i8> undef, i8 undef, i32 0 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = insertelement <32 x i8> undef, i8 undef, i32 7 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = insertelement <32 x i8> undef, i8 undef, i32 8 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = insertelement <32 x i8> undef, i8 undef, i32 15 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = insertelement <32 x i8> undef, i8 undef, i32 24 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = insertelement <32 x i8> undef, i8 undef, i32 31 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = insertelement <64 x i8> undef, i8 undef, i32 %arg ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = insertelement <64 x i8> undef, i8 undef, i32 0 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = insertelement <64 x i8> undef, i8 undef, i32 7 ; CHECK-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %v64i8_8 = insertelement <64 x i8> undef, i8 undef, i32 8 @@ -314,15 +382,18 @@ define i32 @insert_i8(i32 %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'insert_i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = insertelement <16 x i8> undef, i8 undef, i32 %arg ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = insertelement <16 x i8> undef, i8 undef, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = insertelement <16 x i8> undef, i8 undef, i32 8 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = insertelement <16 x i8> undef, i8 undef, i32 15 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = insertelement <32 x i8> undef, i8 undef, i32 %arg ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = insertelement <32 x i8> undef, i8 undef, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = insertelement <32 x i8> undef, i8 undef, i32 7 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = insertelement <32 x i8> undef, i8 undef, i32 8 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = insertelement <32 x i8> undef, i8 undef, i32 15 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = insertelement <32 x i8> undef, i8 undef, i32 24 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = insertelement <32 x i8> undef, i8 undef, i32 31 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = insertelement <64 x i8> undef, i8 undef, i32 %arg ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = insertelement <64 x i8> undef, i8 undef, i32 0 ; 
BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = insertelement <64 x i8> undef, i8 undef, i32 7 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = insertelement <64 x i8> undef, i8 undef, i32 8 @@ -334,10 +405,12 @@ define i32 @insert_i8(i32 %arg) { ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_63 = insertelement <64 x i8> undef, i8 undef, i32 63 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; + %v16i8_a = insertelement <16 x i8> undef, i8 undef, i32 %arg %v16i8_0 = insertelement <16 x i8> undef, i8 undef, i32 0 %v16i8_8 = insertelement <16 x i8> undef, i8 undef, i32 8 %v16i8_15 = insertelement <16 x i8> undef, i8 undef, i32 15 + %v32i8_a = insertelement <32 x i8> undef, i8 undef, i32 %arg %v32i8_0 = insertelement <32 x i8> undef, i8 undef, i32 0 %v32i8_7 = insertelement <32 x i8> undef, i8 undef, i32 7 %v32i8_8 = insertelement <32 x i8> undef, i8 undef, i32 8 @@ -345,6 +418,7 @@ define i32 @insert_i8(i32 %arg) { %v32i8_24 = insertelement <32 x i8> undef, i8 undef, i32 24 %v32i8_31 = insertelement <32 x i8> undef, i8 undef, i32 31 + %v64i8_a = insertelement <64 x i8> undef, i8 undef, i32 %arg %v64i8_0 = insertelement <64 x i8> undef, i8 undef, i32 0 %v64i8_7 = insertelement <64 x i8> undef, i8 undef, i32 7 %v64i8_8 = insertelement <64 x i8> undef, i8 undef, i32 8 diff --git a/llvm/test/Analysis/DDG/basic-a.ll b/llvm/test/Analysis/DDG/basic-a.ll new file mode 100644 index 000000000..cebca668b --- /dev/null +++ b/llvm/test/Analysis/DDG/basic-a.ll @@ -0,0 +1,202 @@ +; RUN: opt < %s -disable-output "-passes=print" 2>&1 | FileCheck %s + +; CHECK-LABEL: 'DDG' for loop 'test1.for.body': + +; CHECK: Node Address:[[PI:0x[0-9a-f]*]]:pi-block +; CHECK-NEXT: --- start of nodes in pi-block --- +; CHECK: Node Address:[[N1:0x[0-9a-f]*]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %i.02 = phi i64 [ %inc, %test1.for.body ], 
[ 0, %test1.for.body.preheader ] +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N2:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N2]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %inc = add i64 %i.02, 1 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N1]] +; CHECK-NEXT: --- end of nodes in pi-block --- +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N3:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N4:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N5:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N5]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %exitcond = icmp ne i64 %inc, %n +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N6:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N6]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: br i1 %exitcond, label %test1.for.body, label %for.end.loopexit +; CHECK-NEXT: Edges:none! + +; CHECK: Node Address:[[N4]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %arrayidx1 = getelementptr inbounds float, float* %a, i64 %i.02 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N7:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N3]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %arrayidx = getelementptr inbounds float, float* %b, i64 %i.02 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N8:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N8]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %0 = load float, float* %arrayidx, align 4 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N9:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N10:0x[0-9a-f]*]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %conv = uitofp i64 %n to float +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N9]] + +; CHECK: Node Address:[[N9]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %add = fadd float %0, %conv +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N7]] + +; CHECK: Node Address:[[N7]]:single-instruction +; CHECK-NEXT: Instructions: +; 
CHECK-NEXT: store float %add, float* %arrayidx1, align 4 +; CHECK-NEXT: Edges:none! + + +;; No memory dependencies. +;; void test1(unsigned long n, float * restrict a, float * restrict b) { +;; for (unsigned long i = 0; i < n; i++) +;; a[i] = b[i] + n; +;; } + +define void @test1(i64 %n, float* noalias %a, float* noalias %b) { +entry: + %exitcond1 = icmp ne i64 0, %n + br i1 %exitcond1, label %test1.for.body, label %for.end + +test1.for.body: ; preds = %entry, %test1.for.body + %i.02 = phi i64 [ %inc, %test1.for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds float, float* %b, i64 %i.02 + %0 = load float, float* %arrayidx, align 4 + %conv = uitofp i64 %n to float + %add = fadd float %0, %conv + %arrayidx1 = getelementptr inbounds float, float* %a, i64 %i.02 + store float %add, float* %arrayidx1, align 4 + %inc = add i64 %i.02, 1 + %exitcond = icmp ne i64 %inc, %n + br i1 %exitcond, label %test1.for.body, label %for.end + +for.end: ; preds = %test1.for.body, %entry + ret void +} + + +; CHECK-LABEL: 'DDG' for loop 'test2.for.body': + +; CHECK: Node Address:[[PI:0x[0-9a-f]*]]:pi-block +; CHECK-NEXT: --- start of nodes in pi-block --- +; CHECK: Node Address:[[N1:0x[0-9a-f]*]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %i.02 = phi i64 [ %inc, %test2.for.body ], [ 0, %test2.for.body.preheader ] +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N2:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N2]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %inc = add i64 %i.02, 1 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N1]] +; CHECK-NEXT: --- end of nodes in pi-block --- +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N3:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N4:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N5:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N6:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N6]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %exitcond = icmp ne i64 %inc, %n +; CHECK-NEXT: Edges: +; 
CHECK-NEXT: [def-use] to [[N7:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N7]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: br i1 %exitcond, label %test2.for.body, label %for.end.loopexit +; CHECK-NEXT: Edges:none! + +; CHECK: Node Address:[[N5]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %arrayidx2 = getelementptr inbounds float, float* %a, i64 %i.02 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N8:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N4]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %arrayidx1 = getelementptr inbounds float, float* %a, i64 %i.02 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N9:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N9]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %1 = load float, float* %arrayidx1, align 4 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N20:0x[0-9a-f]*]] +; CHECK-NEXT: [memory] to [[N8]] + +; CHECK: Node Address:[[N3]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %arrayidx = getelementptr inbounds float, float* %b, i64 %i.02 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N10:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N10]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %0 = load float, float* %arrayidx, align 4 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N20]] + +; CHECK: Node Address:[[N20]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %add = fadd float %0, %1 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N8]] + +; CHECK: Node Address:[[N8]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: store float %add, float* %arrayidx2, align 4 +; CHECK-NEXT: Edges:none! + + + +;; Loop-independent memory dependencies. 
+;; void test2(unsigned long n, float * restrict a, float * restrict b) { +;; for (unsigned long i = 0; i < n; i++) +;; a[i] = b[i] + a[i]; +;; } + +define void @test2(i64 %n, float* noalias %a, float* noalias %b) { +entry: + %exitcond1 = icmp ne i64 0, %n + br i1 %exitcond1, label %test2.for.body, label %for.end + +test2.for.body: ; preds = %entry, %test2.for.body + %i.02 = phi i64 [ %inc, %test2.for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds float, float* %b, i64 %i.02 + %0 = load float, float* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds float, float* %a, i64 %i.02 + %1 = load float, float* %arrayidx1, align 4 + %add = fadd float %0, %1 + %arrayidx2 = getelementptr inbounds float, float* %a, i64 %i.02 + store float %add, float* %arrayidx2, align 4 + %inc = add i64 %i.02, 1 + %exitcond = icmp ne i64 %inc, %n + br i1 %exitcond, label %test2.for.body, label %for.end + +for.end: ; preds = %test2.for.body, %entry + ret void +} \ No newline at end of file diff --git a/llvm/test/Analysis/DDG/basic-b.ll b/llvm/test/Analysis/DDG/basic-b.ll new file mode 100644 index 000000000..50803d466 --- /dev/null +++ b/llvm/test/Analysis/DDG/basic-b.ll @@ -0,0 +1,233 @@ +; RUN: opt < %s -disable-output "-passes=print" 2>&1 | FileCheck %s + +; CHECK-LABEL: 'DDG' for loop 'test1.for.body': + +; CHECK: Node Address:[[N1:0x[0-9a-f]*]]:pi-block +; CHECK-NEXT:--- start of nodes in pi-block --- +; CHECK: Node Address:[[N2:0x[0-9a-f]*]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %i.02 = phi i64 [ %inc, %test1.for.body ], [ 1, %test1.for.body.preheader ] +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N3:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N3]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %inc = add i64 %i.02, 1 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N2]] +; CHECK-NEXT:--- end of nodes in pi-block --- +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N4:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to 
[[N5:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N6:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N7:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N7]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %cmp = icmp ult i64 %inc, %sub +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N8:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N8]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: br i1 %cmp, label %test1.for.body, label %for.end.loopexit +; CHECK-NEXT: Edges:none! + +; CHECK: Node Address:[[N6]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %arrayidx3 = getelementptr inbounds float, float* %a, i64 %i.02 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N9:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N5]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %sub1 = add i64 %i.02, -1 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N10:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N10]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %arrayidx2 = getelementptr inbounds float, float* %a, i64 %sub1 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N9]] + +; CHECK: Node Address:[[N4]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %arrayidx = getelementptr inbounds float, float* %b, i64 %i.02 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N11:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N11]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %0 = load float, float* %arrayidx, align 4 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N9]] + +; CHECK: Node Address:[[N9]]:pi-block +; CHECK-NEXT: --- start of nodes in pi-block --- +; CHECK: Node Address:[[N12:0x[0-9a-f]*]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %1 = load float, float* %arrayidx2, align 4 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N13:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N13]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %add = fadd float %0, %1 +; CHECK-NEXT: 
Edges: +; CHECK-NEXT: [def-use] to [[N14:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N14]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: store float %add, float* %arrayidx3, align 4 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [memory] to [[N12]] +; CHECK-NEXT:--- end of nodes in pi-block --- +; CHECK-NEXT: Edges:none! + + + +;; Loop-carried dependence requiring edge-reversal to expose a cycle +;; in the graph. +;; void test(unsigned long n, float * restrict a, float * restrict b) { +;; for (unsigned long i = 1; i < n-1; i++) +;; a[i] = b[i] + a[i-1]; +;; } + +define void @test1(i64 %n, float* noalias %a, float* noalias %b) { +entry: + %sub = add i64 %n, -1 + %cmp1 = icmp ult i64 1, %sub + br i1 %cmp1, label %test1.for.body, label %for.end + +test1.for.body: ; preds = %entry, %test1.for.body + %i.02 = phi i64 [ %inc, %test1.for.body ], [ 1, %entry ] + %arrayidx = getelementptr inbounds float, float* %b, i64 %i.02 + %0 = load float, float* %arrayidx, align 4 + %sub1 = add i64 %i.02, -1 + %arrayidx2 = getelementptr inbounds float, float* %a, i64 %sub1 + %1 = load float, float* %arrayidx2, align 4 + %add = fadd float %0, %1 + %arrayidx3 = getelementptr inbounds float, float* %a, i64 %i.02 + store float %add, float* %arrayidx3, align 4 + %inc = add i64 %i.02, 1 + %cmp = icmp ult i64 %inc, %sub + br i1 %cmp, label %test1.for.body, label %for.end + +for.end: ; preds = %test1.for.body, %entry + ret void +} + +; CHECK-LABEL: 'DDG' for loop 'test2.for.body': + +; CHECK: Node Address:[[N1:0x[0-9a-f]*]]:pi-block +; CHECK-NEXT:--- start of nodes in pi-block --- + +; CHECK: Node Address:[[N2:0x[0-9a-f]*]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %i.02 = phi i64 [ %inc, %test2.for.body ], [ 1, %test2.for.body.preheader ] +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N3:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N3]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %inc = add i64 %i.02, 1 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] 
to [[N2]] +; CHECK-NEXT:--- end of nodes in pi-block --- +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N4:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N5:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N6:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N7:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N7]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %cmp = icmp ult i64 %inc, %sub +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N8:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N8]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: br i1 %cmp, label %test2.for.body, label %for.end.loopexit +; CHECK-NEXT: Edges:none! + +; CHECK: Node Address:[[N6]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %arrayidx3 = getelementptr inbounds float, float* %a, i64 %i.02 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N9:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N5]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %add1 = add i64 %i.02, 1 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N10:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N10]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %arrayidx2 = getelementptr inbounds float, float* %a, i64 %add1 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N11:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N11]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %1 = load float, float* %arrayidx2, align 4 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N12:0x[0-9a-f]*]] +; CHECK-NEXT: [memory] to [[N9]] + +; CHECK: Node Address:[[N4]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %arrayidx = getelementptr inbounds float, float* %b, i64 %i.02 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N13:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N13]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %0 = load float, float* %arrayidx, align 4 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N12]] + + +; CHECK: Node 
Address:[[N12]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %add = fadd float %0, %1 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N9]] + +; CHECK: Node Address:[[N9]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: store float %add, float* %arrayidx3, align 4 +; CHECK-NEXT: Edges:none! + + +;; Forward loop-carried dependence *not* causing a cycle. +;; void test2(unsigned long n, float * restrict a, float * restrict b) { +;; for (unsigned long i = 1; i < n-1; i++) +;; a[i] = b[i] + a[i+1]; +;; } + +define void @test2(i64 %n, float* noalias %a, float* noalias %b) { +entry: + %sub = add i64 %n, -1 + %cmp1 = icmp ult i64 1, %sub + br i1 %cmp1, label %test2.for.body, label %for.end + +test2.for.body: ; preds = %entry, %test2.for.body + %i.02 = phi i64 [ %inc, %test2.for.body ], [ 1, %entry ] + %arrayidx = getelementptr inbounds float, float* %b, i64 %i.02 + %0 = load float, float* %arrayidx, align 4 + %add1 = add i64 %i.02, 1 + %arrayidx2 = getelementptr inbounds float, float* %a, i64 %add1 + %1 = load float, float* %arrayidx2, align 4 + %add = fadd float %0, %1 + %arrayidx3 = getelementptr inbounds float, float* %a, i64 %i.02 + store float %add, float* %arrayidx3, align 4 + %inc = add i64 %i.02, 1 + %cmp = icmp ult i64 %inc, %sub + br i1 %cmp, label %test2.for.body, label %for.end + +for.end: ; preds = %test2.for.body, %entry + ret void +} diff --git a/llvm/test/Analysis/DDG/basic-loopnest.ll b/llvm/test/Analysis/DDG/basic-loopnest.ll new file mode 100644 index 000000000..3581c7a68 --- /dev/null +++ b/llvm/test/Analysis/DDG/basic-loopnest.ll @@ -0,0 +1,456 @@ +; RUN: opt < %s -disable-output "-passes=print" 2>&1 | FileCheck %s + + +; CHECK-LABEL: 'DDG' for loop 'test1.for.cond1.preheader': + +; CHECK: Node Address:[[N1:0x[0-9a-f]*]]:pi-block +; CHECK-NEXT:--- start of nodes in pi-block --- +; CHECK: Node Address:[[N2:0x[0-9a-f]*]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %j.02 = phi i64 [ %inc, %for.body4 
], [ 1, %for.body4.preheader ] +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N3:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N3]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %inc = add i64 %j.02, 1 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N2]] +; CHECK-NEXT:--- end of nodes in pi-block --- +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N4:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N5:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N6:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N7:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N5]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %sub7 = add i64 %j.02, -1 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N8:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N9:0x[0-9a-f]*]]:pi-block +; CHECK-NEXT:--- start of nodes in pi-block --- +; CHECK: Node Address:[[N10:0x[0-9a-f]*]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %i.04 = phi i64 [ %inc13, %for.inc12 ], [ 0, %test1.for.cond1.preheader.preheader ] +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N11:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N11]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %inc13 = add i64 %i.04, 1 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N10]] +; CHECK-NEXT:--- end of nodes in pi-block --- +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N12:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N13:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N14:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N15:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N15]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %exitcond = icmp ne i64 %inc13, %n +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N16:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N16]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: br i1 %exitcond, label %test1.for.cond1.preheader, label %for.end14.loopexit +; CHECK-NEXT: Edges:none! 
+ +; CHECK: Node Address:[[N14]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %4 = mul nsw i64 %i.04, %n +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N17:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N17]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %arrayidx10 = getelementptr inbounds float, float* %a, i64 %4 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N6]] + +; CHECK: Node Address:[[N6]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %arrayidx11 = getelementptr inbounds float, float* %arrayidx10, i64 %j.02 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N18:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N13]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %2 = mul nsw i64 %i.04, %n +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N19:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N19]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %arrayidx6 = getelementptr inbounds float, float* %a, i64 %2 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N8]] + +; CHECK: Node Address:[[N8]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %arrayidx8 = getelementptr inbounds float, float* %arrayidx6, i64 %sub7 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N18]] + +; CHECK: Node Address:[[N12]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %0 = mul nsw i64 %i.04, %n +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N20:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N20]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %arrayidx = getelementptr inbounds float, float* %b, i64 %0 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N4]] + +; CHECK: Node Address:[[N4]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %arrayidx5 = getelementptr inbounds float, float* %arrayidx, i64 %j.02 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N21:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N21]]:single-instruction +; CHECK-NEXT: 
Instructions: +; CHECK-NEXT: %1 = load float, float* %arrayidx5, align 4 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N18]] + +; CHECK: Node Address:[[N18]]:pi-block +; CHECK-NEXT:--- start of nodes in pi-block --- +; CHECK: Node Address:[[N22:0x[0-9a-f]*]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %3 = load float, float* %arrayidx8, align 4 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N23:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N23]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %add = fadd float %1, %3 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N24:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N24]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: store float %add, float* %arrayidx11, align 4 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [memory] to [[N22]] +; CHECK-NEXT:--- end of nodes in pi-block --- +; CHECK-NEXT: Edges:none! + +; CHECK: Node Address:[[N25:0x[0-9a-f]*]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: br label %for.inc12 +; CHECK-NEXT: Edges:none! + +; CHECK: Node Address:[[N26:0x[0-9a-f]*]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: br label %for.body4 +; CHECK-NEXT: Edges:none! + +; CHECK: Node Address:[[N27:0x[0-9a-f]*]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %sub = add i64 %n, -1 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N7]] +; CHECK-NEXT: [def-use] to [[N28:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N28]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %cmp21 = icmp ult i64 1, %sub +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N29:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N29]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: br i1 %cmp21, label %for.body4.preheader, label %for.inc12 +; CHECK-NEXT: Edges:none! 
+ +; CHECK: Node Address:[[N7]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %cmp2 = icmp ult i64 %inc, %sub +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N30:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N30]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: br i1 %cmp2, label %for.body4, label %for.inc12.loopexit +; CHECK-NEXT: Edges:none! + + +;; This test has a cycle. +;; void test1(unsigned long n, float a[][n], float b[][n]) { +;; for (unsigned long i = 0; i < n; i++) +;; for (unsigned long j = 1; j < n-1; j++) +;; a[i][j] = b[i][j] + a[i][j-1]; +;; } + +define void @test1(i64 %n, float* noalias %a, float* noalias %b) { +entry: + %exitcond3 = icmp ne i64 0, %n + br i1 %exitcond3, label %test1.for.cond1.preheader, label %for.end14 + +test1.for.cond1.preheader: ; preds = %entry, %for.inc12 + %i.04 = phi i64 [ %inc13, %for.inc12 ], [ 0, %entry ] + %sub = add i64 %n, -1 + %cmp21 = icmp ult i64 1, %sub + br i1 %cmp21, label %for.body4, label %for.inc12 + +for.body4: ; preds = %test1.for.cond1.preheader, %for.body4 + %j.02 = phi i64 [ %inc, %for.body4 ], [ 1, %test1.for.cond1.preheader ] + %0 = mul nsw i64 %i.04, %n + %arrayidx = getelementptr inbounds float, float* %b, i64 %0 + %arrayidx5 = getelementptr inbounds float, float* %arrayidx, i64 %j.02 + %1 = load float, float* %arrayidx5, align 4 + %2 = mul nsw i64 %i.04, %n + %arrayidx6 = getelementptr inbounds float, float* %a, i64 %2 + %sub7 = add i64 %j.02, -1 + %arrayidx8 = getelementptr inbounds float, float* %arrayidx6, i64 %sub7 + %3 = load float, float* %arrayidx8, align 4 + %add = fadd float %1, %3 + %4 = mul nsw i64 %i.04, %n + %arrayidx10 = getelementptr inbounds float, float* %a, i64 %4 + %arrayidx11 = getelementptr inbounds float, float* %arrayidx10, i64 %j.02 + store float %add, float* %arrayidx11, align 4 + %inc = add i64 %j.02, 1 + %cmp2 = icmp ult i64 %inc, %sub + br i1 %cmp2, label %for.body4, label %for.inc12 + +for.inc12: ; preds = %for.body4, 
%test1.for.cond1.preheader + %inc13 = add i64 %i.04, 1 + %exitcond = icmp ne i64 %inc13, %n + br i1 %exitcond, label %test1.for.cond1.preheader, label %for.end14 + +for.end14: ; preds = %for.inc12, %entry + ret void +} + + + +; CHECK-LABEL: 'DDG' for loop 'test2.for.cond1.preheader': + +; CHECK: Node Address:[[PI1:0x[0-9a-f]*]]:pi-block +; CHECK-NEXT:--- start of nodes in pi-block --- +; CHECK: Node Address:[[N1:0x[0-9a-f]*]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %j.02 = phi i64 [ %inc, %for.body4 ], [ 1, %for.body4.preheader ] +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N2:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N2]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %inc = add i64 %j.02, 1 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N1]] +; CHECK-NEXT:--- end of nodes in pi-block --- +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N3:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N4:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N5:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N6:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N4]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %add7 = add i64 %j.02, 1 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N7:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N8:0x[0-9a-f]*]]:pi-block +; CHECK-NEXT:--- start of nodes in pi-block --- +; CHECK: Node Address:[[N9:0x[0-9a-f]*]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %i.04 = phi i64 [ %inc13, %for.inc12 ], [ 0, %test2.for.cond1.preheader.preheader ] +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N10:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N10]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %inc13 = add i64 %i.04, 1 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N9]] +; CHECK-NEXT:--- end of nodes in pi-block --- +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N11:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N12:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to 
[[N13:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N14:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N14]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %exitcond = icmp ne i64 %inc13, %n +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N15:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N15]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: br i1 %exitcond, label %test2.for.cond1.preheader, label %for.end14.loopexit +; CHECK-NEXT: Edges:none! + +; CHECK: Node Address:[[N13]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %4 = mul nsw i64 %i.04, %n +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N16:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N16]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %arrayidx10 = getelementptr inbounds float, float* %a, i64 %4 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N5]] + +; CHECK: Node Address:[[N5]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %arrayidx11 = getelementptr inbounds float, float* %arrayidx10, i64 %j.02 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N17:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N12]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %2 = mul nsw i64 %i.04, %n +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N18:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N18]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %arrayidx6 = getelementptr inbounds float, float* %a, i64 %2 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N7]] + +; CHECK: Node Address:[[N7]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %arrayidx8 = getelementptr inbounds float, float* %arrayidx6, i64 %add7 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N19:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N19]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %3 = load float, float* %arrayidx8, align 4 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N20:0x[0-9a-f]*]] +; CHECK-NEXT: [memory] to 
[[N17]] + +; CHECK: Node Address:[[N11]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %0 = mul nsw i64 %i.04, %n +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N21:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N21]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %arrayidx = getelementptr inbounds float, float* %b, i64 %0 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N3]] + +; CHECK: Node Address:[[N3]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %arrayidx5 = getelementptr inbounds float, float* %arrayidx, i64 %j.02 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N22:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N22]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %1 = load float, float* %arrayidx5, align 4 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N20]] + +; CHECK: Node Address:[[N20]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %add = fadd float %1, %3 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N17]] + +; CHECK: Node Address:[[N17]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: store float %add, float* %arrayidx11, align 4 +; CHECK-NEXT: Edges:none! + +; CHECK: Node Address:[[N23:0x[0-9a-f]*]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: br label %for.inc12 +; CHECK-NEXT: Edges:none! + +; CHECK: Node Address:[[N24:0x[0-9a-f]*]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: br label %for.body4 +; CHECK-NEXT: Edges:none! 
+ +; CHECK: Node Address:[[N25:0x[0-9a-f]*]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %sub = add i64 %n, -1 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N6]] +; CHECK-NEXT: [def-use] to [[N26:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N26]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %cmp21 = icmp ult i64 1, %sub +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N27:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N27]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: br i1 %cmp21, label %for.body4.preheader, label %for.inc12 +; CHECK-NEXT: Edges:none! + +; CHECK: Node Address:[[N6]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %cmp2 = icmp ult i64 %inc, %sub +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N28:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N28]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: br i1 %cmp2, label %for.body4, label %for.inc12.loopexit +; CHECK-NEXT: Edges:none! + + +;; This test has no cycles. 
+;; void test2(unsigned long n, float a[][n], float b[][n]) { +;; for (unsigned long i = 0; i < n; i++) +;; for (unsigned long j = 1; j < n-1; j++) +;; a[i][j] = b[i][j] + a[i][j+1]; +;; } + +define void @test2(i64 %n, float* noalias %a, float* noalias %b) { +entry: + %exitcond3 = icmp ne i64 0, %n + br i1 %exitcond3, label %test2.for.cond1.preheader, label %for.end14 + +test2.for.cond1.preheader: ; preds = %entry, %for.inc12 + %i.04 = phi i64 [ %inc13, %for.inc12 ], [ 0, %entry ] + %sub = add i64 %n, -1 + %cmp21 = icmp ult i64 1, %sub + br i1 %cmp21, label %for.body4, label %for.inc12 + +for.body4: ; preds = %test2.for.cond1.preheader, %for.body4 + %j.02 = phi i64 [ %inc, %for.body4 ], [ 1, %test2.for.cond1.preheader ] + %0 = mul nsw i64 %i.04, %n + %arrayidx = getelementptr inbounds float, float* %b, i64 %0 + %arrayidx5 = getelementptr inbounds float, float* %arrayidx, i64 %j.02 + %1 = load float, float* %arrayidx5, align 4 + %2 = mul nsw i64 %i.04, %n + %arrayidx6 = getelementptr inbounds float, float* %a, i64 %2 + %add7 = add i64 %j.02, 1 + %arrayidx8 = getelementptr inbounds float, float* %arrayidx6, i64 %add7 + %3 = load float, float* %arrayidx8, align 4 + %add = fadd float %1, %3 + %4 = mul nsw i64 %i.04, %n + %arrayidx10 = getelementptr inbounds float, float* %a, i64 %4 + %arrayidx11 = getelementptr inbounds float, float* %arrayidx10, i64 %j.02 + store float %add, float* %arrayidx11, align 4 + %inc = add i64 %j.02, 1 + %cmp2 = icmp ult i64 %inc, %sub + br i1 %cmp2, label %for.body4, label %for.inc12 + +for.inc12: ; preds = %for.body4, %test2.for.cond1.preheader + %inc13 = add i64 %i.04, 1 + %exitcond = icmp ne i64 %inc13, %n + br i1 %exitcond, label %test2.for.cond1.preheader, label %for.end14 + +for.end14: ; preds = %for.inc12, %entry + ret void +} \ No newline at end of file diff --git a/llvm/test/Analysis/DDG/root-node.ll b/llvm/test/Analysis/DDG/root-node.ll new file mode 100644 index 000000000..868fb72d4 --- /dev/null +++ 
b/llvm/test/Analysis/DDG/root-node.ll @@ -0,0 +1,49 @@ +; RUN: opt < %s -disable-output "-passes=print<ddg>" 2>&1 | FileCheck %s + +; CHECK-LABEL: 'DDG' for loop 'test1.for.body': + +; CHECK: Node Address:[[ROOT:0x[0-9a-f]*]]:root +; CHECK-NEXT: Edges: +; CHECK-NEXT: [rooted] to [[N1:0x[0-9a-f]*]] +; CHECK-NEXT: [rooted] to [[N2:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N2]]:pi-block +; CHECK: %i1.02 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %test1.for.body ] + +; CHECK: Node Address:[[N1]]:pi-block +; CHECK: %i2.03 = phi i64 [ 0, %for.body.lr.ph ], [ %inc2, %test1.for.body ] + +;; // Two separate components in the graph. Root node must link to both. +;; void test1(unsigned long n, float * restrict a, float * restrict b) { +;; for (unsigned long i1 = 0, i2 = 0; i1 < n; i1++, i2++) { +;; a[i1] = 1; +;; b[i2] = -1; +;; } +;; } + +define void @test1(i64 %n, float* noalias %a, float* noalias %b) { +entry: + %cmp1 = icmp ult i64 0, %n + br i1 %cmp1, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %test1.for.body + +test1.for.body: ; preds = %for.body.lr.ph, %test1.for.body + %i2.03 = phi i64 [ 0, %for.body.lr.ph ], [ %inc2, %test1.for.body ] + %i1.02 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %test1.for.body ] + %arrayidx = getelementptr inbounds float, float* %a, i64 %i1.02 + store float 1.000000e+00, float* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds float, float* %b, i64 %i2.03 + store float -1.000000e+00, float* %arrayidx1, align 4 + %inc = add i64 %i1.02, 1 + %inc2 = add i64 %i2.03, 1 + %cmp = icmp ult i64 %inc, %n + br i1 %cmp, label %test1.for.body, label %for.cond.for.end_crit_edge + +for.cond.for.end_crit_edge: ; preds = %test1.for.body + br label %for.end + +for.end: ; preds = %for.cond.for.end_crit_edge, %entry + ret void +} diff --git a/llvm/test/Analysis/Delinearization/constant_functions_multi_dim.ll b/llvm/test/Analysis/Delinearization/constant_functions_multi_dim.ll index b44b900d3..18fbdcc60 100644 --- 
a/llvm/test/Analysis/Delinearization/constant_functions_multi_dim.ll +++ b/llvm/test/Analysis/Delinearization/constant_functions_multi_dim.ll @@ -64,8 +64,8 @@ declare i64 @_Z13get_global_idj(i32) #1 ; Function Attrs: nounwind readnone speculatable declare float @llvm.fmuladd.f32(float, float, float) #2 -attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" 
"no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #2 = { nounwind readnone speculatable } attributes #3 = { nounwind readnone } diff --git a/llvm/test/Analysis/DependenceAnalysis/Dump.ll b/llvm/test/Analysis/DependenceAnalysis/Dump.ll new file mode 100644 index 000000000..d86221d85 --- /dev/null +++ b/llvm/test/Analysis/DependenceAnalysis/Dump.ll @@ -0,0 +1,50 @@ +; RUN: opt < %s -disable-output "-passes=print<da>" -aa-pipeline=basic-aa 2>&1 \ +; RUN: | FileCheck %s + +;; Test to make sure the dump shows the src and dst +;; instructions (including call instructions). +;; +;; void bar(float * restrict A); +;; void foo(float * restrict A, int n) { +;; for (int i = 0; i < n; i++) { +;; A[i] = i; +;; bar(A); +;; } +;; } + +; CHECK-LABEL: foo + +; CHECK: Src: store float %conv, float* %arrayidx, align 4 --> Dst: store float %conv, float* %arrayidx, align 4 +; CHECK-NEXT: da analyze - none! +; CHECK-NEXT: Src: store float %conv, float* %arrayidx, align 4 --> Dst: call void @bar(float* %A) +; CHECK-NEXT: da analyze - confused! +; CHECK-NEXT: Src: call void @bar(float* %A) --> Dst: call void @bar(float* %A) +; CHECK-NEXT: da analyze - confused! 
+ +define void @foo(float* noalias %A, i32 signext %n) { +entry: + %cmp1 = icmp slt i32 0, %n + br i1 %cmp1, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.02 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] + %conv = sitofp i32 %i.02 to float + %idxprom = zext i32 %i.02 to i64 + %arrayidx = getelementptr inbounds float, float* %A, i64 %idxprom + store float %conv, float* %arrayidx, align 4 + call void @bar(float* %A) #3 + %inc = add nuw nsw i32 %i.02, 1 + %cmp = icmp slt i32 %inc, %n + br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge + +for.cond.for.end_crit_edge: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.cond.for.end_crit_edge, %entry + ret void +} + +declare void @bar(float*) diff --git a/llvm/test/Analysis/DependenceAnalysis/MIVCheckConst.ll b/llvm/test/Analysis/DependenceAnalysis/MIVCheckConst.ll index d5dd1050b..9a3692d61 100644 --- a/llvm/test/Analysis/DependenceAnalysis/MIVCheckConst.ll +++ b/llvm/test/Analysis/DependenceAnalysis/MIVCheckConst.ll @@ -71,4 +71,4 @@ bb40: ret void } -attributes #0 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/llvm/test/Analysis/DependenceAnalysis/NonCanonicalizedSubscript.ll b/llvm/test/Analysis/DependenceAnalysis/NonCanonicalizedSubscript.ll index 2aba99243..1da2672ff 100644 --- a/llvm/test/Analysis/DependenceAnalysis/NonCanonicalizedSubscript.ll +++ b/llvm/test/Analysis/DependenceAnalysis/NonCanonicalizedSubscript.ll @@ -96,7 +96,7 @@ for.end13: ; preds = %for.cond ret void } 
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } !llvm.ident = !{!0} diff --git a/llvm/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheck.ll b/llvm/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheck.ll index 57ccf85b8..7063f20cd 100644 --- a/llvm/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheck.ll +++ b/llvm/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheck.ll @@ -235,3 +235,4 @@ for.end11.loopexit: ; preds = %for.inc9 for.end11: ; preds = %for.end11.loopexit, %entry ret void } + diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/b42473-r1-crash.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/b42473-r1-crash.ll new file mode 100644 index 000000000..cb3e42de3 --- /dev/null +++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/b42473-r1-crash.ll @@ -0,0 +1,111 @@ +; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s + +declare i32 @gf2(i32) +declare i32 @gf1(i32) + +define void @tw1(i32 addrspace(4)* noalias nocapture readonly %A, i32 addrspace(4)* noalias nocapture %B) local_unnamed_addr #2 { +; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'tw1': +; CHECK: DIVERGENT: i32 addrspace(4)* %A +; CHECK: DIVERGENT: i32 addrspace(4)* %B +entry: +; CHECK: DIVERGENT: %call = tail call i32 @gf2(i32 0) #0 +; CHECK: DIVERGENT: %cmp = icmp ult i32 %call, 16 +; CHECK: DIVERGENT: br i1 %cmp, label %if.then, label %new_exit + 
%call = tail call i32 @gf2(i32 0) #3 + %cmp = icmp ult i32 %call, 16 + br i1 %cmp, label %if.then, label %new_exit + +if.then: +; CHECK: DIVERGENT: %call1 = tail call i32 @gf1(i32 0) #0 +; CHECK: DIVERGENT: %arrayidx = getelementptr inbounds i32, i32 addrspace(4)* %A, i32 %call1 +; CHECK: DIVERGENT: %0 = load i32, i32 addrspace(4)* %arrayidx, align 4 +; CHECK: DIVERGENT: %cmp225 = icmp sgt i32 %0, 0 +; CHECK: DIVERGENT: %arrayidx10 = getelementptr inbounds i32, i32 addrspace(4)* %B, i32 %call1 +; CHECK: DIVERGENT: br i1 %cmp225, label %while.body.preheader, label %if.then.while.end_crit_edge + %call1 = tail call i32 @gf1(i32 0) #4 + %arrayidx = getelementptr inbounds i32, i32 addrspace(4)* %A, i32 %call1 + %0 = load i32, i32 addrspace(4)* %arrayidx, align 4 + %cmp225 = icmp sgt i32 %0, 0 + %arrayidx10 = getelementptr inbounds i32, i32 addrspace(4)* %B, i32 %call1 + br i1 %cmp225, label %while.body.preheader, label %if.then.while.end_crit_edge + +while.body.preheader: + br label %while.body + +if.then.while.end_crit_edge: +; CHECK: DIVERGENT: %.pre = load i32, i32 addrspace(4)* %arrayidx10, align 4 + %.pre = load i32, i32 addrspace(4)* %arrayidx10, align 4 + br label %while.end + +while.body: +; CHECK-NOT: DIVERGENT: %i.026 = phi i32 [ %inc, %if.end.while.body_crit_edge ], [ 0, %while.body.preheader ] +; CHECK: DIVERGENT: %call3 = tail call i32 @gf1(i32 0) #0 +; CHECK: DIVERGENT: %cmp4 = icmp ult i32 %call3, 10 +; CHECK: DIVERGENT: %arrayidx6 = getelementptr inbounds i32, i32 addrspace(4)* %A, i32 %i.026 +; CHECK: DIVERGENT: %1 = load i32, i32 addrspace(4)* %arrayidx6, align 4 +; CHECK: DIVERGENT: br i1 %cmp4, label %if.then5, label %if.else + %i.026 = phi i32 [ %inc, %if.end.while.body_crit_edge ], [ 0, %while.body.preheader ] + %call3 = tail call i32 @gf1(i32 0) #4 + %cmp4 = icmp ult i32 %call3, 10 + %arrayidx6 = getelementptr inbounds i32, i32 addrspace(4)* %A, i32 %i.026 + %1 = load i32, i32 addrspace(4)* %arrayidx6, align 4 + br i1 %cmp4, label %if.then5, label 
%if.else + +if.then5: +; CHECK: DIVERGENT: %mul = shl i32 %1, 1 +; CHECK: DIVERGENT: %2 = load i32, i32 addrspace(4)* %arrayidx10, align 4 +; CHECK: DIVERGENT: %add = add nsw i32 %2, %mul + %mul = shl i32 %1, 1 + %2 = load i32, i32 addrspace(4)* %arrayidx10, align 4 + %add = add nsw i32 %2, %mul + br label %if.end + +if.else: +; CHECK: DIVERGENT: %mul9 = shl i32 %1, 2 +; CHECK: DIVERGENT: %3 = load i32, i32 addrspace(4)* %arrayidx10, align 4 +; CHECK: DIVERGENT: %add11 = add nsw i32 %3, %mul9 + %mul9 = shl i32 %1, 2 + %3 = load i32, i32 addrspace(4)* %arrayidx10, align 4 + %add11 = add nsw i32 %3, %mul9 + br label %if.end + +if.end: +; CHECK: DIVERGENT: %storemerge = phi i32 [ %add11, %if.else ], [ %add, %if.then5 ] +; CHECK: DIVERGENT: store i32 %storemerge, i32 addrspace(4)* %arrayidx10, align 4 +; CHECK-NOT: DIVERGENT: %inc = add nuw nsw i32 %i.026, 1 +; CHECK: DIVERGENT: %exitcond = icmp ne i32 %inc, %0 +; CHECK: DIVERGENT: br i1 %exitcond, label %if.end.while.body_crit_edge, label %while.end.loopexit + %storemerge = phi i32 [ %add11, %if.else ], [ %add, %if.then5 ] + store i32 %storemerge, i32 addrspace(4)* %arrayidx10, align 4 + %inc = add nuw nsw i32 %i.026, 1 + %exitcond = icmp ne i32 %inc, %0 + br i1 %exitcond, label %if.end.while.body_crit_edge, label %while.end.loopexit + +if.end.while.body_crit_edge: + br label %while.body + +while.end.loopexit: +; CHECK: DIVERGENT: %storemerge.lcssa = phi i32 [ %storemerge, %if.end ] + %storemerge.lcssa = phi i32 [ %storemerge, %if.end ] + br label %while.end + +while.end: +; CHECK: DIVERGENT: %4 = phi i32 [ %.pre, %if.then.while.end_crit_edge ], [ %storemerge.lcssa, %while.end.loopexit ] +; CHECK: DIVERGENT: %i.0.lcssa = phi i32 [ 0, %if.then.while.end_crit_edge ], [ %0, %while.end.loopexit ] +; CHECK: DIVERGENT: %sub = sub nsw i32 %4, %i.0.lcssa +; CHECK: DIVERGENT: store i32 %sub, i32 addrspace(4)* %arrayidx10, align 4 + %4 = phi i32 [ %.pre, %if.then.while.end_crit_edge ], [ %storemerge.lcssa, %while.end.loopexit ] 
+ %i.0.lcssa = phi i32 [ 0, %if.then.while.end_crit_edge ], [ %0, %while.end.loopexit ] + %sub = sub nsw i32 %4, %i.0.lcssa + store i32 %sub, i32 addrspace(4)* %arrayidx10, align 4 + br label %new_exit + +new_exit: + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind readnone } +attributes #3 = { nounwind readnone } +attributes #4 = { nounwind readnone } diff --git a/llvm/test/Analysis/GlobalsModRef/intrinsic_addressnottaken1.ll b/llvm/test/Analysis/GlobalsModRef/intrinsic_addressnottaken1.ll new file mode 100644 index 000000000..934534b5b --- /dev/null +++ b/llvm/test/Analysis/GlobalsModRef/intrinsic_addressnottaken1.ll @@ -0,0 +1,44 @@ +; RUN: opt -globals-aa -gvn -S < %s | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@deallocCalled = internal global i8 0, align 1 + +; CHECK-LABEL: @main() +define dso_local i32 @main() { +entry: + %tmp0 = call i8* @llvm.objc.autoreleasePoolPush() #1 + %tmp6 = load i8, i8* @deallocCalled, align 1 + %tobool = icmp ne i8 %tmp6, 0 + br i1 %tobool, label %if.else, label %if.end + +if.else: ; preds = %entry + call void @__assert_fail() #0 + unreachable + +; CHECK-LABEL: if.end: +; CHECK-NEXT: call void @llvm.objc.autoreleasePoolPop +; CHECK-NOT: load i8, i8* @deallocCalled +if.end: ; preds = %entry + call void @llvm.objc.autoreleasePoolPop(i8* %tmp0) + %tmp7 = load i8, i8* @deallocCalled, align 1 + %tobool3 = icmp ne i8 %tmp7, 0 + br i1 %tobool3, label %if.end6, label %if.else5 + +if.else5: ; preds = %if.end + call void @__assert_fail() #0 + unreachable + +if.end6: ; preds = %if.end + store i8 0, i8* @deallocCalled, align 1 + ret i32 0 +} + +declare i8* @llvm.objc.autoreleasePoolPush() #1 +declare void @llvm.objc.autoreleasePoolPop(i8*) #1 +declare dso_local void @__assert_fail() #0 + +attributes #0 = { noreturn nounwind } +attributes #1 = { nounwind } + diff 
--git a/llvm/test/Analysis/GlobalsModRef/intrinsic_addressnottaken2.ll b/llvm/test/Analysis/GlobalsModRef/intrinsic_addressnottaken2.ll new file mode 100644 index 000000000..d9874d714 --- /dev/null +++ b/llvm/test/Analysis/GlobalsModRef/intrinsic_addressnottaken2.ll @@ -0,0 +1,50 @@ +; RUN: opt -globals-aa -gvn -S < %s | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@deallocCalled = internal global i8 0, align 1 + +define internal i8* @_i_Associated__dealloc() { +entry: + store i8 1, i8* @deallocCalled, align 1 + ret i8* null +} + +; CHECK-LABEL: @main() +define dso_local i32 @main() { +entry: + %tmp0 = call i8* @llvm.objc.autoreleasePoolPush() #1 + %tmp6 = load i8, i8* @deallocCalled, align 1 + %tobool = icmp ne i8 %tmp6, 0 + br i1 %tobool, label %if.else, label %if.end + +if.else: ; preds = %entry + call void @__assert_fail() #0 + unreachable + +; CHECK-LABEL: if.end: +; CHECK-NEXT: call void @llvm.objc.autoreleasePoolPop +; CHECK-NOT: load i8, i8* @deallocCalled +if.end: ; preds = %entry + call void @llvm.objc.autoreleasePoolPop(i8* %tmp0) + %tmp7 = load i8, i8* @deallocCalled, align 1 + %tobool3 = icmp ne i8 %tmp7, 0 + br i1 %tobool3, label %if.end6, label %if.else5 + +if.else5: ; preds = %if.end + call void @__assert_fail() #0 + unreachable + +if.end6: ; preds = %if.end + store i8 0, i8* @deallocCalled, align 1 + ret i32 0 +} + +declare i8* @llvm.objc.autoreleasePoolPush() #1 +declare void @llvm.objc.autoreleasePoolPop(i8*) #1 +declare dso_local void @__assert_fail() #0 + +attributes #0 = { noreturn nounwind } +attributes #1 = { nounwind } + diff --git a/llvm/test/Analysis/GlobalsModRef/intrinsic_addresstaken.ll b/llvm/test/Analysis/GlobalsModRef/intrinsic_addresstaken.ll new file mode 100644 index 000000000..613533254 --- /dev/null +++ b/llvm/test/Analysis/GlobalsModRef/intrinsic_addresstaken.ll @@ -0,0 +1,52 @@ +; RUN: opt -globals-aa -gvn -S < %s | 
FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@deallocCalled = internal global i8 0, align 1 +@.objc_method_list = internal global { i8* ()* } { i8* ()* @_i_Associated__dealloc }, align 8 +@._OBJC_CLASS_Associated = global { i8* } { i8* bitcast ({ i8* ()* }* @.objc_method_list to i8*) }, align 8 +@._OBJC_INIT_CLASS_Associated = global { i8* }* @._OBJC_CLASS_Associated +@llvm.used = appending global [1 x i8*] [i8* bitcast ({ i8* }** @._OBJC_INIT_CLASS_Associated to i8*)] + +define internal i8* @_i_Associated__dealloc() { +entry: + store i8 1, i8* @deallocCalled, align 1 + ret i8* null +} + +; CHECK-LABEL: @main() +define dso_local i32 @main() { +entry: + %tmp0 = call i8* @llvm.objc.autoreleasePoolPush() #1 + %tmp6 = load i8, i8* @deallocCalled, align 1 + %tobool = icmp ne i8 %tmp6, 0 + br i1 %tobool, label %if.else, label %if.end + +if.else: ; preds = %entry + call void @__assert_fail() #0 + unreachable + +; CHECK-LABEL: if.end: +; CHECK-NEXT: call void @llvm.objc.autoreleasePoolPop +if.end: ; preds = %entry + call void @llvm.objc.autoreleasePoolPop(i8* %tmp0) + %tmp7 = load i8, i8* @deallocCalled, align 1 + %tobool3 = icmp ne i8 %tmp7, 0 + br i1 %tobool3, label %if.end6, label %if.else5 + +if.else5: ; preds = %if.end + call void @__assert_fail() #0 + unreachable + +if.end6: ; preds = %if.end + store i8 0, i8* @deallocCalled, align 1 + ret i32 0 +} + +declare i8* @llvm.objc.autoreleasePoolPush() #1 +declare void @llvm.objc.autoreleasePoolPop(i8*) #1 +declare dso_local void @__assert_fail() #0 + +attributes #0 = { noreturn nounwind } +attributes #1 = { nounwind } diff --git a/llvm/test/Analysis/LazyValueAnalysis/lvi-after-jumpthreading.ll b/llvm/test/Analysis/LazyValueAnalysis/lvi-after-jumpthreading.ll index fbc2eb135..303c7c4c1 100644 --- a/llvm/test/Analysis/LazyValueAnalysis/lvi-after-jumpthreading.ll +++ 
b/llvm/test/Analysis/LazyValueAnalysis/lvi-after-jumpthreading.ll @@ -53,9 +53,9 @@ entry: ; CHECK-LABEL: loop: ; CHECK-NEXT: ; LatticeVal for: 'i32 %n' is: overdefined -; CHECK-NEXT: ; LatticeVal for: ' %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]' in BB: '%loop' is: constantrange<0, -2147483647> +; CHECK-NEXT: ; LatticeVal for: ' %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]' in BB: '%loop' is: constantrange<0, 400> ; CHECK-DAG: ; LatticeVal for: ' %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]' in BB: '%backedge' is: constantrange<0, -2147483648> -; CHECK-DAG: ; LatticeVal for: ' %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]' in BB: '%exit' is: constantrange<0, -2147483647> +; CHECK-DAG: ; LatticeVal for: ' %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]' in BB: '%exit' is: constantrange<0, -2147483648> ; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ] loop: %iv = phi i32 [0, %entry], [%iv.next, %backedge] @@ -81,7 +81,7 @@ loop: ; CHECK-LABEL: backedge: ; CHECK-NEXT: ; LatticeVal for: 'i32 %n' is: overdefined -; CHECK-NEXT: ; LatticeVal for: ' %iv.next = add nsw i32 %iv, 1' in BB: '%backedge' is: constantrange<1, -2147483647> +; CHECK-NEXT: ; LatticeVal for: ' %iv.next = add nsw i32 %iv, 1' in BB: '%backedge' is: constantrange<1, -2147483648> ; CHECK-NEXT: %iv.next = add nsw i32 %iv, 1 backedge: %iv.next = add nsw i32 %iv, 1 @@ -108,7 +108,7 @@ define i32 @test3(i32 %i, i1 %f, i32 %n) { ; CHECK-LABEL: LVI for function 'test3': ; CHECK-LABEL: entry ; CHECK: ; LatticeVal for: 'i32 %i' is: overdefined -; CHECK: %c = icmp ne i32 %i, -2134 +; CHECK: %c = icmp ne i32 %i, -2134 ; CHECK: br i1 %c, label %cont, label %exit entry: %c = icmp ne i32 %i, -2134 @@ -141,7 +141,7 @@ do: call void (i1, ...) 
@llvm.experimental.guard(i1 %cond) [ "deopt"() ] %cond.2 = icmp sgt i32 %i, 0 br i1 %cond.2, label %exit, label %cont - + exit2: ; CHECK-LABEL: exit2: ; LatticeVal for: 'i32 %i' is: constantrange<-2134, 1> diff --git a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/compute-cost.ll b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/compute-cost.ll new file mode 100644 index 000000000..8afde3697 --- /dev/null +++ b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/compute-cost.ll @@ -0,0 +1,35 @@ +; RUN: opt < %s -passes='print<loop-cache>' -disable-output 2>&1 | FileCheck %s + +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le-unknown-linux-gnu" + +; Check IndexedReference::computeRefCost can handle type differences between +; Stride and TripCount + +; CHECK: Loop 'for.cond' has cost = 64 + +%struct._Handleitem = type { %struct._Handleitem* } + +define void @handle_to_ptr(%struct._Handleitem** %blocks) { +; Preheader: +entry: + br label %for.cond + +; Loop: +for.cond: ; preds = %for.body, %entry + %i.0 = phi i32 [ 1, %entry ], [ %inc, %for.body ] + %cmp = icmp ult i32 %i.0, 1024 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %idxprom = zext i32 %i.0 to i64 + %arrayidx = getelementptr inbounds %struct._Handleitem*, %struct._Handleitem** %blocks, i64 %idxprom + store %struct._Handleitem* null, %struct._Handleitem** %arrayidx, align 8 + %inc = add nuw nsw i32 %i.0, 1 + br label %for.cond + +; Exit blocks +for.end: ; preds = %for.cond + ret void + +} diff --git a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/loads-store.ll b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/loads-store.ll new file mode 100644 index 000000000..b73abcac9 --- /dev/null +++ b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/loads-store.ll @@ -0,0 +1,88 @@ +; RUN: opt < %s -passes='print<loop-cache>' -disable-output 2>&1 | FileCheck %s + +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le-unknown-linux-gnu" + +; void foo(long n, long m, long o, int A[n][m][o], 
int B[n][m][o], int C[n][m][o]) { +; for (long i = 0; i < n; i++) +; for (long j = 0; j < m; j++) +; for (long k = 0; k < o; k++) +; A[i][k][j] += B[i][k][j] + C[i][j][k]; +; } + +; CHECK-DAG: Loop 'for.i' has cost = 3000000 +; CHECK-DAG: Loop 'for.k' has cost = 2030000 +; CHECK-DAG: Loop 'for.j' has cost = 1060000 + +define void @foo(i64 %n, i64 %m, i64 %o, i32* %A, i32* %B, i32* %C) { +entry: + %cmp32 = icmp sgt i64 %n, 0 + %cmp230 = icmp sgt i64 %m, 0 + %cmp528 = icmp sgt i64 %o, 0 + br i1 %cmp32, label %for.cond1.preheader.lr.ph, label %for.end + +for.cond1.preheader.lr.ph: ; preds = %entry + br i1 %cmp230, label %for.i.preheader, label %for.end + +for.i.preheader: ; preds = %for.cond1.preheader.lr.ph + br i1 %cmp528, label %for.i.preheader.split, label %for.end + +for.i.preheader.split: ; preds = %for.i.preheader + br label %for.i + +for.i: ; preds = %for.inci, %for.i.preheader.split + %i = phi i64 [ %inci, %for.inci ], [ 0, %for.i.preheader.split ] + %muli = mul i64 %i, %m + br label %for.j + +for.j: ; preds = %for.incj, %for.i + %j = phi i64 [ %incj, %for.incj ], [ 0, %for.i ] + %addj = add i64 %muli, %j + %mulj = mul i64 %addj, %o + br label %for.k + +for.k: ; preds = %for.k, %for.j + %k = phi i64 [ 0, %for.j ], [ %inck, %for.k ] + + ; B[i][k][j] + %addk = add i64 %muli, %k + %mulk = mul i64 %addk, %o + %arrayidx1 = add i64 %j, %mulk + %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %arrayidx1 + %elem_B = load i32, i32* %arrayidx2, align 4 + + ; C[i][j][k] + %arrayidx3 = add i64 %k, %mulj + %arrayidx4 = getelementptr inbounds i32, i32* %C, i64 %arrayidx3 + %elem_C = load i32, i32* %arrayidx4, align 4 + + ; A[i][k][j] + %arrayidx5 = getelementptr inbounds i32, i32* %A, i64 %arrayidx1 + %elem_A = load i32, i32* %arrayidx5, align 4 + + ; A[i][k][j] += B[i][k][j] + C[i][j][k] + %add1 = add i32 %elem_B, %elem_C + %add2 = add i32 %add1, %elem_A + %arrayidx6 = getelementptr inbounds i32, i32* %A, i64 %arrayidx1 + store i32 %add2, i32* %arrayidx6, align 4 + + 
%inck = add nsw i64 %k, 1 + %exitcond.us = icmp eq i64 %inck, %o + br i1 %exitcond.us, label %for.incj, label %for.k + +for.incj: ; preds = %for.k + %incj = add nsw i64 %j, 1 + %exitcond54.us = icmp eq i64 %incj, %m + br i1 %exitcond54.us, label %for.inci, label %for.j + +for.inci: ; preds = %for.incj + %inci = add nsw i64 %i, 1 + %exitcond55.us = icmp eq i64 %inci, %n + br i1 %exitcond55.us, label %for.end.loopexit, label %for.i + +for.end.loopexit: ; preds = %for.inci + br label %for.end + +for.end: ; preds = %for.end.loopexit, %for.cond1.preheader.lr.ph, %entry + ret void +} diff --git a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/matmul.ll b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/matmul.ll new file mode 100644 index 000000000..c7342fe32 --- /dev/null +++ b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/matmul.ll @@ -0,0 +1,81 @@ +; RUN: opt < %s -passes='print' -disable-output 2>&1 | FileCheck %s + +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le-unknown-linux-gnu" + +; void matmul(long n, long m, long o, int A[n][m], int B[n][m], int C[n]) { +; for (long i = 0; i < n; i++) +; for (long j = 0; j < m; j++) +; for (long k = 0; k < o; k++) +; C[i][j] = C[i][j] + A[i][k] * B[k][j]; +; } + +; CHECK-DAG:Loop 'for.i' has cost = 2010000 +; CHECK-DAG:Loop 'for.k' has cost = 1040000 +; CHECK-DAG:Loop 'for.j' has cost = 70000 + +define void @matmul(i64 %n, i64 %m, i64 %o, i32* %A, i32* %B, i32* %C) { +entry: + br label %for.i + +for.i: ; preds = %entry, %for.inc.i + %i = phi i64 [ 0, %entry ], [ %i.next, %for.inc.i ] + %muli = mul i64 %i, %m + br label %for.j + +for.j: ; preds = %for.i, %for.inc.j + %j = phi i64 [ 0, %for.i ], [ %j.next, %for.inc.j ] + %addj = add i64 %muli, %j + %mulj = mul i64 %addj, %o + br label %for.k + +for.k: ; preds = %for.j, %for.inc.k + %k = phi i64 [ 0, %for.j ], [ %k.next, %for.inc.k ] + + ; A[i][k] + %arrayidx3 = add i64 %k, %muli + %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 %arrayidx3 + %elem_A = load 
i32, i32* %arrayidx4, align 4 + + ; B[k][j] + %mulk = mul i64 %k, %o + %arrayidx5 = add i64 %j, %mulk + %arrayidx6 = getelementptr inbounds i32, i32* %B, i64 %arrayidx5 + %elem_B = load i32, i32* %arrayidx6, align 4 + + ; C[i][k] + %arrayidx7 = add i64 %j, %muli + %arrayidx8 = getelementptr inbounds i32, i32* %C, i64 %arrayidx7 + %elem_C = load i32, i32* %arrayidx8, align 4 + + ; C[i][j] = C[i][j] + A[i][k] * B[k][j]; + %mul = mul nsw i32 %elem_A, %elem_B + %add = add nsw i32 %elem_C, %mul + store i32 %add, i32* %arrayidx8, align 4 + + br label %for.inc.k + +for.inc.k: ; preds = %for.k + %k.next = add nuw nsw i64 %k, 1 + %exitcond = icmp ne i64 %k.next, %o + br i1 %exitcond, label %for.k, label %for.end + +for.end: ; preds = %for.inc + br label %for.inc.j + +for.inc.j: ; preds = %for.end + %j.next = add nuw nsw i64 %j, 1 + %exitcond5 = icmp ne i64 %j.next, %m + br i1 %exitcond5, label %for.j, label %for.end23 + +for.end23: ; preds = %for.inc.j + br label %for.inc.i + +for.inc.i: ; preds = %for.end23 + %i.next = add nuw nsw i64 %i, 1 + %exitcond8 = icmp ne i64 %i.next, %n + br i1 %exitcond8, label %for.i, label %for.end26 + +for.end26: ; preds = %for.inc.i + ret void +} diff --git a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/matvecmul.ll b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/matvecmul.ll new file mode 100644 index 000000000..313795251 --- /dev/null +++ b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/matvecmul.ll @@ -0,0 +1,185 @@ +; RUN: opt < %s -passes='print' -disable-output 2>&1 | FileCheck %s + +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le-unknown-linux-gnu" + +; void matvecmul(const double *__restrict y, const double * __restrict x, const double * __restrict b, +; const int * __restrict nb, const int * __restrict nx, const int * __restrict ny, const int * __restrict nz) { +; +; for (int k=1;k + +; Function Attrs: norecurse nounwind +define void @mat_vec_mpy([0 x %_elem_type_of_double]* noalias %y, [0 x 
%_elem_type_of_double]* noalias readonly %x, + [0 x %_elem_type_of_double]* noalias readonly %b, i32* noalias readonly %nb, i32* noalias readonly %nx, + i32* noalias readonly %ny, i32* noalias readonly %nz) { +mat_times_vec_entry: + %_ind_val = load i32, i32* %nb, align 4 + %_conv = sext i32 %_ind_val to i64 + %_grt_tmp.i = icmp sgt i64 %_conv, 0 + %a_b.i = select i1 %_grt_tmp.i, i64 %_conv, i64 0 + %_ind_val1 = load i32, i32* %nx, align 4 + %_conv2 = sext i32 %_ind_val1 to i64 + %_grt_tmp.i266 = icmp sgt i64 %_conv2, 0 + %a_b.i267 = select i1 %_grt_tmp.i266, i64 %_conv2, i64 0 + %_ind_val3 = load i32, i32* %ny, align 4 + %_conv4 = sext i32 %_ind_val3 to i64 + %_grt_tmp.i264 = icmp sgt i64 %_conv4, 0 + %a_b.i265 = select i1 %_grt_tmp.i264, i64 %_conv4, i64 0 + %_ind_val5 = load i32, i32* %nz, align 4 + %_mult_tmp = shl nsw i64 %a_b.i, 3 + %_mult_tmp7 = mul i64 %_mult_tmp, %a_b.i267 + %_mult_tmp8 = mul i64 %_mult_tmp7, %a_b.i265 + %_sub_tmp = sub nuw nsw i64 -8, %_mult_tmp + %_sub_tmp21 = sub i64 %_sub_tmp, %_mult_tmp7 + %_sub_tmp23 = sub i64 %_sub_tmp21, %_mult_tmp8 + %_mult_tmp73 = mul i64 %_mult_tmp, %a_b.i + %_mult_tmp74 = mul i64 %_mult_tmp73, %a_b.i267 + %_mult_tmp75 = mul i64 %_mult_tmp74, %a_b.i265 + %_sub_tmp93 = sub i64 %_sub_tmp, %_mult_tmp73 + %_sub_tmp95 = sub i64 %_sub_tmp93, %_mult_tmp74 + %_sub_tmp97 = sub i64 %_sub_tmp95, %_mult_tmp75 + %_grt_tmp853288 = icmp slt i32 %_ind_val5, 1 + br i1 %_grt_tmp853288, label %_return_bb, label %k_loop.lr.ph + +k_loop.lr.ph: ; preds = %mat_times_vec_entry + %_grt_tmp851279 = icmp slt i32 %_ind_val3, 1 + %_grt_tmp847270 = icmp slt i32 %_ind_val, 1 + %_aa_conv = bitcast [0 x %_elem_type_of_double]* %y to i8* + %_adda_ = getelementptr inbounds i8, i8* %_aa_conv, i64 %_sub_tmp23 + %_aa_conv434 = bitcast [0 x %_elem_type_of_double]* %x to i8* + %_adda_435 = getelementptr inbounds i8, i8* %_aa_conv434, i64 %_sub_tmp23 + %_aa_conv785 = bitcast [0 x %_elem_type_of_double]* %b to i8* + %_adda_786 = getelementptr inbounds 
i8, i8* %_aa_conv785, i64 %_sub_tmp97 + br i1 %_grt_tmp851279, label %k_loop.us.preheader, label %k_loop.lr.ph.split + +k_loop.us.preheader: ; preds = %k_loop.lr.ph + br label %_return_bb.loopexit + +k_loop.lr.ph.split: ; preds = %k_loop.lr.ph + %_grt_tmp849273 = icmp slt i32 %_ind_val1, 1 + br i1 %_grt_tmp849273, label %k_loop.us291.preheader, label %k_loop.lr.ph.split.split + +k_loop.us291.preheader: ; preds = %k_loop.lr.ph.split + br label %_return_bb.loopexit300 + +k_loop.lr.ph.split.split: ; preds = %k_loop.lr.ph.split + br i1 %_grt_tmp847270, label %k_loop.us294.preheader, label %k_loop.preheader + +k_loop.preheader: ; preds = %k_loop.lr.ph.split.split + %0 = add i32 %_ind_val, 1 + %1 = add i32 %_ind_val1, 1 + %2 = add i32 %_ind_val3, 1 + %3 = add i32 %_ind_val5, 1 + br label %k_loop + +k_loop.us294.preheader: ; preds = %k_loop.lr.ph.split.split + br label %_return_bb.loopexit301 + +k_loop: ; preds = %k_loop._label_18_crit_edge.split.split.split, %k_loop.preheader + %indvars.iv316 = phi i64 [ 1, %k_loop.preheader ], [ %indvars.iv.next317, %k_loop._label_18_crit_edge.split.split.split ] + %indvars.iv.next317 = add nuw nsw i64 %indvars.iv316, 1 + %_ix_x_len = mul i64 %_mult_tmp8, %indvars.iv.next317 + %_ix_x_len410 = mul i64 %_mult_tmp75, %indvars.iv316 + %_ix_x_len822 = mul i64 %_mult_tmp8, %indvars.iv316 + br label %j_loop + +j_loop: ; preds = %j_loop._label_15_crit_edge.split.split, %k_loop + %indvars.iv312 = phi i64 [ %indvars.iv.next313, %j_loop._label_15_crit_edge.split.split ], [ 1, %k_loop ] + %_ix_x_len371 = mul i64 %_mult_tmp7, %indvars.iv312 + %_ix_x_len415 = mul i64 %_mult_tmp74, %indvars.iv312 + br label %i_loop + +i_loop: ; preds = %i_loop._label_12_crit_edge.split, %j_loop + %indvars.iv307 = phi i64 [ %indvars.iv.next308, %i_loop._label_12_crit_edge.split ], [ 1, %j_loop ] + %_ix_x_len375 = mul i64 %_mult_tmp, %indvars.iv307 + %_ix_x_len420 = mul i64 %_mult_tmp73, %indvars.iv307 + br label %l_loop + +l_loop: ; preds = %l_loop._label_9_crit_edge, 
%i_loop + %indvars.iv303 = phi i64 [ %indvars.iv.next304, %l_loop._label_9_crit_edge ], [ 1, %i_loop ] + %_ix_x_len378 = shl nuw nsw i64 %indvars.iv303, 3 + br label %m_loop + +m_loop: ; preds = %m_loop, %l_loop + %indvars.iv = phi i64 [ %indvars.iv.next, %m_loop ], [ 1, %l_loop ] + %_ix_x_len424 = mul i64 %_mult_tmp, %indvars.iv + %_ix_x_len454 = shl nuw nsw i64 %indvars.iv, 3 + %_ixa_gep = getelementptr inbounds i8, i8* %_adda_, i64 %_ix_x_len + %_ixa_gep791 = getelementptr inbounds i8, i8* %_adda_786, i64 %_ix_x_len410 + %_ixa_gep823 = getelementptr inbounds i8, i8* %_adda_435, i64 %_ix_x_len822 + %_ixa_gep372 = getelementptr inbounds i8, i8* %_ixa_gep, i64 %_ix_x_len371 + %_ixa_gep376 = getelementptr inbounds i8, i8* %_ixa_gep372, i64 %_ix_x_len375 + %_ixa_gep796 = getelementptr inbounds i8, i8* %_ixa_gep791, i64 %_ix_x_len415 + %_ixa_gep828 = getelementptr inbounds i8, i8* %_ixa_gep823, i64 %_ix_x_len371 + %_ixa_gep379 = getelementptr inbounds i8, i8* %_ixa_gep376, i64 %_ix_x_len378 + %_ixa_gep801 = getelementptr inbounds i8, i8* %_ixa_gep796, i64 %_ix_x_len420 + %_ixa_gep833 = getelementptr inbounds i8, i8* %_ixa_gep828, i64 %_ix_x_len375 + %_ixa_gep806 = getelementptr inbounds i8, i8* %_ixa_gep801, i64 %_ix_x_len378 + %_ixa_gep810 = getelementptr inbounds i8, i8* %_ixa_gep806, i64 %_ix_x_len424 + %_gepp = bitcast i8* %_ixa_gep379 to double* + %_gepp813 = bitcast i8* %_ixa_gep810 to double* + %_ind_val814 = load double, double* %_gepp813, align 8 + %_ixa_gep837 = getelementptr inbounds i8, i8* %_ixa_gep833, i64 %_ix_x_len454 + %_gepp840 = bitcast i8* %_ixa_gep837 to double* + %_ind_val841 = load double, double* %_gepp840, align 8 + %_mult_tmp842 = fmul double %_ind_val814, %_ind_val841 + store double %_mult_tmp842, double* %_gepp, align 8 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %wide.trip.count = zext i32 %0 to i64 + %wide.trip.count305 = zext i32 %0 to i64 + %wide.trip.count309 = zext i32 %1 to i64 + %wide.trip.count314 = zext i32 %2 to i64 + 
%wide.trip.count319 = zext i32 %3 to i64 + %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond, label %m_loop, label %l_loop._label_9_crit_edge + +l_loop._label_9_crit_edge: ; preds = %m_loop + %indvars.iv.next304 = add nuw nsw i64 %indvars.iv303, 1 + %exitcond306 = icmp ne i64 %indvars.iv.next304, %wide.trip.count305 + br i1 %exitcond306, label %l_loop, label %i_loop._label_12_crit_edge.split + +i_loop._label_12_crit_edge.split: ; preds = %l_loop._label_9_crit_edge + %indvars.iv.next308 = add nuw nsw i64 %indvars.iv307, 1 + %exitcond310 = icmp ne i64 %indvars.iv.next308, %wide.trip.count309 + br i1 %exitcond310, label %i_loop, label %j_loop._label_15_crit_edge.split.split + +j_loop._label_15_crit_edge.split.split: ; preds = %i_loop._label_12_crit_edge.split + %indvars.iv.next313 = add nuw nsw i64 %indvars.iv312, 1 + %exitcond315 = icmp ne i64 %indvars.iv.next313, %wide.trip.count314 + br i1 %exitcond315, label %j_loop, label %k_loop._label_18_crit_edge.split.split.split + +k_loop._label_18_crit_edge.split.split.split: ; preds = %j_loop._label_15_crit_edge.split.split + %exitcond320 = icmp ne i64 %indvars.iv.next317, %wide.trip.count319 + br i1 %exitcond320, label %k_loop, label %_return_bb.loopexit302 + +_return_bb.loopexit: ; preds = %k_loop.us.preheader + br label %_return_bb + +_return_bb.loopexit300: ; preds = %k_loop.us291.preheader + br label %_return_bb + +_return_bb.loopexit301: ; preds = %k_loop.us294.preheader + br label %_return_bb + +_return_bb.loopexit302: ; preds = %k_loop._label_18_crit_edge.split.split.split + br label %_return_bb + +_return_bb: ; preds = %_return_bb.loopexit302, %_return_bb.loopexit301, %_return_bb.loopexit300, %_return_bb.loopexit, %mat_times_vec_entry + ret void +} + + diff --git a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/single-store.ll b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/single-store.ll new file mode 100644 index 000000000..cce6731fb --- /dev/null +++ 
b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/single-store.ll @@ -0,0 +1,77 @@ +; RUN: opt < %s -passes='print' -disable-output 2>&1 | FileCheck %s + +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le-unknown-linux-gnu" + +; void foo(long n, long m, long o, int A[n][m][o]) { +; for (long i = 0; i < n; i++) +; for (long j = 0; j < m; j++) +; for (long k = 0; k < o; k++) +; A[2*i+3][3*j-4][2*k+7] = 1; +; } + +; CHECK-DAG: Loop 'for.i' has cost = 1000000 +; CHECK-DAG: Loop 'for.j' has cost = 1000000 +; CHECK-DAG: Loop 'for.k' has cost = 60000 + +define void @foo(i64 %n, i64 %m, i64 %o, i32* %A) { +entry: + %cmp32 = icmp sgt i64 %n, 0 + %cmp230 = icmp sgt i64 %m, 0 + %cmp528 = icmp sgt i64 %o, 0 + br i1 %cmp32, label %for.cond1.preheader.lr.ph, label %for.end + +for.cond1.preheader.lr.ph: ; preds = %entry + br i1 %cmp230, label %for.i.preheader, label %for.end + +for.i.preheader: ; preds = %for.cond1.preheader.lr.ph + br i1 %cmp528, label %for.i.preheader.split, label %for.end + +for.i.preheader.split: ; preds = %for.i.preheader + br label %for.i + +for.i: ; preds = %for.inci, %for.i.preheader.split + %i = phi i64 [ %inci, %for.inci ], [ 0, %for.i.preheader.split ] + %mul8 = shl i64 %i, 1 + %add9 = add nsw i64 %mul8, 3 + %0 = mul i64 %add9, %m + %sub = add i64 %0, -4 + br label %for.j + +for.j: ; preds = %for.incj, %for.i + %j = phi i64 [ %incj, %for.incj ], [ 0, %for.i ] + %mul7 = mul nsw i64 %j, 3 + %tmp = add i64 %sub, %mul7 + %tmp27 = mul i64 %tmp, %o + br label %for.k + +for.k: ; preds = %for.k, %for.j.us + %k = phi i64 [ 0, %for.j ], [ %inck, %for.k ] + + %mul = mul nsw i64 %k, 2 + %arrayidx.sum = add i64 %mul, 7 + %arrayidx10.sum = add i64 %arrayidx.sum, %tmp27 + %arrayidx11 = getelementptr inbounds i32, i32* %A, i64 %arrayidx10.sum + store i32 1, i32* %arrayidx11, align 4 + + %inck = add nsw i64 %k, 1 + %exitcond.us = icmp eq i64 %inck, %o + br i1 %exitcond.us, label %for.incj, label %for.k + +for.incj: ; preds = %for.k + %incj = add nsw 
i64 %j, 1 + %exitcond54.us = icmp eq i64 %incj, %m + br i1 %exitcond54.us, label %for.inci, label %for.j + +for.inci: ; preds = %for.incj + %inci = add nsw i64 %i, 1 + %exitcond55.us = icmp eq i64 %inci, %n + br i1 %exitcond55.us, label %for.end.loopexit, label %for.i + +for.end.loopexit: ; preds = %for.inci + br label %for.end + +for.end: ; preds = %for.end.loopexit, %for.cond1.preheader.lr.ph, %entry + ret void +} + diff --git a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/stencil.ll b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/stencil.ll new file mode 100644 index 000000000..1f1515435 --- /dev/null +++ b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/stencil.ll @@ -0,0 +1,98 @@ +; RUN: opt < %s -passes='print' -disable-output 2>&1 | FileCheck %s + +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le-unknown-linux-gnu" + +; void foo(long n, long m, long o, int A[n][m], int B[n][m], int C[n]) { +; for (long i = 0; i < n; i++) +; for (long j = 0; j < m; j++) { +; A[i][j] = A[i][j+1] + B[i-1][j] + B[i+1][j+1] + C[i]; +; A[i][j] += B[i][i]; +; } +; } + +; CHECK-DAG: Loop 'for.i' has cost = 20600 +; CHECK-DAG: Loop 'for.j' has cost = 800 + +define void @foo(i64 %n, i64 %m, i32* %A, i32* %B, i32* %C) { +entry: + %cmp32 = icmp sgt i64 %n, 0 + %cmp230 = icmp sgt i64 %m, 0 + br i1 %cmp32, label %for.cond1.preheader.lr.ph, label %for.end + +for.cond1.preheader.lr.ph: ; preds = %entry + br i1 %cmp230, label %for.i.preheader, label %for.end + +for.i.preheader: ; preds = %for.cond1.preheader.lr.ph + br label %for.i + +for.i: ; preds = %for.inci, %for.i.preheader.split + %i = phi i64 [ %inci, %for.inci ], [ 0, %for.i.preheader ] + %subione = sub i64 %i, 1 + %addione = add i64 %i, 1 + %muli = mul i64 %i, %m + %muliminusone = mul i64 %subione, %m + %muliplusone = mul i64 %addione, %m + br label %for.j + +for.j: ; preds = %for.incj, %for.i + %j = phi i64 [ %incj, %for.incj ], [ 0, %for.i ] + %addj = add i64 %muli, %j + + ; B[i-1][j] + %arrayidx1 = add i64 %j, 
%muliminusone + %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %arrayidx1 + %elem_B1 = load i32, i32* %arrayidx2, align 4 + + ; B[i-1][j+1] + %addjone = add i64 %j, 1 + %arrayidx3 = add i64 %addjone, %muliminusone + %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %arrayidx3 + %elem_B2 = load i32, i32* %arrayidx4, align 4 + + ; C[i] + %arrayidx6 = getelementptr inbounds i32, i32* %C, i64 %i + %elem_C = load i32, i32* %arrayidx6, align 4 + + ; A[i][j+1] + %arrayidx7 = add i64 %addjone, %muli + %arrayidx8 = getelementptr inbounds i32, i32* %A, i64 %arrayidx7 + %elem_A = load i32, i32* %arrayidx8, align 4 + + ; A[i][j] = A[i][j+1] + B[i-1][j] + B[i-1][j+1] + C[i] + %addB = add i32 %elem_B1, %elem_B2 + %addC = add i32 %addB, %elem_C + %addA = add i32 %elem_A, %elem_C + %arrayidx9 = add i64 %j, %muli + %arrayidx10 = getelementptr inbounds i32, i32* %A, i64 %arrayidx9 + store i32 %addA, i32* %arrayidx10, align 4 + + ; A[i][j] += B[i][i]; + %arrayidx11 = add i64 %j, %muli + %arrayidx12 = getelementptr inbounds i32, i32* %A, i64 %arrayidx11 + %elem_A1 = load i32, i32* %arrayidx12, align 4 + %arrayidx13 = add i64 %i, %muli + %arrayidx14 = getelementptr inbounds i32, i32* %B, i64 %arrayidx13 + %elem_B3 = load i32, i32* %arrayidx14, align 4 + %addA1 = add i32 %elem_A1, %elem_B3 + store i32 %addA1, i32* %arrayidx12, align 4 + + br label %for.incj + +for.incj: ; preds = %for.j + %incj = add nsw i64 %j, 1 + %exitcond54.us = icmp eq i64 %incj, %m + br i1 %exitcond54.us, label %for.inci, label %for.j + +for.inci: ; preds = %for.incj + %inci = add nsw i64 %i, 1 + %exitcond55.us = icmp eq i64 %inci, %n + br i1 %exitcond55.us, label %for.end.loopexit, label %for.i + +for.end.loopexit: ; preds = %for.inci + br label %for.end + +for.end: ; preds = %for.end.loopexit, %for.cond1.preheader.lr.ph, %entry + ret void +} + diff --git a/llvm/test/Analysis/MemoryDependenceAnalysis/invariant.group-bug.ll b/llvm/test/Analysis/MemoryDependenceAnalysis/invariant.group-bug.ll index 
00ed8d6ed..5ef98b888 100644 --- a/llvm/test/Analysis/MemoryDependenceAnalysis/invariant.group-bug.ll +++ b/llvm/test/Analysis/MemoryDependenceAnalysis/invariant.group-bug.ll @@ -96,8 +96,8 @@ define void @fail(i1* noalias sret, %0*, %1*, i8*) local_unnamed_addr #0 { ret void } -attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" 
"no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } !llvm.linker.options = !{} !llvm.module.flags = !{!0, !1, !3, !4, !5} diff --git a/llvm/test/Analysis/MemorySSA/debugvalue.ll b/llvm/test/Analysis/MemorySSA/debugvalue.ll new file mode 100644 index 000000000..2d7055627 --- /dev/null +++ b/llvm/test/Analysis/MemorySSA/debugvalue.ll @@ -0,0 +1,44 @@ +; RUN: opt -disable-basicaa -loop-rotate -enable-mssa-loop-dependency -verify-memoryssa -S %s | FileCheck %s +; REQUIRES: asserts + +; CHECK-LABEL: @f_w4_i2 +define void @f_w4_i2() { +entry: + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %i.0 = phi i16 [ 0, %entry ], [ %inc, %for.body ] + call void @llvm.dbg.value(metadata i16 %i.0, metadata !32, metadata !DIExpression()), !dbg !31 + br i1 undef, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond + ret void + +for.body: ; preds = %for.cond + store i32 undef, i32* undef, align 1 + %inc = add i16 %i.0, 1 + call void @llvm.dbg.value(metadata i16 %inc, metadata !32, metadata !DIExpression()), !dbg !31 + br label %for.cond +} + +; Function Attrs: nounwind readnone speculatable willreturn +declare void @llvm.dbg.value(metadata, metadata, metadata) #1 + +attributes #1 = { nounwind readnone speculatable willreturn } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!14} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 10.0.0s", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !3, nameTableKind: None) +!1 = !DIFile(filename: "vec.c", directory: "test") +!2 = !{} +!3 = !{!4} +!4 = !DIBasicType(name: "int", size: 16, encoding: DW_ATE_signed) +!14 = !{i32 2, !"Debug Info Version", i32 3} +!15 = distinct !DISubprogram(name: "f_w4_i2", 
scope: !1, file: !1, line: 36, type: !16, scopeLine: 38, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !23) +!16 = !DISubroutineType(types: !17) +!17 = !{null} +!23 = !{} +!30 = distinct !DILexicalBlock(scope: !15, file: !1, line: 43, column: 5) +!31 = !DILocation(line: 0, scope: !30) +!32 = !DILocalVariable(name: "i", scope: !30, file: !1, line: 43, type: !4) diff --git a/llvm/test/Analysis/MemorySSA/debugvalue2.ll b/llvm/test/Analysis/MemorySSA/debugvalue2.ll new file mode 100644 index 000000000..63144523f --- /dev/null +++ b/llvm/test/Analysis/MemorySSA/debugvalue2.ll @@ -0,0 +1,54 @@ +; RUN: opt -disable-basicaa -print-memoryssa -disable-output %s 2>&1 | FileCheck %s + +; Note that the test crashes the MemorySSA verification when doing loop-rotate, +; if debuginfo is modelled in MemorySSA, due to the fact that MemorySSA is not +; updated when adding/removing debuginfo intrinsics. + +target triple = "x86_64-unknown-linux-gnu" + +; CHECK-LABEL: @overflow_iter_var +; CHECK-NOT: MemoryDef +define void @overflow_iter_var() !dbg !11 { +entry: + call void @llvm.dbg.value(metadata i16 0, metadata !16, metadata !DIExpression()), !dbg !18 + br label %for.cond + +for.cond: ; preds = %for.body, %entry + call void @llvm.dbg.value(metadata i16 0, metadata !16, metadata !DIExpression()), !dbg !18 + call void @llvm.dbg.value(metadata i16 undef, metadata !20, metadata !DIExpression()), !dbg !21 + br i1 undef, label %for.end, label %for.body + +for.body: ; preds = %for.cond + %0 = load i16, i16* undef, align 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} + +; Function Attrs: nounwind readnone speculatable willreturn +declare void @llvm.dbg.value(metadata, metadata, metadata) #1 + +attributes #1 = { nounwind readnone speculatable willreturn } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!10} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 10.0.0", isOptimized: 
false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, globals: !3, nameTableKind: None) +!1 = !DIFile(filename: "2_loops.c", directory: "/") +!2 = !{} +!3 = !{} +!6 = !DICompositeType(tag: DW_TAG_array_type, baseType: !7, size: 4096, elements: !8) +!7 = !DIBasicType(name: "int", size: 16, encoding: DW_ATE_signed) +!8 = !{!9} +!9 = !DISubrange(count: 256) +!10 = !{i32 2, !"Debug Info Version", i32 3} +!11 = distinct !DISubprogram(name: "overflow_iter_var", scope: !1, file: !1, line: 20, type: !12, scopeLine: 21, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +!12 = !DISubroutineType(types: !13) +!13 = !{null, !14, !14} +!14 = !DIBasicType(name: "unsigned int", size: 16, encoding: DW_ATE_unsigned) +!16 = !DILocalVariable(name: "i", scope: !17, file: !1, line: 23, type: !14) +!17 = distinct !DILexicalBlock(scope: !11, file: !1, line: 23, column: 3) +!18 = !DILocation(line: 0, scope: !17) +!20 = !DILocalVariable(name: "stop1", arg: 1, scope: !11, file: !1, line: 20, type: !14) +!21 = !DILocation(line: 0, scope: !11) diff --git a/llvm/test/Analysis/MemorySSA/loop-rotate-disablebasicaa.ll b/llvm/test/Analysis/MemorySSA/loop-rotate-disablebasicaa.ll new file mode 100644 index 000000000..f601d48f4 --- /dev/null +++ b/llvm/test/Analysis/MemorySSA/loop-rotate-disablebasicaa.ll @@ -0,0 +1,26 @@ +; RUN: opt -disable-basicaa -print-memoryssa -disable-output %s 2>&1 | FileCheck %s + +; Note: if @foo is modelled as a MemoryDef, this test will assert with -loop-rotate, due to MemorySSA not +; being preserved when moving instructions that may not read from or write to memory. 
+ +; CHECK-LABEL: @main +; CHECK-NOT: MemoryDef +define void @main() { +entry: + br label %for.cond120 + +for.cond120: ; preds = %for.body127, %entry + call void @foo() + br i1 undef, label %for.body127, label %for.cond.cleanup126 + +for.cond.cleanup126: ; preds = %for.cond120 + unreachable + +for.body127: ; preds = %for.cond120 + %0 = load i16**, i16*** undef, align 1 + br label %for.cond120 +} + +declare void @foo() readnone + + diff --git a/llvm/test/Analysis/MemorySSA/loop-rotate-simplified-clone.ll b/llvm/test/Analysis/MemorySSA/loop-rotate-simplified-clone.ll new file mode 100644 index 000000000..3d0efc6f6 --- /dev/null +++ b/llvm/test/Analysis/MemorySSA/loop-rotate-simplified-clone.ll @@ -0,0 +1,29 @@ +; RUN: opt -verify-memoryssa -enable-mssa-loop-dependency -loop-rotate %s -S | FileCheck %s +; REQUIRES: asserts + +; CHECK-LABEL: @test() +define dso_local void @test() { +entry: + br label %preheader + +preheader: + br label %l39 + +l39: + %v40 = phi float (float)* [ @foo, %preheader ], [ %v43, %crit_edge ] + %v41 = call float %v40(float undef) + %v42 = load i32, i32* undef, align 8 + br i1 undef, label %crit_edge, label %loopexit + +crit_edge: + %v43 = load float (float)*, float (float)** undef, align 8 + br label %l39 + +loopexit: + unreachable +} + +; Function Attrs: readnone +declare dso_local float @foo(float) #0 align 32 + +attributes #0 = { readnone } diff --git a/llvm/test/Analysis/MemorySSA/loop-unswitch.ll b/llvm/test/Analysis/MemorySSA/loop-unswitch.ll new file mode 100644 index 000000000..f42a9ca15 --- /dev/null +++ b/llvm/test/Analysis/MemorySSA/loop-unswitch.ll @@ -0,0 +1,26 @@ +; RUN: opt -S -loop-unswitch -disable-basicaa -enable-mssa-loop-dependency -verify-memoryssa < %s | FileCheck %s +; REQUIRES: asserts + +target triple = "x86_64-unknown-linux-gnu" + +; CHECK-LABEL: @foo() +; Function Attrs: readnone speculatable +declare i32 @foo() #0 + +define void @main() { +entry: + br label %for.cond2682 + +for.cond2682: ; preds = %if.then2712, 
%entry + %mul2708 = call i32 @foo() + %tobool2709 = icmp ne i32 %mul2708, 0 + br i1 %tobool2709, label %if.then2712, label %lor.lhs.false2710 + +lor.lhs.false2710: ; preds = %for.cond2682 + unreachable + +if.then2712: ; preds = %for.cond2682 + br label %for.cond2682 +} + +attributes #0 = { readnone speculatable } diff --git a/llvm/test/Analysis/MemorySSA/pr28880.ll b/llvm/test/Analysis/MemorySSA/pr28880.ll index ae64c0c5d..80c0440f8 100644 --- a/llvm/test/Analysis/MemorySSA/pr28880.ll +++ b/llvm/test/Analysis/MemorySSA/pr28880.ll @@ -47,5 +47,5 @@ bb6: ; preds = %bb3 unreachable } -attributes #0 = { nounwind ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="core2" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+ssse3" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="core2" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+ssse3" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/llvm/test/Analysis/MemorySSA/pr39197.ll b/llvm/test/Analysis/MemorySSA/pr39197.ll index e384c060c..16a321a81 100644 --- a/llvm/test/Analysis/MemorySSA/pr39197.ll +++ b/llvm/test/Analysis/MemorySSA/pr39197.ll @@ -133,7 +133,7 @@ define dso_local void @safe_add_func_uint8_t_u_u() #0 { ret void } -attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="z13" 
"target-features"="+transactional-execution,+vector" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="z13" "target-features"="+transactional-execution,+vector" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { argmemonly nounwind } attributes #2 = { nounwind } diff --git a/llvm/test/Analysis/MemorySSA/pr40038.ll b/llvm/test/Analysis/MemorySSA/pr40038.ll index b46b52e27..844ede33c 100644 --- a/llvm/test/Analysis/MemorySSA/pr40038.ll +++ b/llvm/test/Analysis/MemorySSA/pr40038.ll @@ -70,7 +70,7 @@ bb18: ; preds = %bb12, %bb1 ; Function Attrs: cold noreturn nounwind declare void @llvm.trap() #1 -attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="z13" "target-features"="+transactional-execution,+vector" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="z13" "target-features"="+transactional-execution,+vector" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { cold noreturn nounwind } !llvm.ident = !{!0} 
diff --git a/llvm/test/Analysis/MemorySSA/pr40754.ll b/llvm/test/Analysis/MemorySSA/pr40754.ll index 8db320d80..3262a0cdd 100644 --- a/llvm/test/Analysis/MemorySSA/pr40754.ll +++ b/llvm/test/Analysis/MemorySSA/pr40754.ll @@ -11,44 +11,45 @@ target triple = "systemz-unknown" ; Function Attrs: norecurse noreturn nounwind define dso_local void @func_65() local_unnamed_addr { ; CHECK-LABEL: @func_65() - br label %1 +label0: + br label %label1 -;